diff --git a/velox/connectors/hive/CMakeLists.txt b/velox/connectors/hive/CMakeLists.txt index 15bc182227e..530e5068aeb 100644 --- a/velox/connectors/hive/CMakeLists.txt +++ b/velox/connectors/hive/CMakeLists.txt @@ -38,6 +38,7 @@ target_link_libraries( velox_dwio_catalog_fbhive velox_dwio_dwrf_reader velox_dwio_dwrf_writer + velox_dwio_orc_reader velox_dwio_parquet_reader velox_dwio_parquet_writer velox_file diff --git a/velox/connectors/hive/HiveConnector.cpp b/velox/connectors/hive/HiveConnector.cpp index 00d463b25b9..5f5faf1671a 100644 --- a/velox/connectors/hive/HiveConnector.cpp +++ b/velox/connectors/hive/HiveConnector.cpp @@ -36,6 +36,7 @@ #endif #include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/writer/Writer.h" +#include "velox/dwio/orc/reader/OrcReader.h" // Meta's buck build system needs this check. #ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/RegisterParquetReader.h" // @manual @@ -133,6 +134,7 @@ void HiveConnectorFactory::initialize() { dwio::common::registerFileSinks(); dwrf::registerDwrfReaderFactory(); dwrf::registerDwrfWriterFactory(); + orc::registerOrcReaderFactory(); // Meta's buck build system needs this check. #ifdef VELOX_ENABLE_PARQUET parquet::registerParquetReaderFactory(); diff --git a/velox/dwio/CMakeLists.txt b/velox/dwio/CMakeLists.txt index 2dd3ea5fac2..c8e002b7331 100644 --- a/velox/dwio/CMakeLists.txt +++ b/velox/dwio/CMakeLists.txt @@ -32,4 +32,5 @@ target_link_libraries( add_subdirectory(common) add_subdirectory(catalog) add_subdirectory(dwrf) +add_subdirectory(orc) add_subdirectory(parquet) diff --git a/velox/dwio/dwrf/test/OrcTest.h b/velox/dwio/dwrf/test/OrcTest.h index 5d9228e7c9a..3c21baf4234 100644 --- a/velox/dwio/dwrf/test/OrcTest.h +++ b/velox/dwio/dwrf/test/OrcTest.h @@ -37,6 +37,13 @@ inline std::string getExampleFilePath(const std::string& fileName) { "velox/dwio/dwrf/test", "examples/" + fileName); } +std::unique_ptr createFileBufferedInput( + const std::string& path, + memory::MemoryPool& pool) { + return std::make_unique( + std::make_shared(path), pool); +} + class MockStripeStreams : public StripeStreams { public: MockStripeStreams() : pool_{memory::memoryManager()->addLeafPool()} {}; diff --git a/velox/dwio/dwrf/test/ReaderTest.cpp b/velox/dwio/dwrf/test/ReaderTest.cpp index 16c8a57ffeb..084370acabb 100644 --- a/velox/dwio/dwrf/test/ReaderTest.cpp +++ b/velox/dwio/dwrf/test/ReaderTest.cpp @@ -16,7 +16,6 @@ #include #include -#include #include "folly/Random.h" #include "folly/executors/CPUThreadPoolExecutor.h" #include "folly/lang/Assume.h" @@ -28,7 +27,6 @@ #include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/test/OrcTest.h" #include "velox/dwio/dwrf/test/utils/E2EWriterTestUtil.h" -#include "velox/type/Type.h" #include "velox/type/fbhive/HiveTypeParser.h" #include "velox/vector/ComplexVector.h" #include "velox/vector/FlatVector.h" @@ -129,13 +127,6 @@ TEST_F(TestReader, testWriterVersions) { "future - 99", writerVersionToString(static_cast(99))); } -std::unique_ptr createFileBufferedInput( - const std::string& path, - memory::MemoryPool& pool) { - return std::make_unique( - std::make_shared(path), pool); -} - // This relies on schema and data inside of our fm_small and fm_large orc files, // and is not composeable with other schema/datas void verifyFlatMapReading( @@ -2029,67 +2020,6 @@ TEST_F(TestReader, testFlatmapAsMapFieldLifeCycle) { testFlatmapAsMapFieldLifeCycle(pool(), schema, config, rng, batchSize, true); } -TEST_F(TestReader, testOrcDecimal) { - const std::string simpleTest(getExampleFilePath("orc_decimal.orc")); - const std::shared_ptr expectedType = - std::dynamic_pointer_cast( - HiveTypeParser().parse("struct")); - dwio::common::ReaderOptions readerOpts{pool()}; - // To make DwrfReader reads ORC file, setFileFormat to FileFormat::ORC - readerOpts.setFileFormat(dwio::common::FileFormat::ORC); - auto reader = DwrfReader::create( - createFileBufferedInput(simpleTest, readerOpts.memoryPool()), readerOpts); - // Check schema - auto rowType = reader->rowType(); - EXPECT_TRUE(rowType->equivalent(*expectedType)); - - RowReaderOptions rowReaderOptions; - auto rowReader = reader->createRowReader(rowReaderOptions); - - VectorPtr batch; - while (rowReader->next(500, batch)) { - auto rowVector = batch->as(); - auto longDecimalCol = rowVector->childAt(0)->as>(); - auto shortDecimalCol = rowVector->childAt(1)->as>(); - auto longDecimalType = rowVector->type()->childAt(0); - auto shortDecimalType = rowVector->type()->childAt(1); - EXPECT_EQ( - DecimalUtil::toString(longDecimalCol->valueAt(0), longDecimalType), - "1242141234.123456"); - EXPECT_EQ( - DecimalUtil::toString(shortDecimalCol->valueAt(0), shortDecimalType), - "321423.21"); - } -} - -TEST_F(TestReader, testOrcReaderSimple) { - const std::string simpleTest( - getExampleFilePath("TestStringDictionary.testRowIndex.orc")); - dwio::common::ReaderOptions readerOpts{pool()}; - // To make DwrfReader reads ORC file, setFileFormat to FileFormat::ORC - readerOpts.setFileFormat(dwio::common::FileFormat::ORC); - auto reader = DwrfReader::create( - createFileBufferedInput(simpleTest, readerOpts.memoryPool()), readerOpts); - - RowReaderOptions rowReaderOptions; - auto rowReader = reader->createRowReader(rowReaderOptions); - - VectorPtr batch; - const std::string stringPrefix{"row "}; - size_t rowNumber = 0; - while (rowReader->next(500, batch)) { - auto rowVector = batch->as(); - auto strings = rowVector->childAt(0)->as>(); - for (size_t i = 0; i < rowVector->size(); ++i) { - std::stringstream stream; - stream << std::setfill('0') << std::setw(6) << rowNumber; - EXPECT_EQ(stringPrefix + stream.str(), strings->valueAt(i).str()); - rowNumber++; - } - } - EXPECT_EQ(rowNumber, 32768); -} - TEST_F(TestReader, testFooterWrapper) { proto::Footer impl; FooterWrapper wrapper(&impl); @@ -2116,86 +2046,6 @@ TEST_F(TestReader, testOrcAndDwrfRowIndexStride) { ASSERT_TRUE(dwrfFooterWrapper.hasRowIndexStride()); EXPECT_EQ(dwrfFooterWrapper.rowIndexStride(), 100); } - -TEST_F(TestReader, testOrcReaderComplexTypes) { - const std::string icebergOrc(getExampleFilePath("complextypes_iceberg.orc")); - const std::shared_ptr expectedType = - std::dynamic_pointer_cast(HiveTypeParser().parse("struct<\ - id:bigint,int_array:array,int_array_array:array>,\ - int_map:map,int_map_array:array>,\ - nested_struct:struct<\ - a:int,b:array,c:struct<\ - d:array>>>,\ - g:map>>>>>")); - dwio::common::ReaderOptions readerOpts{pool()}; - readerOpts.setFileFormat(dwio::common::FileFormat::ORC); - auto reader = DwrfReader::create( - createFileBufferedInput(icebergOrc, readerOpts.memoryPool()), readerOpts); - auto rowType = reader->rowType(); - EXPECT_TRUE(rowType->equivalent(*expectedType)); -} - -TEST_F(TestReader, testOrcReaderVarchar) { - const std::string varcharOrc(getExampleFilePath("orc_index_int_string.orc")); - dwio::common::ReaderOptions readerOpts{pool()}; - readerOpts.setFileFormat(dwio::common::FileFormat::ORC); - auto reader = DwrfReader::create( - createFileBufferedInput(varcharOrc, readerOpts.memoryPool()), readerOpts); - - RowReaderOptions rowReaderOptions; - auto rowReader = reader->createRowReader(rowReaderOptions); - - VectorPtr batch; - int counter = 0; - while (rowReader->next(500, batch)) { - auto rowVector = batch->as(); - auto ints = rowVector->childAt(0)->as>(); - auto strings = rowVector->childAt(1)->as>(); - for (size_t i = 0; i < rowVector->size(); ++i) { - counter++; - EXPECT_EQ(counter, ints->valueAt(i)); - std::stringstream stream; - stream << counter; - if (counter < 1000) { - stream << "a"; - } - EXPECT_EQ(stream.str(), strings->valueAt(i).str()); - } - } - EXPECT_EQ(counter, 6000); -} - -TEST_F(TestReader, testOrcReaderDate) { - const std::string dateOrc(getExampleFilePath("TestOrcFile.testDate1900.orc")); - dwio::common::ReaderOptions readerOpts{pool()}; - readerOpts.setFileFormat(dwio::common::FileFormat::ORC); - auto reader = DwrfReader::create( - createFileBufferedInput(dateOrc, readerOpts.memoryPool()), readerOpts); - - RowReaderOptions rowReaderOptions; - auto rowReader = reader->createRowReader(rowReaderOptions); - - VectorPtr batch; - int year = 1900; - while (rowReader->next(1000, batch)) { - auto rowVector = batch->as(); - auto dates = rowVector->childAt(1)->as>(); - - std::stringstream stream; - stream << year << "-12-25"; - EXPECT_EQ(stream.str(), DATE()->toString(dates->valueAt(0))); - - for (size_t i = 1; i < rowVector->size(); ++i) { - EXPECT_EQ(dates->valueAt(0), dates->valueAt(i)); - } - - year++; - } -} - namespace { /* diff --git a/velox/dwio/dwrf/test/examples/orc_decimal.orc b/velox/dwio/dwrf/test/examples/orc_decimal.orc deleted file mode 100644 index fe6c0c8fb66..00000000000 Binary files a/velox/dwio/dwrf/test/examples/orc_decimal.orc and /dev/null differ diff --git a/velox/dwio/orc/CMakeLists.txt b/velox/dwio/orc/CMakeLists.txt new file mode 100644 index 00000000000..44f623de123 --- /dev/null +++ b/velox/dwio/orc/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_subdirectory(reader) + +if(${VELOX_BUILD_TESTING}) + add_subdirectory(test) +endif() diff --git a/velox/dwio/orc/reader/CMakeLists.txt b/velox/dwio/orc/reader/CMakeLists.txt new file mode 100644 index 00000000000..53ff149469c --- /dev/null +++ b/velox/dwio/orc/reader/CMakeLists.txt @@ -0,0 +1,17 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_library(velox_dwio_orc_reader OrcReader.cpp) + +target_link_libraries(velox_dwio_orc_reader velox_dwio_dwrf_reader) diff --git a/velox/dwio/orc/reader/OrcReader.cpp b/velox/dwio/orc/reader/OrcReader.cpp new file mode 100644 index 00000000000..7839552aeae --- /dev/null +++ b/velox/dwio/orc/reader/OrcReader.cpp @@ -0,0 +1,27 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/dwio/orc/reader/OrcReader.h" + +namespace facebook::velox::orc { +void registerOrcReaderFactory() { + dwio::common::registerReaderFactory(std::make_shared()); +} + +void unregisterOrcReaderFactory() { + dwio::common::unregisterReaderFactory(dwio::common::FileFormat::ORC); +} +} // namespace facebook::velox::orc diff --git a/velox/dwio/orc/reader/OrcReader.h b/velox/dwio/orc/reader/OrcReader.h new file mode 100644 index 00000000000..621d8478721 --- /dev/null +++ b/velox/dwio/orc/reader/OrcReader.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "velox/dwio/common/ReaderFactory.h" +#include "velox/dwio/dwrf/reader/DwrfReader.h" + +namespace facebook::velox::orc { + +class OrcReaderFactory : public dwio::common::ReaderFactory { + public: + OrcReaderFactory() : ReaderFactory(dwio::common::FileFormat::ORC) {} + + std::unique_ptr createReader( + std::unique_ptr input, + const dwio::common::ReaderOptions& options) override { + return velox::dwrf::DwrfReader::create(std::move(input), options); + } +}; + +void registerOrcReaderFactory(); + +void unregisterOrcReaderFactory(); + +} // namespace facebook::velox::orc diff --git a/velox/dwio/orc/test/CMakeLists.txt b/velox/dwio/orc/test/CMakeLists.txt new file mode 100644 index 00000000000..b7b4fdbb9a9 --- /dev/null +++ b/velox/dwio/orc/test/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_executable(velox_dwio_orc_reader_test ReaderTest.cpp) +add_test( + NAME velox_dwio_orc_reader_test + COMMAND velox_dwio_orc_reader_test + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) + +target_link_libraries(velox_dwio_orc_reader_test velox_dwrf_test_utils + velox_dwio_common_test_utils gtest gtest_main gmock) diff --git a/velox/dwio/orc/test/ReaderTest.cpp b/velox/dwio/orc/test/ReaderTest.cpp new file mode 100644 index 00000000000..dfe35da67ac --- /dev/null +++ b/velox/dwio/orc/test/ReaderTest.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "velox/dwio/dwrf/common/Common.h" +#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/dwrf/test/OrcTest.h" +#include "velox/type/fbhive/HiveTypeParser.h" +#include "velox/vector/tests/utils/VectorTestBase.h" + +using namespace facebook::velox::dwio::common; +using namespace facebook::velox::type::fbhive; +using namespace facebook::velox; +using namespace facebook::velox::dwrf; +using namespace facebook::velox::test; + +namespace { +class OrcReaderTest : public testing::Test, public VectorTestBase { + protected: + static void SetUpTestCase() { + memory::MemoryManager::testingSetInstance({}); + } +}; + +inline std::string getExamplesFilePath(const std::string& fileName) { + return test::getDataFilePath("velox/dwio/orc/test", "examples/" + fileName); +} + +} // namespace + +TEST_F(OrcReaderTest, testOrcReaderSimple) { + const std::string simpleTest( + getExamplesFilePath("TestStringDictionary.testRowIndex.orc")); + dwio::common::ReaderOptions readerOpts{pool()}; + // To make DwrfReader reads ORC file, setFileFormat to FileFormat::ORC + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + auto reader = DwrfReader::create( + createFileBufferedInput(simpleTest, readerOpts.memoryPool()), readerOpts); + + RowReaderOptions rowReaderOptions; + auto rowReader = reader->createRowReader(rowReaderOptions); + + VectorPtr batch; + const std::string stringPrefix{"row "}; + size_t rowNumber = 0; + while (rowReader->next(500, batch)) { + auto rowVector = batch->as(); + auto strings = rowVector->childAt(0)->as>(); + for (size_t i = 0; i < rowVector->size(); ++i) { + std::stringstream stream; + stream << std::setfill('0') << std::setw(6) << rowNumber; + EXPECT_EQ(stringPrefix + stream.str(), strings->valueAt(i).str()); + rowNumber++; + } + } + EXPECT_EQ(rowNumber, 32768); +} +TEST_F(OrcReaderTest, testOrcReaderComplexTypes) { + const std::string icebergOrc(getExamplesFilePath("complextypes_iceberg.orc")); + const std::shared_ptr expectedType = + std::dynamic_pointer_cast(HiveTypeParser().parse("struct<\ + id:bigint,int_array:array,int_array_array:array>,\ + int_map:map,int_map_array:array>,\ + nested_struct:struct<\ + a:int,b:array,c:struct<\ + d:array>>>,\ + g:map>>>>>")); + dwio::common::ReaderOptions readerOpts{pool()}; + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + auto reader = DwrfReader::create( + createFileBufferedInput(icebergOrc, readerOpts.memoryPool()), readerOpts); + auto rowType = reader->rowType(); + EXPECT_TRUE(rowType->equivalent(*expectedType)); +} + +TEST_F(OrcReaderTest, testOrcReaderVarchar) { + const std::string varcharOrc(getExamplesFilePath("orc_index_int_string.orc")); + dwio::common::ReaderOptions readerOpts{pool()}; + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + auto reader = DwrfReader::create( + createFileBufferedInput(varcharOrc, readerOpts.memoryPool()), readerOpts); + + RowReaderOptions rowReaderOptions; + auto rowReader = reader->createRowReader(rowReaderOptions); + + VectorPtr batch; + int counter = 0; + while (rowReader->next(500, batch)) { + auto rowVector = batch->as(); + auto ints = rowVector->childAt(0)->as>(); + auto strings = rowVector->childAt(1)->as>(); + for (size_t i = 0; i < rowVector->size(); ++i) { + counter++; + EXPECT_EQ(counter, ints->valueAt(i)); + std::stringstream stream; + stream << counter; + if (counter < 1000) { + stream << "a"; + } + EXPECT_EQ(stream.str(), strings->valueAt(i).str()); + } + } + EXPECT_EQ(counter, 6000); +} + +TEST_F(OrcReaderTest, testOrcReaderDate) { + const std::string dateOrc( + getExamplesFilePath("TestOrcFile.testDate1900.orc")); + dwio::common::ReaderOptions readerOpts{pool()}; + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + auto reader = DwrfReader::create( + createFileBufferedInput(dateOrc, readerOpts.memoryPool()), readerOpts); + + RowReaderOptions rowReaderOptions; + auto rowReader = reader->createRowReader(rowReaderOptions); + + VectorPtr batch; + int year = 1900; + while (rowReader->next(1000, batch)) { + auto rowVector = batch->as(); + auto dates = rowVector->childAt(1)->as>(); + + std::stringstream stream; + stream << year << "-12-25"; + EXPECT_EQ(stream.str(), DATE()->toString(dates->valueAt(0))); + + for (size_t i = 1; i < rowVector->size(); ++i) { + EXPECT_EQ(dates->valueAt(0), dates->valueAt(i)); + } + + year++; + } +} + +// create table orc_types_test ( +// "a" integer, +// "b" bigint, +// "c" tinyint, +// "d" smallint, +// "e" real, +// "f" double, +// "g" varchar, +// "h" boolean, +// "i" decimal(38,6), +// "j" decimal(9,2), +// "k" date, +// "l" timestamp, +// "m" array(varchar(100)), +// "n" map(varchar(20), bigint), +// "o" ROW(x BIGINT, y DOUBLE) +// ) with (format = 'ORC'); +TEST_F(OrcReaderTest, testOrcReadAllType) { + const std::string dateOrc(getExamplesFilePath("orc_all_type.orc")); + dwio::common::ReaderOptions readerOpts{pool()}; + readerOpts.setFileFormat(dwio::common::FileFormat::ORC); + auto reader = DwrfReader::create( + createFileBufferedInput(dateOrc, readerOpts.memoryPool()), readerOpts); + + RowReaderOptions rowReaderOptions; + auto rowReader = reader->createRowReader(rowReaderOptions); + + VectorPtr batch; + while (rowReader->next(500, batch)) { + auto rowVector = batch->as(); + auto integerCol = rowVector->childAt(0)->as>(); + auto bigintCol = rowVector->childAt(1)->as>(); + auto tinyintCol = rowVector->childAt(2)->as>(); + auto smallintCol = rowVector->childAt(3)->as>(); + auto realCol = rowVector->childAt(4)->as>(); + auto doubleCol = rowVector->childAt(5)->as>(); + auto varcharCol = rowVector->childAt(6)->as>(); + auto booleanCol = rowVector->childAt(7)->as>(); + auto longDecimalCol = rowVector->childAt(8)->as>(); + auto shortDecimalCol = rowVector->childAt(9)->as>(); + auto dateCol = rowVector->childAt(10)->as>(); + auto timestampCol = rowVector->childAt(11)->as>(); + auto arrayCol = rowVector->childAt(12)->as(); + auto mapCol = rowVector->childAt(13)->as(); + auto structCol = rowVector->childAt(14)->as(); + + EXPECT_EQ(1, rowVector->size()); + EXPECT_EQ(integerCol->valueAt(0), 111); + EXPECT_EQ(bigintCol->valueAt(0), 1111); + EXPECT_EQ(tinyintCol->valueAt(0), 127); + EXPECT_EQ(smallintCol->valueAt(0), 11); + EXPECT_EQ(realCol->valueAt(0), static_cast(1.1)); + EXPECT_EQ(doubleCol->valueAt(0), static_cast(1.12)); + EXPECT_EQ(varcharCol->valueAt(0), "velox"); + EXPECT_EQ(booleanCol->valueAt(0), false); + + auto longDecimalType = rowVector->type()->childAt(8); + auto shortDecimalType = rowVector->type()->childAt(9); + EXPECT_EQ( + DecimalUtil::toString(longDecimalCol->valueAt(0), longDecimalType), + "1242141234.123456"); + EXPECT_EQ( + DecimalUtil::toString(shortDecimalCol->valueAt(0), shortDecimalType), + "321423.21"); + + EXPECT_EQ(dateCol->valueAt(0), DATE()->toDays("2023-08-18")); + EXPECT_EQ( + timestampCol->valueAt(0), + util::fromTimestampString( + "2023-08-18 08:12:23.000", util::TimestampParseMode::kPrestoCast) + .value()); + + auto arrayElements = arrayCol->elements()->as>(); + EXPECT_EQ(arrayElements->size(), 3); + EXPECT_EQ(arrayElements->toString(0, 3, ",", false), "aaaa,BBBB,velox"); + + auto mapKeys = mapCol->mapKeys()->as>(); + auto mapValues = mapCol->mapValues()->as>(); + EXPECT_EQ(mapKeys->size(), 2); + EXPECT_EQ(mapKeys->size(), mapValues->size()); + EXPECT_EQ( + mapCol->toString(0, 2, ",", false), + "2 elements starting at 0 {foo => 1, bar => 2}"); + + EXPECT_EQ(structCol->size(), 1); + EXPECT_EQ(structCol->type()->toString(), "ROW<\"\":BIGINT,\"\":DOUBLE>"); + EXPECT_EQ(structCol->toString(0, 2, ",", false), "{1, 2}"); + } +} diff --git a/velox/dwio/dwrf/test/examples/TestOrcFile.testDate1900.orc b/velox/dwio/orc/test/examples/TestOrcFile.testDate1900.orc similarity index 100% rename from velox/dwio/dwrf/test/examples/TestOrcFile.testDate1900.orc rename to velox/dwio/orc/test/examples/TestOrcFile.testDate1900.orc diff --git a/velox/dwio/dwrf/test/examples/TestStringDictionary.testRowIndex.orc b/velox/dwio/orc/test/examples/TestStringDictionary.testRowIndex.orc similarity index 100% rename from velox/dwio/dwrf/test/examples/TestStringDictionary.testRowIndex.orc rename to velox/dwio/orc/test/examples/TestStringDictionary.testRowIndex.orc diff --git a/velox/dwio/dwrf/test/examples/complextypes_iceberg.orc b/velox/dwio/orc/test/examples/complextypes_iceberg.orc similarity index 100% rename from velox/dwio/dwrf/test/examples/complextypes_iceberg.orc rename to velox/dwio/orc/test/examples/complextypes_iceberg.orc diff --git a/velox/dwio/orc/test/examples/orc_all_type.orc b/velox/dwio/orc/test/examples/orc_all_type.orc new file mode 100644 index 00000000000..d7691975fb0 Binary files /dev/null and b/velox/dwio/orc/test/examples/orc_all_type.orc differ diff --git a/velox/dwio/dwrf/test/examples/orc_index_int_string.orc b/velox/dwio/orc/test/examples/orc_index_int_string.orc similarity index 100% rename from velox/dwio/dwrf/test/examples/orc_index_int_string.orc rename to velox/dwio/orc/test/examples/orc_index_int_string.orc