diff --git a/presto-native-execution/presto_cpp/main/types/CMakeLists.txt b/presto-native-execution/presto_cpp/main/types/CMakeLists.txt index f26b52058711e..f189f952e3428 100644 --- a/presto-native-execution/presto_cpp/main/types/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/types/CMakeLists.txt @@ -19,9 +19,9 @@ target_link_libraries(presto_type_converter velox_type) add_library(presto_types OBJECT PrestoToVeloxQueryPlan.cpp PrestoToVeloxExpr.cpp PrestoToVeloxSplit.cpp) add_dependencies(presto_types presto_operators presto_type_converter velox_type - velox_dwio_dwrf_proto) + velox_type_fbhive velox_dwio_dwrf_proto) -target_link_libraries(presto_types presto_type_converter +target_link_libraries(presto_types presto_type_converter velox_type_fbhive velox_hive_partition_function velox_tpch_gen) if(PRESTO_ENABLE_TESTING) diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp index 620ade7b874f8..a510fbd859563 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxQueryPlan.cpp @@ -743,12 +743,6 @@ std::shared_ptr toConnectorTableHandle( if (auto hiveLayout = std::dynamic_pointer_cast( tableHandle.connectorTableLayout)) { - VELOX_USER_CHECK( - hiveLayout->pushdownFilterEnabled, - "Table scan with filter pushdown disabled is not supported (or possibly the file type is not supported yet): {}.{}", - hiveLayout->schemaTableName.schema, - hiveLayout->schemaTableName.table); - for (const auto& entry : hiveLayout->partitionColumns) { partitionColumns.emplace(entry.name, toColumnHandle(&entry)); } @@ -773,6 +767,20 @@ std::shared_ptr toConnectorTableHandle( remainingFilter = nullptr; } + RowTypePtr dataColumns; + if (!hiveLayout->dataColumns.empty()) { + std::vector names; + std::vector types; + velox::type::fbhive::HiveTypeParser typeParser; + names.reserve(hiveLayout->dataColumns.size()); + types.reserve(hiveLayout->dataColumns.size()); + for (auto& column : hiveLayout->dataColumns) { + names.push_back(column.name); + types.push_back(typeParser.parse(column.type)); + } + dataColumns = ROW(std::move(names), std::move(types)); + } + auto hiveTableHandle = std::dynamic_pointer_cast( tableHandle.connectorHandle); @@ -787,9 +795,10 @@ std::shared_ptr toConnectorTableHandle( return std::make_shared( tableHandle.connectorId, tableName, - true, + hiveLayout->pushdownFilterEnabled, std::move(subfieldFilters), - remainingFilter); + remainingFilter, + dataColumns); } if (auto tpchLayout = diff --git a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxSplit.cpp b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxSplit.cpp index f756159b173f4..b4fd22a82fff3 100644 --- a/presto-native-execution/presto_cpp/main/types/PrestoToVeloxSplit.cpp +++ b/presto-native-execution/presto_cpp/main/types/PrestoToVeloxSplit.cpp @@ -24,16 +24,24 @@ namespace facebook::presto { namespace { dwio::common::FileFormat toVeloxFileFormat( - const facebook::presto::protocol::String& format) { - if (format == "com.facebook.hive.orc.OrcInputFormat") { + const presto::protocol::StorageFormat& format) { + if (format.inputFormat == "com.facebook.hive.orc.OrcInputFormat") { return dwio::common::FileFormat::DWRF; } else if ( - format == + format.inputFormat == "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat") { return dwio::common::FileFormat::PARQUET; - } else { - VELOX_FAIL("Unknown file format {}", format); + } else if (format.inputFormat == "org.apache.hadoop.mapred.TextInputFormat") { + if (format.serDe == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { + return dwio::common::FileFormat::TEXT; + } else if (format.serDe == "org.apache.hive.hcatalog.data.JsonSerDe") { + return dwio::common::FileFormat::JSON; + } + } else if (format.inputFormat == "com.facebook.alpha.AlphaInputFormat") { + return dwio::common::FileFormat::ALPHA; } + VELOX_UNSUPPORTED( + "Unsupported file format: {} {}", format.inputFormat, format.serDe); } } // anonymous namespace @@ -66,7 +74,7 @@ velox::exec::Split toVeloxSplit( std::make_shared( scheduledSplit.split.connectorId, hiveSplit->fileSplit.path, - toVeloxFileFormat(hiveSplit->storage.storageFormat.inputFormat), + toVeloxFileFormat(hiveSplit->storage.storageFormat), hiveSplit->fileSplit.start, hiveSplit->fileSplit.length, partitionKeys, diff --git a/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp b/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp index cc35738aabbda..a8767f52e5add 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp +++ b/presto-native-execution/presto_cpp/main/types/tests/PlanConverterTest.cpp @@ -111,6 +111,13 @@ TEST_F(PlanConverterTest, scanAgg) { ASSERT_EQ(requiredSubfields[0].toString(), "complex_type[1][\"foo\"].id"); ASSERT_EQ(requiredSubfields[1].toString(), "complex_type[2][\"bar\"].id"); + auto* tableHandle = dynamic_cast( + tableScan->tableHandle().get()); + ASSERT_TRUE(tableHandle); + ASSERT_EQ( + tableHandle->dataColumns()->toString(), + "ROW>>,comment:VARCHAR>"); + protocol::registerConnector("hive-plus", "hive"); assertToVeloxQueryPlan("ScanAggCustomConnectorId.json"); } diff --git a/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json b/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json index 1f1313e68d80f..cdda8bdb383d2 100644 --- a/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json +++ b/presto-native-execution/presto_cpp/main/types/tests/data/ScanAgg.json @@ -45,7 +45,7 @@ }, { "name":"complex_type", - "type":"array(map(varchar, row(id bigint, description varchar)))" + "type":"array>>" }, { "name":"comment", diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java index 6d9fdd2d5f019..48f4bf1f54c8b 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java @@ -866,6 +866,13 @@ public void testCreateTableWithUnsupportedFormats() } } + @Test + public void testReadTableWithUnsupportedFormats() + { + assertQueryFails("SELECT * FROM nation_json", ".*ReaderFactory is not registered for format json.*"); + assertQueryFails("SELECT * FROM nation_text", ".*ReaderFactory is not registered for format text.*"); + } + @Test public void testCreateUnpartitionedTableAsSelect() { diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/NativeQueryRunnerUtils.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/NativeQueryRunnerUtils.java index 5b3937ab60e11..5847294f00f18 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/NativeQueryRunnerUtils.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/NativeQueryRunnerUtils.java @@ -118,6 +118,12 @@ public static void createNation(QueryRunner queryRunner) if (!queryRunner.tableExists(queryRunner.getDefaultSession(), "nation")) { queryRunner.execute("CREATE TABLE nation AS SELECT * FROM tpch.tiny.nation"); } + if (!queryRunner.tableExists(queryRunner.getDefaultSession(), "nation_json")) { + queryRunner.execute("CREATE TABLE nation_json WITH (FORMAT = 'JSON') AS SELECT * FROM tpch.tiny.nation"); + } + if (!queryRunner.tableExists(queryRunner.getDefaultSession(), "nation_text")) { + queryRunner.execute("CREATE TABLE nation_text WITH (FORMAT = 'TEXTFILE') AS SELECT * FROM tpch.tiny.nation"); + } } public static void createPartitionedNation(QueryRunner queryRunner)