diff --git a/presto-docs/src/main/sphinx/presto_cpp/properties.rst b/presto-docs/src/main/sphinx/presto_cpp/properties.rst index 0fa5d97a0b8f5..d37f09f7a23e9 100644 --- a/presto-docs/src/main/sphinx/presto_cpp/properties.rst +++ b/presto-docs/src/main/sphinx/presto_cpp/properties.rst @@ -443,6 +443,22 @@ avoid exceeding memory limits for the query. only by aborting. This flag is only effective if ``shared-arbitrator.global-arbitration-enabled`` is ``true``. +``text-writer-enabled`` +^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``boolean`` +* **Default value:** ``true`` + + Enables writing data in ``TEXTFILE`` format. + +``text-reader-enabled`` +^^^^^^^^^^^^^^^^^^^^^^^ + +* **Type:** ``boolean`` +* **Default value:** ``true`` + + Enables reading data in ``TEXTFILE`` format. + Cache Properties ---------------- diff --git a/presto-native-execution/pom.xml b/presto-native-execution/pom.xml index cc652b58f2481..30f44d30d5c93 100644 --- a/presto-native-execution/pom.xml +++ b/presto-native-execution/pom.xml @@ -413,7 +413,7 @@ 1 false - remote-function,textfile_reader + remote-function /root/project/build/debug/presto_cpp/main/presto_server /tmp/velox @@ -465,7 +465,7 @@ org.apache.maven.plugins maven-surefire-plugin - writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache + writer,parquet,remote-function,textfile,async_data_cache diff --git a/presto-native-execution/presto_cpp/main/CMakeLists.txt b/presto-native-execution/presto_cpp/main/CMakeLists.txt index 4183f1be0bb3f..9ec39df8cff67 100644 --- a/presto-native-execution/presto_cpp/main/CMakeLists.txt +++ b/presto-native-execution/presto_cpp/main/CMakeLists.txt @@ -79,6 +79,7 @@ target_link_libraries( velox_dwio_orc_reader velox_dwio_parquet_reader velox_dwio_parquet_writer + velox_dwio_text_reader_register velox_dwio_text_writer_register velox_dynamic_library_loader velox_encode diff --git a/presto-native-execution/presto_cpp/main/PrestoServer.cpp b/presto-native-execution/presto_cpp/main/PrestoServer.cpp index 0ae7dff06751d..115b0f57f94bc 100644 --- a/presto-native-execution/presto_cpp/main/PrestoServer.cpp +++ b/presto-native-execution/presto_cpp/main/PrestoServer.cpp @@ -62,6 +62,7 @@ #include "velox/dwio/orc/reader/OrcReader.h" #include "velox/dwio/parquet/RegisterParquetReader.h" #include "velox/dwio/parquet/RegisterParquetWriter.h" +#include "velox/dwio/text/RegisterTextReader.h" #include "velox/dwio/text/RegisterTextWriter.h" #include "velox/exec/OutputBufferManager.h" #include "velox/exec/TraceUtil.h" @@ -1452,6 +1453,9 @@ void PrestoServer::registerFileReadersAndWriters() { if (SystemConfig::instance()->textWriterEnabled()) { velox::text::registerTextWriterFactory(); } + if (SystemConfig::instance()->textReaderEnabled()) { + velox::text::registerTextReaderFactory(); + } } void PrestoServer::unregisterFileReadersAndWriters() { @@ -1462,6 +1466,9 @@ void PrestoServer::unregisterFileReadersAndWriters() { if (SystemConfig::instance()->textWriterEnabled()) { velox::text::unregisterTextWriterFactory(); } + if (SystemConfig::instance()->textReaderEnabled()) { + velox::text::unregisterTextReaderFactory(); + } } void PrestoServer::registerStatsCounters() { diff --git a/presto-native-execution/presto_cpp/main/common/Configs.cpp b/presto-native-execution/presto_cpp/main/common/Configs.cpp index 8478b799bb82f..d0a183c86686d 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.cpp +++ b/presto-native-execution/presto_cpp/main/common/Configs.cpp @@ -281,6 +281,7 @@ SystemConfig::SystemConfig() { NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000), NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536), BOOL_PROP(kTextWriterEnabled, true), + BOOL_PROP(kTextReaderEnabled, true), BOOL_PROP(kCharNToVarcharImplicitCast, false), BOOL_PROP(kEnumTypesEnabled, true), }; @@ -1046,6 +1047,10 @@ bool SystemConfig::textWriterEnabled() const { return optionalProperty(kTextWriterEnabled).value(); } +bool SystemConfig::textReaderEnabled() const { + return optionalProperty(kTextReaderEnabled).value(); +} + bool SystemConfig::charNToVarcharImplicitCast() const { return optionalProperty(kCharNToVarcharImplicitCast).value(); } diff --git a/presto-native-execution/presto_cpp/main/common/Configs.h b/presto-native-execution/presto_cpp/main/common/Configs.h index 0b3c97d4b8f02..3f59b1968dfd0 100644 --- a/presto-native-execution/presto_cpp/main/common/Configs.h +++ b/presto-native-execution/presto_cpp/main/common/Configs.h @@ -841,6 +841,10 @@ class SystemConfig : public ConfigBase { // TODO: remove once text writer is fully rolled out static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"}; + // Add to temporarily help with gradual rollout for text reader + // TODO: remove once text reader is fully rolled out + static constexpr std::string_view kTextReaderEnabled{"text-reader-enabled"}; + /// Enable the type char(n) with the same behavior as unbounded varchar. /// char(n) type is not supported by parser when set to false. static constexpr std::string_view kCharNToVarcharImplicitCast{ @@ -1185,6 +1189,8 @@ class SystemConfig : public ConfigBase { bool textWriterEnabled() const; + bool textReaderEnabled() const; + bool charNToVarcharImplicitCast() const; bool enumTypesEnabled() const; diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java index 6d0bfceb48c75..fde2a56d6f96a 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java @@ -413,6 +413,76 @@ public void testDateFilter() } } + @Test(groups = {"textfile"}) + public void testReadTableWithTextfileFormat() + { + assertQuery("SELECT * FROM nation_text"); + + String tmpTableName = generateRandomTableName(); + try { + getExpectedQueryRunner().execute(getSession(), format( + "CREATE TABLE %s (" + + "id BIGINT," + + "name VARCHAR," + + "is_active BOOLEAN," + + "score DOUBLE," + + "created_at TIMESTAMP," + + "tags ARRAY," + + "metrics ARRAY," + + "properties MAP," + + "flags MAP," + + "nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY, sub_map MAP)," + + "price DECIMAL(15,2)," + + "amount DECIMAL(21,6)," + + "event_date DATE," + + "ds VARCHAR" + + ") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName), ImmutableList.of()); + getExpectedQueryRunner().execute(getSession(), format( + "INSERT INTO %s (" + + "id," + + "name," + + "is_active," + + "score," + + "created_at," + + "tags," + + "metrics," + + "properties," + + "flags," + + "nested_struct," + + "price," + + "amount," + + "event_date," + + "ds" + + ") VALUES (" + + "1001," + + "'Jane Doe'," + + "TRUE," + + "88.5," + + "TIMESTAMP '2025-07-23 10:00:00'," + + "ARRAY['alpha', 'beta', 'gamma']," + + "ARRAY[3.14, 2.71, 1.41]," + + "MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," + + "MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," + + "ROW(" + + "42," + + "'sub_jane'," + + "ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," + + "MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" + + ")," + + "DECIMAL '12.34'," + + "CAST('-123456789012345.123456' as DECIMAL(21,6))," + + "DATE '2024-02-29'," + + "'2025-07-01'" + + ")", tmpTableName), ImmutableList.of()); + // created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox. + // https://github.com/facebookincubator/velox/issues/8127 + assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName)); + } + finally { + dropTableIfExists(tmpTableName); + } + } + @Test public void testOrderBy() { @@ -1269,18 +1339,6 @@ public void testReadTableWithUnsupportedJsonFormat() assertQueryFails("SELECT * FROM nation_json", "(?s).*ReaderFactory is not registered for format json.*"); } - @Test(groups = {"no_textfile_reader"}) - public void testReadTableWithUnsupportedTextfileFormat() - { - assertQueryFails("SELECT * FROM nation_text", "(?s).*ReaderFactory is not registered for format text.*"); - } - - @Test(groups = {"textfile_reader"}) - public void testReadTableWithTextfileFormat() - { - assertQuery("SELECT * FROM nation_text"); - } - private void dropTableIfExists(String tableName) { // An ugly workaround for the lack of getExpectedQueryRunner() diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java index debe20c99dcc5..f2d6e4e56d48e 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java @@ -32,7 +32,7 @@ public abstract class AbstractTestNativeTpcdsQueries extends AbstractTestQueryFramework { - String storageFormat = "DWRF"; + protected String storageFormat = "DWRF"; Session session; String[] tpcdsTableNames = {"call_center", "catalog_page", "catalog_returns", "catalog_sales", "customer", "customer_address", "customer_demographics", "date_dim", "household_demographics", @@ -90,6 +90,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE call_center AS " + "SELECT * FROM tpcds.tiny.call_center"); break; @@ -158,6 +159,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session, switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE date_dim AS " + "SELECT * FROM tpcds.tiny.date_dim"); break; @@ -202,6 +204,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE item AS " + "SELECT * FROM tpcds.tiny.item"); break; @@ -246,6 +249,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE store AS " + "SELECT * FROM tpcds.tiny.store"); break; @@ -300,6 +304,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session, switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE web_page AS " + "SELECT * FROM tpcds.tiny.web_page"); break; @@ -337,6 +342,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session, switch (storageFormat) { case "PARQUET": case "ORC": + case "TEXTFILE": queryRunner.execute(session, "CREATE TABLE web_site AS " + "SELECT * FROM tpcds.tiny.web_site"); break; diff --git a/presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java index 66ac7035fd40c..f0db8601ba10c 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java @@ -82,10 +82,6 @@ public void testUnicodeInJson() @Ignore public void testDistributedSortSingleNode() {} - // Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader". - @Override - public void testReadTableWithTextfileFormat() {} - // Disable: Not supporte by POS @Override @Ignore diff --git a/presto-native-tests/pom.xml b/presto-native-tests/pom.xml index 8788b4c858e4b..4a6c60dcd4530 100644 --- a/presto-native-tests/pom.xml +++ b/presto-native-tests/pom.xml @@ -266,7 +266,7 @@ 1 false - remote-function,textfile_reader + remote-function /root/project/build/debug/presto_cpp/main/presto_server diff --git a/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpcdsQueriesUsingThrift.java b/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpcdsQueriesUsingThrift.java new file mode 100644 index 0000000000000..25686e175581c --- /dev/null +++ b/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpcdsQueriesUsingThrift.java @@ -0,0 +1,47 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativetests; + +import com.facebook.presto.nativeworker.AbstractTestNativeTpcdsQueries; +import com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils; +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; + +public class TestTextReaderWithTpcdsQueriesUsingThrift + extends AbstractTestNativeTpcdsQueries +{ + private static final String TEXTFILE = "TEXTFILE"; + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder() + .setStorageFormat(TEXTFILE) + .setAddStorageFormatToPath(true) + .setUseThrift(true) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() + throws Exception + { + this.storageFormat = TEXTFILE; + return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder() + .setStorageFormat(this.storageFormat) + .setAddStorageFormatToPath(true) + .build(); + } +} diff --git a/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpchQueriesUsingJSON.java b/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpchQueriesUsingJSON.java new file mode 100644 index 0000000000000..46ca777038f64 --- /dev/null +++ b/presto-native-tests/src/test/java/com/facebook/presto/nativetests/TestTextReaderWithTpchQueriesUsingJSON.java @@ -0,0 +1,43 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.nativetests; + +import com.facebook.presto.nativeworker.AbstractTestNativeTpchQueries; +import com.facebook.presto.nativeworker.PrestoNativeQueryRunnerUtils; +import com.facebook.presto.testing.ExpectedQueryRunner; +import com.facebook.presto.testing.QueryRunner; + +public class TestTextReaderWithTpchQueriesUsingJSON + extends AbstractTestNativeTpchQueries +{ + private static final String TEXTFILE = "TEXTFILE"; + + @Override + protected QueryRunner createQueryRunner() throws Exception + { + return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder() + .setStorageFormat(TEXTFILE) + .setAddStorageFormatToPath(true) + .build(); + } + + @Override + protected ExpectedQueryRunner createExpectedQueryRunner() throws Exception + { + return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder() + .setStorageFormat(TEXTFILE) + .setAddStorageFormatToPath(true) + .build(); + } +}