From 06f7d7e1e5345552268a9b64f097ccd9195dc2e7 Mon Sep 17 00:00:00 2001 From: Xin Zhang Date: Thu, 19 Feb 2026 00:12:16 +0000 Subject: [PATCH] feat: Add textfile custom serde parameters support --- .../src/main/sphinx/connector/hive.rst | 29 +++- .../facebook/presto/hive/HiveMetadata.java | 70 +++++++- .../presto/hive/HiveTableProperties.java | 19 ++- .../hive/TestHiveIntegrationSmokeTest.java | 151 ++++++++++++++++++ .../AbstractTestNativeGeneralQueries.java | 146 ++++++++++------- 5 files changed, 350 insertions(+), 65 deletions(-) diff --git a/presto-docs/src/main/sphinx/connector/hive.rst b/presto-docs/src/main/sphinx/connector/hive.rst index ed343ab75f72e..24295c860e2de 100644 --- a/presto-docs/src/main/sphinx/connector/hive.rst +++ b/presto-docs/src/main/sphinx/connector/hive.rst @@ -291,6 +291,33 @@ Add the ``metastore.storage.schema.reader.impl`` property to ``hive-site.xml`` w You must restart the metastore service for this configuration to take effect. This setting allows the metastore to read storage schemas for Avro tables and avoids ``Storage schema reading not supported`` errors. +Textfile Configuration Properties +--------------------------------- + +Table Properties +^^^^^^^^^^^^^^^^ + +These properties can be used when creating TEXTFILE tables in Presto: + +======================================================== ============================================================================== ============================= +Property Name Description Default +======================================================== ============================================================================== ============================= +``textfile_field_delim`` A custom single-character delimiter to separate fields. NONE + +``textfile_escape_delim`` A custom single-character delimiter to escape characters. NONE + +``textfile_collection_delim`` A custom single-character delimiter to separate collection elements. 
NONE + +``textfile_mapkey_delim`` A custom single-character delimiter to separate map keys. NONE + +======================================================== ============================================================================== ============================= + +.. note:: +    These properties are mapped to the corresponding properties in Hive ``LazySerDeParameters`` during serialization and +    follow the same behavior as ``LazySimpleSerDe``. +    If they are not defined, the Hive defaults are used, which are typically ``\001`` for field delimiter, ``\002`` for +    collection delimiter, ``\003`` for map key delimiter, and the escape character is disabled. + Metastore Configuration Properties ---------------------------------- @@ -1308,4 +1335,4 @@ Example:: CAST(id AS BIGINT) AS id, CAST(value AS INT) AS value, CAST(date_col AS DATE) AS date_col - FROM hive.csv.csv_data; \ No newline at end of file + FROM hive.csv.csv_data; diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java index f9cab97400b3e..f806e1e696eaa 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveMetadata.java @@ -261,9 +261,12 @@ import static com.facebook.presto.hive.HiveTableProperties.SKIP_HEADER_LINE_COUNT; import static com.facebook.presto.hive.HiveTableProperties.SORTED_BY_PROPERTY; import static com.facebook.presto.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY; +import static com.facebook.presto.hive.HiveTableProperties.TEXTFILE_COLLECTION_DELIM; +import static com.facebook.presto.hive.HiveTableProperties.TEXTFILE_ESCAPE_DELIM; +import static com.facebook.presto.hive.HiveTableProperties.TEXTFILE_FIELD_DELIM; +import static com.facebook.presto.hive.HiveTableProperties.TEXTFILE_MAPKEY_DELIM; import static com.facebook.presto.hive.HiveTableProperties.getAvroSchemaUrl; import static 
com.facebook.presto.hive.HiveTableProperties.getBucketProperty; -import static com.facebook.presto.hive.HiveTableProperties.getCsvProperty; import static com.facebook.presto.hive.HiveTableProperties.getDwrfEncryptionAlgorithm; import static com.facebook.presto.hive.HiveTableProperties.getDwrfEncryptionProvider; import static com.facebook.presto.hive.HiveTableProperties.getEncryptColumns; @@ -276,6 +279,7 @@ import static com.facebook.presto.hive.HiveTableProperties.getOrcBloomFilterFpp; import static com.facebook.presto.hive.HiveTableProperties.getPartitionedBy; import static com.facebook.presto.hive.HiveTableProperties.getPreferredOrderingColumns; +import static com.facebook.presto.hive.HiveTableProperties.getSingleCharacterProperty; import static com.facebook.presto.hive.HiveTableProperties.isExternalTable; import static com.facebook.presto.hive.HiveType.HIVE_BINARY; import static com.facebook.presto.hive.HiveType.toHiveType; @@ -418,6 +422,17 @@ public class HiveMetadata private static final String CSV_QUOTE_KEY = OpenCSVSerde.QUOTECHAR; private static final String CSV_ESCAPE_KEY = OpenCSVSerde.ESCAPECHAR; + private static final String TEXTFILE_FIELD_DELIM_KEY = "field.delim"; + private static final String TEXTFILE_ESCAPE_DELIM_KEY = "escape.delim"; + private static final String TEXTFILE_COLLECTION_DELIM_KEY = "collection.delim"; + private static final String TEXTFILE_MAPKEY_DELIM_KEY = "mapkey.delim"; + + private static final Set TEXTFILE_SERDE_KEYS = ImmutableSet.of( + TEXTFILE_FIELD_DELIM_KEY, + TEXTFILE_ESCAPE_DELIM_KEY, + TEXTFILE_COLLECTION_DELIM_KEY, + TEXTFILE_MAPKEY_DELIM_KEY); + public static final String SKIP_HEADER_COUNT_KEY = "skip.header.line.count"; public static final String SKIP_FOOTER_COUNT_KEY = "skip.footer.line.count"; @@ -768,6 +783,16 @@ private ConnectorTableMetadata getTableMetadata(Optional table, SchemaTab getSerdeProperty(table.get(), SKIP_FOOTER_COUNT_KEY) .ifPresent(skipFooterCount -> properties.put(SKIP_FOOTER_LINE_COUNT, 
Integer.valueOf(skipFooterCount))); + // Textfile specific properties + getSerdeProperty(table.get(), TEXTFILE_FIELD_DELIM_KEY) + .ifPresent(fieldDelim -> properties.put(TEXTFILE_FIELD_DELIM, fieldDelim)); + getSerdeProperty(table.get(), TEXTFILE_ESCAPE_DELIM_KEY) + .ifPresent(escapeDelim -> properties.put(TEXTFILE_ESCAPE_DELIM, escapeDelim)); + getSerdeProperty(table.get(), TEXTFILE_COLLECTION_DELIM_KEY) + .ifPresent(textCollectionDelim -> properties.put(TEXTFILE_COLLECTION_DELIM, textCollectionDelim)); + getSerdeProperty(table.get(), TEXTFILE_MAPKEY_DELIM_KEY) + .ifPresent(textMapKeyDelim -> properties.put(TEXTFILE_MAPKEY_DELIM, textMapKeyDelim)); + // CSV specific property getCsvSerdeProperty(table.get(), CSV_SEPARATOR_KEY) .ifPresent(csvSeparator -> properties.put(CSV_SEPARATOR, csvSeparator)); @@ -1335,22 +1360,45 @@ private Map getEmptyTableProperties( }); // CSV specific properties - getCsvProperty(tableMetadata.getProperties(), CSV_ESCAPE) + getSingleCharacterProperty(tableMetadata.getProperties(), CSV_ESCAPE) .ifPresent(escape -> { checkFormatForProperty(hiveStorageFormat, CSV, CSV_ESCAPE); tableProperties.put(CSV_ESCAPE_KEY, escape.toString()); }); - getCsvProperty(tableMetadata.getProperties(), CSV_QUOTE) + getSingleCharacterProperty(tableMetadata.getProperties(), CSV_QUOTE) .ifPresent(quote -> { checkFormatForProperty(hiveStorageFormat, CSV, CSV_QUOTE); tableProperties.put(CSV_QUOTE_KEY, quote.toString()); }); - getCsvProperty(tableMetadata.getProperties(), CSV_SEPARATOR) + getSingleCharacterProperty(tableMetadata.getProperties(), CSV_SEPARATOR) .ifPresent(separator -> { checkFormatForProperty(hiveStorageFormat, CSV, CSV_SEPARATOR); tableProperties.put(CSV_SEPARATOR_KEY, separator.toString()); }); + // TEXT specific properties + getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_FIELD_DELIM) + .ifPresent(fieldDelim -> { + checkFormatForProperty(hiveStorageFormat, TEXTFILE, TEXTFILE_FIELD_DELIM); + 
tableProperties.put(TEXTFILE_FIELD_DELIM_KEY, fieldDelim.toString()); + }); + getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_ESCAPE_DELIM) + .ifPresent(escapeDelim -> { + checkFormatForProperty(hiveStorageFormat, TEXTFILE, TEXTFILE_ESCAPE_DELIM); + tableProperties.put(TEXTFILE_ESCAPE_DELIM_KEY, escapeDelim.toString()); + }); + getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_COLLECTION_DELIM) + .ifPresent(collectionDelim -> { + checkFormatForProperty(hiveStorageFormat, TEXTFILE, TEXTFILE_COLLECTION_DELIM); + tableProperties.put(TEXTFILE_COLLECTION_DELIM_KEY, collectionDelim.toString()); + }); + + getSingleCharacterProperty(tableMetadata.getProperties(), TEXTFILE_MAPKEY_DELIM) + .ifPresent(mapKeyDelim -> { + checkFormatForProperty(hiveStorageFormat, TEXTFILE, TEXTFILE_MAPKEY_DELIM); + tableProperties.put(TEXTFILE_MAPKEY_DELIM_KEY, mapKeyDelim.toString()); + }); + // Table comment property tableMetadata.getComment().ifPresent(value -> tableProperties.put(TABLE_COMMENT, value)); @@ -1461,10 +1509,14 @@ private static Table buildTableObject( } } + Map serdeParameters = extractSerdeParameters(additionalTableParameters); + ImmutableMap.Builder tableParameters = ImmutableMap.builder() .put(PRESTO_VERSION_NAME, prestoVersion) .put(PRESTO_QUERY_ID_NAME, queryId) - .putAll(additionalTableParameters); + .putAll(additionalTableParameters.entrySet().stream() + .filter(entry -> !serdeParameters.containsKey(entry.getKey())) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue))); if (tableType.equals(EXTERNAL_TABLE)) { tableParameters.put("EXTERNAL", "TRUE"); @@ -1483,6 +1535,7 @@ private static Table buildTableObject( .setStorageFormat(fromHiveStorageFormat(hiveStorageFormat)) .setBucketProperty(bucketProperty) .setParameters(ImmutableMap.of(PREFERRED_ORDERING_COLUMNS, encodePreferredOrderingColumns(preferredOrderingColumns))) + .setSerdeParameters(serdeParameters) .setLocation(targetPath.toString()); return 
tableBuilder.build(); @@ -3509,6 +3562,13 @@ private static HiveStorageFormat extractHiveStorageFormat(Table table) throw new PrestoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serde)); } + private static Map extractSerdeParameters(Map tableParameters) + { + return tableParameters.entrySet().stream() + .filter(entry -> TEXTFILE_SERDE_KEYS.contains(entry.getKey())) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + @VisibleForTesting static String encodePreferredOrderingColumns(List preferredOrderingColumns) { diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java index 5c7eb8153a034..6a2d96ea4914a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/HiveTableProperties.java @@ -57,6 +57,11 @@ public class HiveTableProperties public static final String CSV_SEPARATOR = "csv_separator"; public static final String CSV_QUOTE = "csv_quote"; public static final String CSV_ESCAPE = "csv_escape"; + public static final String TEXTFILE_FIELD_DELIM = "textfile_field_delim"; + public static final String TEXTFILE_MAPKEY_DELIM = "textfile_mapkey_delim"; + public static final String TEXTFILE_COLLECTION_DELIM = "textfile_collection_delim"; + public static final String TEXTFILE_ESCAPE_DELIM = "textfile_escape_delim"; + public static final String SKIP_HEADER_LINE_COUNT = "skip_header_line_count"; public static final String SKIP_FOOTER_LINE_COUNT = "skip_footer_line_count"; @@ -157,6 +162,10 @@ public HiveTableProperties(TypeManager typeManager, HiveClientConfig config) stringProperty(CSV_SEPARATOR, "CSV separator character", null, false), stringProperty(CSV_QUOTE, "CSV quote character", null, false), stringProperty(CSV_ESCAPE, "CSV escape character", null, false), + 
stringProperty(TEXTFILE_FIELD_DELIM, "Textfile field delimiter character", null, false), + stringProperty(TEXTFILE_ESCAPE_DELIM, "Textfile escape delimiter character", null, false), + stringProperty(TEXTFILE_COLLECTION_DELIM, "Textfile collection delimiter character", null, false), + stringProperty(TEXTFILE_MAPKEY_DELIM, "Textfile map key delimiter character", null, false), integerProperty(SKIP_HEADER_LINE_COUNT, "Number of header lines", null, false), integerProperty(SKIP_FOOTER_LINE_COUNT, "Number of footer lines", null, false), new PropertyMetadata<>( @@ -248,17 +257,17 @@ public static Double getOrcBloomFilterFpp(Map tableProperties) return (Double) tableProperties.get(ORC_BLOOM_FILTER_FPP); } - public static Optional getCsvProperty(Map tableProperties, String key) + public static Optional getSingleCharacterProperty(Map tableProperties, String key) { Object value = tableProperties.get(key); if (value == null) { return Optional.empty(); } - String csvValue = (String) value; - if (csvValue.length() != 1) { - throw new PrestoException(INVALID_TABLE_PROPERTY, format("%s must be a single character string, but was: '%s'", key, csvValue)); + String stringValue = (String) value; + if (stringValue.length() != 1) { + throw new PrestoException(INVALID_TABLE_PROPERTY, format("%s must be a single character string, but was: '%s'", key, stringValue)); } - return Optional.of(csvValue.charAt(0)); + return Optional.of(stringValue.charAt(0)); } @SuppressWarnings("unchecked") diff --git a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java index fe8e7f6a225e6..f9f2c82a39075 100644 --- a/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java +++ b/presto-hive/src/test/java/com/facebook/presto/hive/TestHiveIntegrationSmokeTest.java @@ -7090,6 +7090,157 @@ public void testInsertTableWithHeaderAndFooterForCsv() assertUpdate("DROP TABLE 
csv_table_skip_header"); } + @Test + public void testSerdeParametersForTextfileRead() + throws Exception + { + File tempDir = createTempDir(); + File dataFile = new File(tempDir, "custom-delim.txt"); + Files.write( + "1001" + + "|he\u0001|llo" + + "|true" + + "|88.5" + + "|alpha;beta;gamma" + + "|size:large;color:blue" + + "|42;1.1:2.2:3.3;20\u0004bar:10\u0004foo\n", dataFile, UTF_8); + + String catalog = getSession().getCatalog().get(); + String schema = getSession().getSchema().get(); + String table = "test_textfile_custom_delim"; + String path = new Path(tempDir.toURI().toASCIIString()).toString(); + + String createTableWithCustomSerdeFormat = + "CREATE TABLE %s.%s.%s (\n" + + " %s bigint,\n" + + " %s varchar,\n" + + " %s boolean,\n" + + " %s double,\n" + + " %s array(varchar),\n" + + " %s map(varchar, varchar),\n" + + " %s row(%s integer, %s array(real), %s map(smallint, varchar))\n" + + ")\n" + + "WITH (\n" + + " external_location = '%s',\n" + + " format = 'TEXTFILE',\n" + + " textfile_collection_delim = ';',\n" + + " textfile_escape_delim = %s,\n" + + " textfile_field_delim = '|',\n" + + " textfile_mapkey_delim = ':'\n" + + ")"; + + @Language("SQL") String createTableSql = format( + createTableWithCustomSerdeFormat, + catalog, schema, table, + "c1", "c2", "c3", "c4", "c5", "c6", "c7", + "s_int", "s_arr", "s_map", + path, + "'\u0001'"); + + String expectedCreateTableSql = format( + createTableWithCustomSerdeFormat, + catalog, schema, table, + "\"c1\"", "\"c2\"", "\"c3\"", "\"c4\"", "\"c5\"", "\"c6\"", "\"c7\"", + "\"s_int\"", "\"s_arr\"", "\"s_map\"", + path, + "U&'\\0001'"); + + try { + assertUpdate(createTableSql); + + MaterializedResult actualCreateTableSql = computeActual(format("SHOW CREATE TABLE %s.%s.%s", catalog, schema, table)); + assertEquals(actualCreateTableSql.getOnlyValue(), expectedCreateTableSql); + + assertQuery( + format( + "SELECT\n" + + "c1, c2, c3, c4, c5, \n" + + "element_at(c6, 'size'), element_at(c6, 'color'), \n" + + "c7.s_arr, 
element_at(c7.s_map, 10), element_at(c7.s_map, 20) FROM %s.%s.%s", catalog, schema, table), + "VALUES(" + + "1001, 'he|llo', true, 88.5, \n" + + "ARRAY['alpha', 'beta', 'gamma'], \n" + + "'large', 'blue', \n" + + "ARRAY[CAST(1.1 AS REAL), CAST(2.2 AS REAL), CAST(3.3 AS REAL)], 'foo', 'bar')"); + } + finally { + assertUpdate(format("DROP TABLE IF EXISTS %s.%s.%s", catalog, schema, table)); + deleteRecursively(tempDir.toPath(), ALLOW_INSECURE); + } + } + + @Test + public void testSerdeParametersForTextfileWrite() + { + String catalog = getSession().getCatalog().get(); + String schema = getSession().getSchema().get(); + String table = "test_textfile_custom_delim_write"; + + String createTableWithCustomSerdeFormat = + "CREATE TABLE %s.%s.%s (\n" + + " %s bigint,\n" + + " %s varchar,\n" + + " %s boolean,\n" + + " %s double,\n" + + " %s array(varchar),\n" + + " %s map(varchar, varchar),\n" + + " %s row(%s integer, %s array(real), %s map(smallint, varchar))\n" + + ")\n" + + "WITH (\n" + + " format = 'TEXTFILE',\n" + + " textfile_collection_delim = ';',\n" + + " textfile_escape_delim = %s,\n" + + " textfile_field_delim = '|',\n" + + " textfile_mapkey_delim = ':'\n" + + ")"; + + @Language("SQL") String createTableSql = format( + createTableWithCustomSerdeFormat, + catalog, schema, table, + "c1", "c2", "c3", "c4", "c5", "c6", "c7", + "s_int", "s_arr", "s_map", + "'\u0001'"); + + String expectedCreateTableSql = format( + createTableWithCustomSerdeFormat, + catalog, schema, table, + "\"c1\"", "\"c2\"", "\"c3\"", "\"c4\"", "\"c5\"", "\"c6\"", "\"c7\"", + "\"s_int\"", "\"s_arr\"", "\"s_map\"", + "U&'\\0001'"); + + try { + assertUpdate(createTableSql); + + MaterializedResult actualCreateTableSql = computeActual(format("SHOW CREATE TABLE %s.%s.%s", catalog, schema, table)); + assertEquals(actualCreateTableSql.getOnlyValue(), expectedCreateTableSql); + + assertUpdate(format( + "INSERT INTO %s.%s.%s VALUES (" + + "1001, " + + "'he|llo', " + + "true, " + + "88.5, " + + 
"ARRAY['alpha','beta', 'gamma'], " + + "MAP(ARRAY['size', 'color'], ARRAY['large', 'blue']), " + + "ROW(42, ARRAY[REAL '1.1', REAL '2.2',REAL '3.3'], MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])))", catalog, schema, table), 1); + + assertQuery( + format( + "SELECT\n" + + "c1, c2, c3, c4, c5, \n" + + "element_at(c6, 'size'), element_at(c6, 'color'), \n" + + "c7.s_arr, element_at(c7.s_map, 10), element_at(c7.s_map, 20) FROM %s.%s.%s", catalog, schema, table), + "VALUES(" + + "1001, 'he|llo', true, 88.5, \n" + + "ARRAY['alpha', 'beta', 'gamma'], \n" + + "'large', 'blue', \n" + + "ARRAY[CAST(1.1 AS REAL), CAST(2.2 AS REAL), CAST(3.3 AS REAL)], 'foo', 'bar')"); + } + finally { + assertUpdate(format("DROP TABLE IF EXISTS %s.%s.%s", catalog, schema, table)); + } + } + protected String retentionDays(int days) { return ""; diff --git a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java index c273eb0cc13ba..e20f7a84b3876 100644 --- a/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java +++ b/presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java @@ -420,60 +420,10 @@ public void testReadTableWithTextfileFormat() String tmpTableName = generateRandomTableName(); try { - getExpectedQueryRunner().execute(getSession(), format( - "CREATE TABLE %s (" + - "id BIGINT," + - "name VARCHAR," + - "is_active BOOLEAN," + - "score DOUBLE," + - "created_at TIMESTAMP," + - "tags ARRAY," + - "metrics ARRAY," + - "properties MAP," + - "flags MAP," + - "nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY, sub_map MAP)," + - "price DECIMAL(15,2)," + - "amount DECIMAL(21,6)," + - "event_date DATE," + - "ds VARCHAR" + - ") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", 
tmpTableName), ImmutableList.of(BIGINT)); - getExpectedQueryRunner().execute(getSession(), format( - "INSERT INTO %s (" + - "id," + - "name," + - "is_active," + - "score," + - "created_at," + - "tags," + - "metrics," + - "properties," + - "flags," + - "nested_struct," + - "price," + - "amount," + - "event_date," + - "ds" + - ") VALUES (" + - "1001," + - "'Jane Doe'," + - "TRUE," + - "88.5," + - "TIMESTAMP '2025-07-23 10:00:00'," + - "ARRAY['alpha', 'beta', 'gamma']," + - "ARRAY[3.14, 2.71, 1.41]," + - "MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," + - "MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," + - "ROW(" + - "42," + - "'sub_jane'," + - "ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," + - "MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" + - ")," + - "DECIMAL '12.34'," + - "CAST('-123456789012345.123456' as DECIMAL(21,6))," + - "DATE '2024-02-29'," + - "'2025-07-01'" + - ")", tmpTableName), ImmutableList.of(BIGINT)); + getExpectedQueryRunner().execute(getSession(), + createTextFileTableSql(tmpTableName, ImmutableList.of()), + ImmutableList.of(BIGINT)); + getExpectedQueryRunner().execute(getSession(), insertTextFileTableSql(tmpTableName), ImmutableList.of(BIGINT)); // created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox. 
// https://github.com/facebookincubator/velox/issues/8127 assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName)); @@ -483,6 +433,94 @@ public void testReadTableWithTextfileFormat() } } + @Test(groups = {"textfile"}) + public void testReadTableWithCustomSerdeTextfile() + { + String tmpTableName = generateRandomTableName(); + List serdeParams = ImmutableList.of( + "textfile_field_delim='|'", + "textfile_escape_delim='\u0001'", + "textfile_collection_delim=';'", + "textfile_mapkey_delim=':'"); + try { + getExpectedQueryRunner().execute(getSession(), + createTextFileTableSql(tmpTableName, serdeParams), + ImmutableList.of(BIGINT)); + getExpectedQueryRunner().execute(getSession(), insertTextFileTableSql(tmpTableName), ImmutableList.of(BIGINT)); + // created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox. + // https://github.com/facebookincubator/velox/issues/8127 + assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName)); + } + finally { + dropTableIfExists(tmpTableName); + } + } + + private String createTextFileTableSql(String tableName, List serdeParams) + { + String serde = serdeParams.isEmpty() ? 
"" : ", " + String.join(", ", serdeParams); + return format( + "CREATE TABLE %s (" + + "id BIGINT," + + "name VARCHAR," + + "is_active BOOLEAN," + + "score DOUBLE," + + "created_at TIMESTAMP," + + "tags ARRAY," + + "metrics ARRAY," + + "properties MAP," + + "flags MAP," + + "nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY, sub_map MAP)," + + "price DECIMAL(15,2)," + + "amount DECIMAL(21,6)," + + "event_date DATE," + + "ds VARCHAR" + + ") WITH (format = 'TEXTFILE'%s, partitioned_by = ARRAY['ds'])", + tableName, + serde); + } + + private String insertTextFileTableSql(String tableName) + { + return format( + "INSERT INTO %s (" + + "id," + + "name," + + "is_active," + + "score," + + "created_at," + + "tags," + + "metrics," + + "properties," + + "flags," + + "nested_struct," + + "price," + + "amount," + + "event_date," + + "ds" + + ") VALUES (" + + "1001," + + "'Jane Doe'," + + "TRUE," + + "88.5," + + "TIMESTAMP '2025-07-23 10:00:00'," + + "ARRAY['alpha', 'beta', 'gamma']," + + "ARRAY[3.14, 2.71, 1.41]," + + "MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," + + "MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," + + "ROW(" + + "42," + + "'sub_jane'," + + "ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," + + "MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" + + ")," + + "DECIMAL '12.34'," + + "CAST('-123456789012345.123456' as DECIMAL(21,6))," + + "DATE '2024-02-29'," + + "'2025-07-01'" + + ")", tableName); + } + @Test public void testOrderBy() {