Skip to content

Commit dd87eeb

Browse files
Support reading from CSV tables with null escape and quote characters
1 parent 59f6866 commit dd87eeb

File tree

2 files changed

+9
-1
lines changed

2 files changed

+9
-1
lines changed

lib/trino-hive-formats/src/main/java/io/trino/hive/formats/line/csv/CsvDeserializer.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,11 @@ public CsvDeserializer(List<Column> columns, char separatorChar, char quoteChar,
6060
checkArgument(separatorChar != '\0', "Separator cannot be the null character (ASCII 0)");
6161
checkArgument(separatorChar != quoteChar, "Separator and quote character cannot be the same");
6262
checkArgument(separatorChar != escapeChar, "Separator and escape character cannot be the same");
63-
checkArgument(quoteChar != escapeChar, "Quote and escape character cannot be the same");
63+
64+
// Quote and escape character can be the same when both are the null character (quoting and escaping are disabled)
65+
if (quoteChar != '\0' || escapeChar != '\0') {
66+
checkArgument(quoteChar != escapeChar, "Quote and escape character cannot be the same");
67+
}
6468
this.separatorChar = separatorChar;
6569
this.quoteChar = quoteChar;
6670
this.escapeChar = escapeChar;

lib/trino-hive-formats/src/test/java/io/trino/hive/formats/line/csv/TestCsvFormat.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ public void testCsv()
126126
assertTrinoHiveByteForByte(true, Arrays.asList("f**", "b*r", "b*z"), Optional.of('\t'), Optional.of('*'), Optional.of('#'));
127127
assertTrinoHiveByteForByte(false, Arrays.asList("f**", "b*r", "b*z"), Optional.of('\t'), Optional.of('*'), Optional.of('\0'));
128128

129+
// If both the quote character and the escape character are `\0`, then quoting and escaping are simply disabled, even if this would produce output that does not round trip
130+
assertTrinoHiveByteForByte(true, Arrays.asList("foo", "bar", "baz"), Optional.of('\t'), Optional.of('\0'), Optional.of('\0'));
131+
assertTrinoHiveByteForByte(false, Arrays.asList("f\t\t", "\tbar\t", "baz"), Optional.of('\t'), Optional.of('\0'), Optional.of('\0'));
132+
129133
// These cases don't round trip, because Hive uses different default escape characters for serialization and deserialization.
130134
// For serialization the pipe character is escaped with a quote char, but for deserialization the escape character is the backslash character
131135
assertTrinoHiveByteForByte(false, Arrays.asList("|", "a", "b"), Optional.empty(), Optional.of('|'), Optional.empty());

0 commit comments

Comments
 (0)