Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ public CsvDeserializer(List<Column> columns, char separatorChar, char quoteChar,
checkArgument(separatorChar != '\0', "Separator cannot be the null character (ASCII 0)");
checkArgument(separatorChar != quoteChar, "Separator and quote character cannot be the same");
checkArgument(separatorChar != escapeChar, "Separator and escape character cannot be the same");
checkArgument(quoteChar != escapeChar, "Quote and escape character cannot be the same");

// The quote and escape characters may be the same only when both are the null character (quoting and escaping are disabled)
if (quoteChar != '\0' || escapeChar != '\0') {
checkArgument(quoteChar != escapeChar, "Quote and escape character cannot be the same");
}
this.separatorChar = separatorChar;
this.quoteChar = quoteChar;
this.escapeChar = escapeChar;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ public void testCsv()
assertTrinoHiveByteForByte(true, Arrays.asList("f**", "b*r", "b*z"), Optional.of('\t'), Optional.of('*'), Optional.of('#'));
assertTrinoHiveByteForByte(false, Arrays.asList("f**", "b*r", "b*z"), Optional.of('\t'), Optional.of('*'), Optional.of('\0'));

// If both the quote character and the escape character are `\0`, then quoting and escaping are simply disabled, even if this would cause output that does not round trip
assertTrinoHiveByteForByte(true, Arrays.asList("foo", "bar", "baz"), Optional.of('\t'), Optional.of('\0'), Optional.of('\0'));
assertTrinoHiveByteForByte(false, Arrays.asList("f\t\t", "\tbar\t", "baz"), Optional.of('\t'), Optional.of('\0'), Optional.of('\0'));

// These cases don't round trip, because Hive uses different default escape characters for serialization and deserialization.
// For serialization, the pipe character is escaped with a quote char, but for deserialization the escape character is the backslash character
assertTrinoHiveByteForByte(false, Arrays.asList("|", "a", "b"), Optional.empty(), Optional.of('|'), Optional.empty());
Expand Down