Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@
<excludes>
<!-- These files are used as test data and test result specifications. -->
<exclude>src/test/resources/org/apache/commons/csv/empty.txt</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSV-141/csv-141.csv</exclude>
<exclude>src/test/resources/org/apache/commons/csv/csv-167/sample1.csv</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv</exclude>
<exclude>src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv</exclude>
Expand Down
80 changes: 73 additions & 7 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,12 @@ public static Builder create(final CSVFormat csvFormat) {
return new Builder(csvFormat);
}

/** Whether the input may end before a quoted last field on the last line has been closed. */
private boolean allowEofWithoutClosingQuote;

/** Whether missing column names are allowed in the header line. */
private boolean allowMissingColumnNames;

/** Whether text following the closing quote of a quoted field is allowed. */
private boolean allowTrailingText;

/** Whether to flush on close. */
private boolean autoFlush;

private Character commentMarker;
Expand Down Expand Up @@ -264,6 +268,8 @@ private Builder(final CSVFormat csvFormat) {
this.autoFlush = csvFormat.autoFlush;
this.quotedNullString = csvFormat.quotedNullString;
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
this.allowTrailingText = csvFormat.allowTrailingText;
this.allowEofWithoutClosingQuote = csvFormat.allowEofWithoutClosingQuote;
}

/**
Expand All @@ -288,6 +294,19 @@ public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNa
return this;
}

/**
 * Sets whether a quoted last field on the last line may be left without its closing quote when the file ends.
 *
 * @param allowEofWithoutClosingQuote {@code true} to accept the end of the file before the closing quote of the last field on the last line,
 *        {@code false} to cause an {@link IOException} to be thrown in that case.
 * @return This instance.
 * @since 1.10.0
 */
public Builder setAllowEofWithoutClosingQuote(final boolean allowEofWithoutClosingQuote) {
    this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
    return this;
}

/**
* Sets the parser missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
* {@link IllegalArgumentException} to be thrown.
Expand All @@ -301,6 +320,20 @@ public Builder setAllowMissingColumnNames(final boolean allowMissingColumnNames)
return this;
}

/**
 * Sets the behavior for text that follows the closing quote of a quoted field.
 *
 * @param allowTrailingText {@code true} to append the trailing text to the field contents, {@code false} to cause an
 *        {@link IOException} to be thrown.
 * @return This instance.
 * @since 1.10.0
 */
public Builder setAllowTrailingText(final boolean allowTrailingText) {
    this.allowTrailingText = allowTrailingText;
    return this;
}

/**
* Sets whether to flush on close.
*
Expand Down Expand Up @@ -810,7 +843,7 @@ public CSVFormat getFormat() {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
false, false, false, DuplicateHeaderMode.ALLOW_ALL, false, false);

/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
Expand All @@ -834,6 +867,8 @@ public CSVFormat getFormat() {
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setAllowMissingColumnNames(true)}</li>
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* <li>{@code setAllowTrailingText(true)}</li>
* <li>{@code setAllowEofWithoutClosingQuote(true)}</li>
* </ul>
* <p>
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
Expand All @@ -846,6 +881,8 @@ public CSVFormat getFormat() {
public static final CSVFormat EXCEL = DEFAULT.builder()
.setIgnoreEmptyLines(false)
.setAllowMissingColumnNames(true)
.setAllowTrailingText(true)
.setAllowEofWithoutClosingQuote(true)
.build();
// @formatter:on

Expand Down Expand Up @@ -1268,7 +1305,7 @@ private static boolean isTrimChar(final CharSequence charSequence, final int pos
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
DuplicateHeaderMode.ALLOW_ALL);
DuplicateHeaderMode.ALLOW_ALL, false, false);
}

static String[] toStringArray(final Object[] values) {
Expand Down Expand Up @@ -1310,8 +1347,12 @@ public static CSVFormat valueOf(final String format) {

private final DuplicateHeaderMode duplicateHeaderMode;

/** Whether the file can end before a quoted last field on the last line has a closing quote. */
private final boolean allowEofWithoutClosingQuote;

/** Whether missing column names are allowed when parsing the header line. */
private final boolean allowMissingColumnNames;

/** Whether quoted fields allow trailing text after the closing quote. */
private final boolean allowTrailingText;

/** Whether to flush on close. */
private final boolean autoFlush;

private final Character commentMarker; // null if commenting is disabled
Expand Down Expand Up @@ -1366,6 +1407,8 @@ private CSVFormat(final Builder builder) {
this.autoFlush = builder.autoFlush;
this.quotedNullString = builder.quotedNullString;
this.duplicateHeaderMode = builder.duplicateHeaderMode;
this.allowTrailingText = builder.allowTrailingText;
this.allowEofWithoutClosingQuote = builder.allowEofWithoutClosingQuote;
validate();
}

Expand Down Expand Up @@ -1396,7 +1439,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
final DuplicateHeaderMode duplicateHeaderMode) {
final DuplicateHeaderMode duplicateHeaderMode, final boolean allowTrailingText, final boolean allowEofWithoutClosingQuote) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
Expand All @@ -1416,6 +1459,8 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
this.autoFlush = autoFlush;
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
this.duplicateHeaderMode = duplicateHeaderMode;
this.allowTrailingText = allowTrailingText;
this.allowEofWithoutClosingQuote = allowEofWithoutClosingQuote;
validate();
}

Expand Down Expand Up @@ -1469,7 +1514,8 @@ public boolean equals(final Object obj) {
ignoreHeaderCase == other.ignoreHeaderCase && ignoreSurroundingSpaces == other.ignoreSurroundingSpaces &&
Objects.equals(nullString, other.nullString) && Objects.equals(quoteCharacter, other.quoteCharacter) && quoteMode == other.quoteMode &&
Objects.equals(quotedNullString, other.quotedNullString) && Objects.equals(recordSeparator, other.recordSeparator) &&
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim;
skipHeaderRecord == other.skipHeaderRecord && trailingDelimiter == other.trailingDelimiter && trim == other.trim &&
allowTrailingText == other.allowTrailingText && allowEofWithoutClosingQuote == other.allowEofWithoutClosingQuote;
}

/**
Expand Down Expand Up @@ -1503,6 +1549,16 @@ public boolean getAllowDuplicateHeaderNames() {
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
}

/**
 * Gets whether a quoted last field on the last line may be missing its closing quote when the file ends.
 *
 * @return {@code true} if the missing closing quote is accepted, {@code false} if an {@link IOException} is thrown instead.
 * @since 1.10.0
 */
public boolean getAllowEofWithoutClosingQuote() {
    return this.allowEofWithoutClosingQuote;
}

/**
* Gets whether missing column names are allowed when parsing the header line.
*
Expand All @@ -1512,6 +1568,16 @@ public boolean getAllowMissingColumnNames() {
return allowMissingColumnNames;
}

/**
 * Gets whether text may follow the closing quote of a quoted field.
 *
 * @return {@code true} if trailing text is allowed, {@code false} if an {@link IOException} is thrown instead.
 * @since 1.10.0
 */
public boolean getAllowTrailingText() {
    return this.allowTrailingText;
}

/**
* Gets whether to flush on close.
*
Expand Down Expand Up @@ -1692,9 +1758,9 @@ public int hashCode() {
int result = 1;
result = prime * result + Arrays.hashCode(headers);
result = prime * result + Arrays.hashCode(headerComments);
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
skipHeaderRecord, trailingDelimiter, trim);
return prime * result + Objects.hash(duplicateHeaderMode, allowEofWithoutClosingQuote, allowMissingColumnNames, allowTrailingText,
autoFlush, commentMarker, delimiter, escapeCharacter, ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces,
nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator, skipHeaderRecord, trailingDelimiter, trim);
}

/**
Expand Down
28 changes: 21 additions & 7 deletions src/main/java/org/apache/commons/csv/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ final class Lexer implements Closeable {

/** Copied from the format's ignore-surrounding-spaces setting. */
private final boolean ignoreSurroundingSpaces;
/** Copied from the format's ignore-empty-lines setting. */
private final boolean ignoreEmptyLines;
/** When set, characters found after a closing quote are appended to the token instead of being rejected. */
private final boolean allowTrailingText;
/** When set, reaching end of file inside a quoted token yields an EOF token instead of an IOException. */
private final boolean allowEofWithoutClosingQuote;

/** The input stream */
private final ExtendedBufferedReader reader;
Expand All @@ -72,6 +74,8 @@ final class Lexer implements Closeable {
this.commentStart = mapNullToDisabled(format.getCommentMarker());
this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
this.ignoreEmptyLines = format.getIgnoreEmptyLines();
this.allowTrailingText = format.getAllowTrailingText();
this.allowEofWithoutClosingQuote = format.getAllowEofWithoutClosingQuote();
this.delimiterBuf = new char[delimiter.length - 1];
this.escapeDelimiterBuf = new char[2 * delimiter.length - 1];
}
Expand Down Expand Up @@ -364,17 +368,27 @@ private Token parseEncapsulatedToken(final Token token) throws IOException {
token.type = EORECORD;
return token;
}
if (!Character.isWhitespace((char)c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getCurrentLineNumber() +
") invalid char between encapsulated token and delimiter");
if (allowTrailingText) {
token.content.append((char) c);
} else {
if (!Character.isWhitespace((char)c)) {
// error invalid char between token and next delimiter
throw new IOException("(line " + getCurrentLineNumber() +
") invalid char between encapsulated token and delimiter");
}
}
}
}
} else if (isEndOfFile(c)) {
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
if (allowEofWithoutClosingQuote) {
token.type = EOF;
token.isReady = true; // There is data at EOF
return token;
} else {
// error condition (end of file before end of token)
throw new IOException("(startline " + startLineNumber +
") EOF reached before encapsulated token finished");
}
} else {
// consume character
token.content.append((char) c);
Expand Down
17 changes: 6 additions & 11 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,6 @@ public void testCSV141CSVFormat_POSTGRESQL_CSV() throws Exception {
}

/**
 * Runs the shared CSV-141 "ok" check ({@code testCSV141Ok}) against {@link CSVFormat#EXCEL}.
 */
@Test
@Disabled("PR 295 does not work")
public void testCSV141Excel() throws Exception {
testCSV141Ok(CSVFormat.EXCEL);
}
Expand Down Expand Up @@ -358,16 +357,12 @@ record = parser.nextRecord();
record = parser.nextRecord();
assertEquals("1414770318327", record.get(0));
assertEquals("android.widget.EditText", record.get(1));
assertEquals("pass sem1", record.get(2));
assertEquals(3, record.size());
// row 4
record = parser.nextRecord();
assertEquals("1414770318628", record.get(0));
assertEquals("android.widget.EditText", record.get(1));
assertEquals("pass sem1 _84*|*", record.get(2));
assertEquals("0", record.get(3));
assertEquals("pass sem1", record.get(4));
assertEquals(5, record.size());
assertEquals("pass sem1\n1414770318628\"", record.get(2));
assertEquals("android.widget.EditText", record.get(3));
assertEquals("pass sem1 _84*|*", record.get(4));
assertEquals("0", record.get(5));
assertEquals("pass sem1\n", record.get(6));
assertEquals(7, record.size());
}
}

Expand Down
26 changes: 26 additions & 0 deletions src/test/java/org/apache/commons/csv/LexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -431,4 +431,30 @@ public void testTrimTrailingSpacesZeroLength() throws Exception {
lexer.trimTrailingSpaces(buffer);
assertThat(lexer.nextToken(new Token()), matches(EOF, ""));
}

/**
 * Checks the lexer's handling of text that follows the closing quote of a quoted field, with the
 * trailing-text option switched on and then off.
 */
@Test
public void testTrailingTextAfterQuote() throws Exception {
    final String code = "\"a\" b,\"a\" \" b,\"a\" b \"\"";

    // Option on: whatever follows the closing quote becomes part of the token content.
    final CSVFormat lenientFormat = CSVFormat.Builder.create().setAllowTrailingText(true).build();
    try (final Lexer lexer = createLexer(code, lenientFormat)) {
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "a b"));
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "a \" b"));
        assertThat(lexer.nextToken(new Token()), matches(EOF, "a b \"\""));
    }

    // Option off: the very first token already carries trailing text, so reading it must fail.
    final CSVFormat strictFormat = CSVFormat.Builder.create().setAllowTrailingText(false).build();
    try (final Lexer lexer = createLexer(code, strictFormat)) {
        assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
    }
}

/**
 * Checks the lexer's handling of input that ends inside a quoted field, with the
 * EOF-without-closing-quote option switched on and then off.
 */
@Test
public void testEOFWithoutClosingQuote() throws Exception {
    final String code = "a,\"b";

    // Option on: the unterminated field is returned as the EOF token's content.
    final CSVFormat lenientFormat = CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(true).build();
    try (final Lexer lexer = createLexer(code, lenientFormat)) {
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "a"));
        assertThat(lexer.nextToken(new Token()), matches(EOF, "b"));
    }

    // Option off: the first field still lexes, then the unterminated field raises an IOException.
    final CSVFormat strictFormat = CSVFormat.Builder.create().setAllowEofWithoutClosingQuote(false).build();
    try (final Lexer lexer = createLexer(code, strictFormat)) {
        assertThat(lexer.nextToken(new Token()), matches(TOKEN, "a"));
        assertThrows(IOException.class, () -> lexer.nextToken(new Token()));
    }
}
}