Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 48 additions & 20 deletions src/main/java/org/apache/commons/csv/CSVFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ public static Builder create(final CSVFormat csvFormat) {
return new Builder(csvFormat);
}

private boolean allowDuplicateHeaderNames;

private boolean allowMissingColumnNames;

private boolean autoFlush;
Expand All @@ -198,6 +196,8 @@ public static Builder create(final CSVFormat csvFormat) {

private String delimiter;

private DuplicateHeaderMode duplicateHeaderMode;

private Character escapeCharacter;

private String[] headerComments;
Expand Down Expand Up @@ -245,7 +245,7 @@ private Builder(final CSVFormat csvFormat) {
this.trim = csvFormat.trim;
this.autoFlush = csvFormat.autoFlush;
this.quotedNullString = csvFormat.quotedNullString;
this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
}

/**
Expand All @@ -262,12 +262,26 @@ public CSVFormat build() {
*
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
* @return This instance.
* @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
*/
@Deprecated
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
setDuplicateHeaderMode(mode);
return this;
}

/**
 * Sets the duplicate header names behavior.
 *
 * @param duplicateHeaderMode the duplicate header names behavior, must not be {@code null}.
 * @return This instance.
 * @throws NullPointerException if {@code duplicateHeaderMode} is {@code null}.
 * @since 1.9.0
 */
public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
    // Fail fast: a null mode would otherwise be stored silently and later be
    // treated as "not ALLOW_ALL" by the identity comparisons that read this field.
    this.duplicateHeaderMode = Objects.requireNonNull(duplicateHeaderMode, "duplicateHeaderMode");
    return this;
}

/**
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
* {@link IllegalArgumentException} to be thrown.
Expand Down Expand Up @@ -760,7 +774,8 @@ public CSVFormat getFormat() {
}

/**
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
* empty lines.
*
* <p>
* The {@link Builder} settings are:
Expand All @@ -770,13 +785,13 @@ public CSVFormat getFormat() {
* <li>{@code setQuote('"')}</li>
* <li>{@code setRecordSeparator("\r\n")}</li>
* <li>{@code setIgnoreEmptyLines(true)}</li>
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* </ul>
*
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
false, false, false, true);
false, false, false, DuplicateHeaderMode.ALLOW_ALL);

/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
Expand All @@ -799,7 +814,7 @@ public CSVFormat getFormat() {
* <li>{@code setRecordSeparator("\r\n")}</li>
* <li>{@code setIgnoreEmptyLines(false)}</li>
* <li>{@code setAllowMissingColumnNames(true)}</li>
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
* </ul>
* <p>
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
Expand Down Expand Up @@ -1220,7 +1235,7 @@ private static boolean isLineBreak(final Character c) {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
true);
DuplicateHeaderMode.ALLOW_ALL);
}

static String[] toStringArray(final Object[] values) {
Expand Down Expand Up @@ -1262,7 +1277,7 @@ public static CSVFormat valueOf(final String format) {
return CSVFormat.Predefined.valueOf(format).getFormat();
}

private final boolean allowDuplicateHeaderNames;
private final DuplicateHeaderMode duplicateHeaderMode;

private final boolean allowMissingColumnNames;

Expand Down Expand Up @@ -1319,7 +1334,7 @@ private CSVFormat(final Builder builder) {
this.trim = builder.trim;
this.autoFlush = builder.autoFlush;
this.quotedNullString = builder.quotedNullString;
this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
this.duplicateHeaderMode = builder.duplicateHeaderMode;
validate();
}

Expand All @@ -1343,14 +1358,14 @@ private CSVFormat(final Builder builder) {
* @param trim TODO Doc me.
* @param trailingDelimiter TODO Doc me.
* @param autoFlush TODO Doc me.
* @param allowDuplicateHeaderNames TODO Doc me.
* @param duplicateHeaderMode the behavior when handling duplicate headers
* @throws IllegalArgumentException if the delimiter is a line break character.
*/
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
final boolean allowDuplicateHeaderNames) {
final DuplicateHeaderMode duplicateHeaderMode) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
Expand All @@ -1369,7 +1384,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
this.trim = trim;
this.autoFlush = autoFlush;
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
this.duplicateHeaderMode = duplicateHeaderMode;
validate();
}

Expand Down Expand Up @@ -1416,7 +1431,7 @@ public boolean equals(final Object obj) {
return false;
}
final CSVFormat other = (CSVFormat) obj;
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
Expand Down Expand Up @@ -1450,9 +1465,21 @@ public String format(final Object... values) {
*
* @return whether duplicate header names are allowed
* @since 1.7
* @deprecated Use {@link #getDuplicateHeaderMode()}.
*/
@Deprecated
public boolean getAllowDuplicateHeaderNames() {
return allowDuplicateHeaderNames;
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
}

/**
 * Gets the mode that controls how duplicate header names are handled.
 *
 * @return the configured {@link DuplicateHeaderMode}: duplicates allowed, allowed conditionally, or disallowed.
 * @since 1.9.0
 */
public DuplicateHeaderMode getDuplicateHeaderMode() {
    return this.duplicateHeaderMode;
}

/**
Expand Down Expand Up @@ -1633,7 +1660,7 @@ public int hashCode() {
int result = 1;
result = prime * result + Arrays.hashCode(header);
result = prime * result + Arrays.hashCode(headerComments);
return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
skipHeaderRecord, trailingDelimiter, trim);
}
Expand Down Expand Up @@ -2235,7 +2262,7 @@ private void validate() throws IllegalArgumentException {
}

// validate header
if (header != null && !allowDuplicateHeaderNames) {
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
final Set<String> dupCheck = new HashSet<>();
for (final String hdr : header) {
if (!dupCheck.add(hdr)) {
Expand All @@ -2254,7 +2281,7 @@ private void validate() throws IllegalArgumentException {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames() {
return builder().setAllowDuplicateHeaderNames(true).build();
return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
}

/**
Expand All @@ -2267,7 +2294,8 @@ public CSVFormat withAllowDuplicateHeaderNames() {
*/
@Deprecated
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
return builder().setDuplicateHeaderMode(mode).build();
}

/**
Expand Down
10 changes: 7 additions & 3 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -497,12 +497,16 @@ private Headers createHeaders() throws IOException {
throw new IllegalArgumentException(
"A header name is missing in " + Arrays.toString(headerRecord));
}
// Note: This will always allow a duplicate header if the header is empty

final boolean containsHeader = header != null && hdrMap.containsKey(header);
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;

if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
throw new IllegalArgumentException(
String.format(
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
header, Arrays.toString(headerRecord)));
}
if (header != null) {
Expand Down
42 changes: 42 additions & 0 deletions src/main/java/org/apache/commons/csv/DuplicateHeaderMode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv;

/**
 * Selects the policy applied to header names that occur more than once
 * when {@link CSVFormat#withHeader(String...)} is not null.
 *
 * @since 1.9.0
 */
public enum DuplicateHeaderMode {

    /**
     * Every duplicate header name is accepted.
     */
    ALLOW_ALL,

    /**
     * A header name may repeat only when it is an empty string or {@code null}.
     */
    ALLOW_EMPTY,

    /**
     * No duplicate header name is accepted.
     */
    DISALLOW
}
2 changes: 1 addition & 1 deletion src/site/resources/checkstyle/checkstyle-suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
<suppressions>
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
</suppressions>
13 changes: 13 additions & 0 deletions src/test/java/org/apache/commons/csv/CSVFormatTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ public void testEqualsHash() throws Exception {
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
assertNotEquals(name, type, a, b);
} else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
assertNotEquals(name, type, a, b);
} else if ("java.lang.Object[]".equals(type)){
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
Expand Down Expand Up @@ -1295,6 +1299,15 @@ public void testWithEscape() {
}


@Test
public void testWithEmptyDuplicates() {
    final CSVFormat allowEmptyFormat = CSVFormat.DEFAULT
        .builder()
        .setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
        .build();

    // The deprecated boolean accessor must not report ALLOW_EMPTY as "duplicates allowed".
    assertFalse(allowEmptyFormat.getAllowDuplicateHeaderNames());
    // The new accessor must round-trip the mode given to the builder.
    assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, allowEmptyFormat.getDuplicateHeaderMode());
}

@Test
public void testWithEscapeCRThrowsExceptions() {
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));
Expand Down
89 changes: 89 additions & 0 deletions src/test/java/org/apache/commons/csv/issues/JiraCsv264Test.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.csv.issues;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.DuplicateHeaderMode;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertThrows;

import java.io.IOException;
import java.io.StringReader;

/**
* When {@link CSVFormat#withHeader(String...)} is not null, duplicate empty-string headers must be
* rejected under {@link DuplicateHeaderMode#DISALLOW} and accepted under {@link DuplicateHeaderMode#ALLOW_EMPTY}.
*
* @see <a href="https://issues.apache.org/jira/browse/CSV-264">Jira Ticket</a>
*/
public class JiraCsv264Test {

    /**
     * A CSV file whose first and last header names are both empty strings.
     */
    private static final String CSV_STRING = "\"\",\"B\",\"\"\n" +
        "\"1\",\"2\",\"3\"\n" +
        "\"4\",\"5\",\"6\"";

    /**
     * A CSV file with a random gap in the middle.
     */
    private static final String CSV_STRING_GAP = "\"A\",\"B\",\"\",\"\",\"E\"\n" +
        "\"1\",\"2\",\"\",\"\",\"5\"\n" +
        "\"6\",\"7\",\"\",\"\",\"10\"";

    /**
     * Builds the format shared by every test in this class; only the duplicate header mode varies.
     *
     * @param duplicateHeaderMode the duplicate header mode under test.
     * @return a format derived from {@link CSVFormat#DEFAULT} with {@code setHeader()},
     *         the given mode and {@code setAllowMissingColumnNames(true)} applied.
     */
    private static CSVFormat newFormat(final DuplicateHeaderMode duplicateHeaderMode) {
        return CSVFormat.DEFAULT
            .builder()
            .setHeader()
            .setDuplicateHeaderMode(duplicateHeaderMode)
            .setAllowMissingColumnNames(true)
            .build();
    }

    /**
     * Two empty header names must be rejected when duplicates are disallowed.
     */
    @Test
    public void testJiraCsv264() throws IOException {
        final CSVFormat csvFormat = newFormat(DuplicateHeaderMode.DISALLOW);

        try (StringReader reader = new StringReader(CSV_STRING)) {
            assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
        }
    }

    /**
     * Adjacent empty header names must be accepted when empty duplicates are allowed.
     */
    @Test
    public void testJiraCsv264WithGapAllowEmpty() throws IOException {
        final CSVFormat csvFormat = newFormat(DuplicateHeaderMode.ALLOW_EMPTY);

        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
            // Parsing must not throw; close the returned parser instead of dropping it unclosed.
            csvFormat.parse(reader).close();
        }
    }

    /**
     * Adjacent empty header names must be rejected when duplicates are disallowed.
     */
    @Test
    public void testJiraCsv264WithGapDisallow() throws IOException {
        final CSVFormat csvFormat = newFormat(DuplicateHeaderMode.DISALLOW);

        try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
            assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
        }
    }
}