Skip to content

Commit 4fdfc59

Browse files
authored
CSV-264: Added DuplicateHeaderMode for flexibility with header strictness. (#114)
* csv-264: added duplicateheadermode for flexibility with header strictness * fix: use assertthrows and update docs
1 parent caa59a3 commit 4fdfc59

File tree

6 files changed

+200
-24
lines changed

6 files changed

+200
-24
lines changed

src/main/java/org/apache/commons/csv/CSVFormat.java

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -188,8 +188,6 @@ public static Builder create(final CSVFormat csvFormat) {
188188
return new Builder(csvFormat);
189189
}
190190

191-
private boolean allowDuplicateHeaderNames;
192-
193191
private boolean allowMissingColumnNames;
194192

195193
private boolean autoFlush;
@@ -198,6 +196,8 @@ public static Builder create(final CSVFormat csvFormat) {
198196

199197
private String delimiter;
200198

199+
private DuplicateHeaderMode duplicateHeaderMode;
200+
201201
private Character escapeCharacter;
202202

203203
private String[] headerComments;
@@ -245,7 +245,7 @@ private Builder(final CSVFormat csvFormat) {
245245
this.trim = csvFormat.trim;
246246
this.autoFlush = csvFormat.autoFlush;
247247
this.quotedNullString = csvFormat.quotedNullString;
248-
this.allowDuplicateHeaderNames = csvFormat.allowDuplicateHeaderNames;
248+
this.duplicateHeaderMode = csvFormat.duplicateHeaderMode;
249249
}
250250

251251
/**
@@ -262,12 +262,26 @@ public CSVFormat build() {
262262
*
263263
* @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
264264
* @return This instance.
265+
* @deprecated Use {@link #setDuplicateHeaderMode(DuplicateHeaderMode)}.
265266
*/
267+
@Deprecated
266268
public Builder setAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
267-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
269+
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
270+
setDuplicateHeaderMode(mode);
268271
return this;
269272
}
270273

274+
/**
275+
* Sets the duplicate header names behavior.
276+
*
277+
* @param duplicateHeaderMode the duplicate header names behavior
278+
* @return This instance.
279+
*/
280+
public Builder setDuplicateHeaderMode(final DuplicateHeaderMode duplicateHeaderMode) {
281+
this.duplicateHeaderMode = duplicateHeaderMode;
282+
return this;
283+
}
284+
271285
/**
272286
* Sets the missing column names behavior, {@code true} to allow missing column names in the header line, {@code false} to cause an
273287
* {@link IllegalArgumentException} to be thrown.
@@ -760,7 +774,8 @@ public CSVFormat getFormat() {
760774
}
761775

762776
/**
763-
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
777+
* Standard Comma Separated Value format, as for {@link #RFC4180} but allowing
778+
* empty lines.
764779
*
765780
* <p>
766781
* The {@link Builder} settings are:
@@ -770,13 +785,13 @@ public CSVFormat getFormat() {
770785
* <li>{@code setQuote('"')}</li>
771786
* <li>{@code setRecordSeparator("\r\n")}</li>
772787
* <li>{@code setIgnoreEmptyLines(true)}</li>
773-
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
788+
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
774789
* </ul>
775790
*
776791
* @see Predefined#Default
777792
*/
778793
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, null, null, null, false, false, false,
779-
false, false, false, true);
794+
false, false, false, DuplicateHeaderMode.ALLOW_ALL);
780795

781796
/**
782797
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is locale dependent, it might be necessary
@@ -799,7 +814,7 @@ public CSVFormat getFormat() {
799814
* <li>{@code setRecordSeparator("\r\n")}</li>
800815
* <li>{@code setIgnoreEmptyLines(false)}</li>
801816
* <li>{@code setAllowMissingColumnNames(true)}</li>
802-
* <li>{@code setAllowDuplicateHeaderNames(true)}</li>
817+
* <li>{@code setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL)}</li>
803818
* </ul>
804819
* <p>
805820
* Note: This is currently like {@link #RFC4180} plus {@link Builder#setAllowMissingColumnNames(boolean) Builder#setAllowMissingColumnNames(true)} and
@@ -1220,7 +1235,7 @@ private static boolean isLineBreak(final Character c) {
12201235
*/
12211236
public static CSVFormat newFormat(final char delimiter) {
12221237
return new CSVFormat(String.valueOf(delimiter), null, null, null, null, false, false, null, null, null, null, false, false, false, false, false, false,
1223-
true);
1238+
DuplicateHeaderMode.ALLOW_ALL);
12241239
}
12251240

12261241
static String[] toStringArray(final Object[] values) {
@@ -1262,7 +1277,7 @@ public static CSVFormat valueOf(final String format) {
12621277
return CSVFormat.Predefined.valueOf(format).getFormat();
12631278
}
12641279

1265-
private final boolean allowDuplicateHeaderNames;
1280+
private final DuplicateHeaderMode duplicateHeaderMode;
12661281

12671282
private final boolean allowMissingColumnNames;
12681283

@@ -1319,7 +1334,7 @@ private CSVFormat(final Builder builder) {
13191334
this.trim = builder.trim;
13201335
this.autoFlush = builder.autoFlush;
13211336
this.quotedNullString = builder.quotedNullString;
1322-
this.allowDuplicateHeaderNames = builder.allowDuplicateHeaderNames;
1337+
this.duplicateHeaderMode = builder.duplicateHeaderMode;
13231338
validate();
13241339
}
13251340

@@ -1343,14 +1358,14 @@ private CSVFormat(final Builder builder) {
13431358
* @param trim TODO Doc me.
13441359
* @param trailingDelimiter TODO Doc me.
13451360
* @param autoFlush TODO Doc me.
1346-
* @param allowDuplicateHeaderNames TODO Doc me.
1361+
* @param duplicateHeaderMode the behavior when handling duplicate headers
13471362
* @throws IllegalArgumentException if the delimiter is a line break character.
13481363
*/
13491364
private CSVFormat(final String delimiter, final Character quoteChar, final QuoteMode quoteMode, final Character commentStart, final Character escape,
13501365
final boolean ignoreSurroundingSpaces, final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
13511366
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames,
13521367
final boolean ignoreHeaderCase, final boolean trim, final boolean trailingDelimiter, final boolean autoFlush,
1353-
final boolean allowDuplicateHeaderNames) {
1368+
final DuplicateHeaderMode duplicateHeaderMode) {
13541369
this.delimiter = delimiter;
13551370
this.quoteCharacter = quoteChar;
13561371
this.quoteMode = quoteMode;
@@ -1369,7 +1384,7 @@ private CSVFormat(final String delimiter, final Character quoteChar, final Quote
13691384
this.trim = trim;
13701385
this.autoFlush = autoFlush;
13711386
this.quotedNullString = quoteCharacter + nullString + quoteCharacter;
1372-
this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
1387+
this.duplicateHeaderMode = duplicateHeaderMode;
13731388
validate();
13741389
}
13751390

@@ -1416,7 +1431,7 @@ public boolean equals(final Object obj) {
14161431
return false;
14171432
}
14181433
final CSVFormat other = (CSVFormat) obj;
1419-
return allowDuplicateHeaderNames == other.allowDuplicateHeaderNames && allowMissingColumnNames == other.allowMissingColumnNames &&
1434+
return duplicateHeaderMode == other.duplicateHeaderMode && allowMissingColumnNames == other.allowMissingColumnNames &&
14201435
autoFlush == other.autoFlush && Objects.equals(commentMarker, other.commentMarker) && Objects.equals(delimiter, other.delimiter) &&
14211436
Objects.equals(escapeCharacter, other.escapeCharacter) && Arrays.equals(header, other.header) &&
14221437
Arrays.equals(headerComments, other.headerComments) && ignoreEmptyLines == other.ignoreEmptyLines &&
@@ -1450,9 +1465,21 @@ public String format(final Object... values) {
14501465
*
14511466
* @return whether duplicate header names are allowed
14521467
* @since 1.7
1468+
* @deprecated Use {@link #getDuplicateHeaderMode()}.
14531469
*/
1470+
@Deprecated
14541471
public boolean getAllowDuplicateHeaderNames() {
1455-
return allowDuplicateHeaderNames;
1472+
return duplicateHeaderMode == DuplicateHeaderMode.ALLOW_ALL;
1473+
}
1474+
1475+
/**
1476+
* Gets how duplicate headers are handled.
1477+
*
1478+
* @return if duplicate header values are allowed, allowed conditionally, or disallowed.
1479+
* @since 1.9.0
1480+
*/
1481+
public DuplicateHeaderMode getDuplicateHeaderMode() {
1482+
return duplicateHeaderMode;
14561483
}
14571484

14581485
/**
@@ -1633,7 +1660,7 @@ public int hashCode() {
16331660
int result = 1;
16341661
result = prime * result + Arrays.hashCode(header);
16351662
result = prime * result + Arrays.hashCode(headerComments);
1636-
return prime * result + Objects.hash(allowDuplicateHeaderNames, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
1663+
return prime * result + Objects.hash(duplicateHeaderMode, allowMissingColumnNames, autoFlush, commentMarker, delimiter, escapeCharacter,
16371664
ignoreEmptyLines, ignoreHeaderCase, ignoreSurroundingSpaces, nullString, quoteCharacter, quoteMode, quotedNullString, recordSeparator,
16381665
skipHeaderRecord, trailingDelimiter, trim);
16391666
}
@@ -2235,7 +2262,7 @@ private void validate() throws IllegalArgumentException {
22352262
}
22362263

22372264
// validate header
2238-
if (header != null && !allowDuplicateHeaderNames) {
2265+
if (header != null && duplicateHeaderMode != DuplicateHeaderMode.ALLOW_ALL) {
22392266
final Set<String> dupCheck = new HashSet<>();
22402267
for (final String hdr : header) {
22412268
if (!dupCheck.add(hdr)) {
@@ -2254,7 +2281,7 @@ private void validate() throws IllegalArgumentException {
22542281
*/
22552282
@Deprecated
22562283
public CSVFormat withAllowDuplicateHeaderNames() {
2257-
return builder().setAllowDuplicateHeaderNames(true).build();
2284+
return builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_ALL).build();
22582285
}
22592286

22602287
/**
@@ -2267,7 +2294,8 @@ public CSVFormat withAllowDuplicateHeaderNames() {
22672294
*/
22682295
@Deprecated
22692296
public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
2270-
return builder().setAllowDuplicateHeaderNames(allowDuplicateHeaderNames).build();
2297+
final DuplicateHeaderMode mode = allowDuplicateHeaderNames ? DuplicateHeaderMode.ALLOW_ALL : DuplicateHeaderMode.ALLOW_EMPTY;
2298+
return builder().setDuplicateHeaderMode(mode).build();
22712299
}
22722300

22732301
/**

src/main/java/org/apache/commons/csv/CSVParser.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -497,12 +497,16 @@ private Headers createHeaders() throws IOException {
497497
throw new IllegalArgumentException(
498498
"A header name is missing in " + Arrays.toString(headerRecord));
499499
}
500-
// Note: This will always allow a duplicate header if the header is empty
500+
501501
final boolean containsHeader = header != null && hdrMap.containsKey(header);
502-
if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
502+
final DuplicateHeaderMode headerMode = this.format.getDuplicateHeaderMode();
503+
final boolean duplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_ALL;
504+
final boolean emptyDuplicatesAllowed = headerMode == DuplicateHeaderMode.ALLOW_EMPTY;
505+
506+
if (containsHeader && !duplicatesAllowed && !(emptyHeader && emptyDuplicatesAllowed)) {
503507
throw new IllegalArgumentException(
504508
String.format(
505-
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
509+
"The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.Builder.setDuplicateHeaderMode().",
506510
header, Arrays.toString(headerRecord)));
507511
}
508512
if (header != null) {
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv;
19+
20+
/**
21+
* Determines how duplicate header fields should be handled
22+
* if {@link CSVFormat#withHeader(String...)} is not null.
23+
*
24+
* @since 1.9.0
25+
*/
26+
public enum DuplicateHeaderMode {
27+
28+
/**
29+
* Allows all duplicate headers.
30+
*/
31+
ALLOW_ALL,
32+
33+
/**
34+
* Allows duplicate headers only if they're empty strings or null.
35+
*/
36+
ALLOW_EMPTY,
37+
38+
/**
39+
* Disallows duplicate headers entirely.
40+
*/
41+
DISALLOW
42+
}

src/site/resources/checkstyle/checkstyle-suppressions.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,5 @@
1919
"-//Checkstyle//DTD SuppressionFilter Configuration 1.2//EN"
2020
"https://checkstyle.org/dtds/suppressions_1_2.dtd">
2121
<suppressions>
22-
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="511"/>
22+
<suppress checks="LineLength" files="[\\/]CSVParser\.java$" lines="515"/>
2323
</suppressions>

src/test/java/org/apache/commons/csv/CSVFormatTest.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ public void testEqualsHash() throws Exception {
260260
final Object a = method.invoke(CSVFormat.DEFAULT, QuoteMode.MINIMAL);
261261
final Object b = method.invoke(CSVFormat.DEFAULT, QuoteMode.ALL);
262262
assertNotEquals(name, type, a, b);
263+
} else if ("org.apache.commons.csv.DuplicateHeaderMode".equals(type)) {
264+
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.ALLOW_ALL});
265+
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {DuplicateHeaderMode.DISALLOW});
266+
assertNotEquals(name, type, a, b);
263267
} else if ("java.lang.Object[]".equals(type)){
264268
final Object a = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {null, null}});
265269
final Object b = method.invoke(CSVFormat.DEFAULT, new Object[] {new Object[] {new Object(), new Object()}});
@@ -1295,6 +1299,15 @@ public void testWithEscape() {
12951299
}
12961300

12971301

1302+
@Test
1303+
public void testWithEmptyDuplicates() {
1304+
final CSVFormat formatWithEmptyDuplicates =
1305+
CSVFormat.DEFAULT.builder().setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY).build();
1306+
1307+
assertEquals(DuplicateHeaderMode.ALLOW_EMPTY, formatWithEmptyDuplicates.getDuplicateHeaderMode());
1308+
assertFalse(formatWithEmptyDuplicates.getAllowDuplicateHeaderNames());
1309+
}
1310+
12981311
@Test
12991312
public void testWithEscapeCRThrowsExceptions() {
13001313
assertThrows(IllegalArgumentException.class, () -> CSVFormat.DEFAULT.withEscape(CR));
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.commons.csv.issues;
19+
20+
import org.apache.commons.csv.CSVFormat;
21+
import org.apache.commons.csv.DuplicateHeaderMode;
22+
import org.junit.jupiter.api.Test;
23+
24+
import static org.junit.jupiter.api.Assertions.assertThrows;
25+
26+
import java.io.IOException;
27+
import java.io.StringReader;
28+
29+
/**
30+
* When {@link CSVFormat#withHeader(String...)} is not null; duplicate headers
31+
* with empty strings should not be allowed.
32+
*
33+
* @see <a href="https://issues.apache.org/jira/browse/CSV-264">Jira Ticker</a>
34+
*/
35+
public class JiraCsv264Test {
36+
37+
private static final String CSV_STRING = "\"\",\"B\",\"\"\n" +
38+
"\"1\",\"2\",\"3\"\n" +
39+
"\"4\",\"5\",\"6\"";
40+
41+
/**
42+
* A CSV file with a random gap in the middle.
43+
*/
44+
private static final String CSV_STRING_GAP = "\"A\",\"B\",\"\",\"\",\"E\"\n" +
45+
"\"1\",\"2\",\"\",\"\",\"5\"\n" +
46+
"\"6\",\"7\",\"\",\"\",\"10\"";
47+
48+
@Test
49+
public void testJiraCsv264() throws IOException {
50+
final CSVFormat csvFormat = CSVFormat.DEFAULT
51+
.builder()
52+
.setHeader()
53+
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
54+
.setAllowMissingColumnNames(true)
55+
.build();
56+
57+
try (StringReader reader = new StringReader(CSV_STRING)) {
58+
assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
59+
}
60+
}
61+
62+
@Test
63+
public void testJiraCsv264WithGapAllowEmpty() throws IOException {
64+
final CSVFormat csvFormat = CSVFormat.DEFAULT
65+
.builder()
66+
.setHeader()
67+
.setDuplicateHeaderMode(DuplicateHeaderMode.ALLOW_EMPTY)
68+
.setAllowMissingColumnNames(true)
69+
.build();
70+
71+
try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
72+
csvFormat.parse(reader);
73+
}
74+
}
75+
76+
@Test
77+
public void testJiraCsv264WithGapDisallow() throws IOException {
78+
final CSVFormat csvFormat = CSVFormat.DEFAULT
79+
.builder()
80+
.setHeader()
81+
.setDuplicateHeaderMode(DuplicateHeaderMode.DISALLOW)
82+
.setAllowMissingColumnNames(true)
83+
.build();
84+
85+
try (StringReader reader = new StringReader(CSV_STRING_GAP)) {
86+
assertThrows(IllegalArgumentException.class, () -> csvFormat.parse(reader));
87+
}
88+
}
89+
}

0 commit comments

Comments
 (0)