diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java
index 9de9c80c94..ac2105fb4d 100644
--- a/src/main/java/org/apache/commons/csv/CSVFormat.java
+++ b/src/main/java/org/apache/commons/csv/CSVFormat.java
@@ -264,7 +264,7 @@ public CSVFormat getFormat() {
* @see Predefined#Default
*/
public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
- null, null, null, false, false, false, false, false, false);
+ null, null, null, false, false, false, false, false, false, false);
/**
* Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
@@ -398,7 +398,7 @@ public CSVFormat getFormat() {
.withQuoteMode(QuoteMode.MINIMAL)
.withSkipHeaderRecord(false);
// @formatter:off
-
+
/**
* Default MongoDB TSV format used by the {@code mongoexport} operation.
*
@@ -434,7 +434,7 @@ public CSVFormat getFormat() {
.withQuoteMode(QuoteMode.MINIMAL)
.withSkipHeaderRecord(false);
// @formatter:off
-
+
/**
* Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
*
@@ -670,7 +670,7 @@ private static boolean isLineBreak(final Character c) {
*/
public static CSVFormat newFormat(final char delimiter) {
return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
- false, false, false, false);
+ false, false, false, false, false);
}
/**
@@ -719,6 +719,8 @@ public static CSVFormat valueOf(final String format) {
private final boolean autoFlush;
+ private final boolean ignoreDuplicateHeaderEntries;
+
/**
* Creates a customized CSV format.
*
@@ -755,6 +757,8 @@ public static CSVFormat valueOf(final String format) {
* @param trailingDelimiter
* TODO
* @param autoFlush
+ * @param ignoreDuplicateHeaderEntries
+ * {@code true} when duplicate header entries should be ignored
* @throws IllegalArgumentException
* if the delimiter is a line break character
*/
@@ -763,7 +767,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
- final boolean trailingDelimiter, final boolean autoFlush) {
+ final boolean trailingDelimiter, final boolean autoFlush, final boolean ignoreDuplicateHeaderEntries) {
this.delimiter = delimiter;
this.quoteCharacter = quoteChar;
this.quoteMode = quoteMode;
@@ -781,6 +785,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo
this.trailingDelimiter = trailingDelimiter;
this.trim = trim;
this.autoFlush = autoFlush;
+ this.ignoreDuplicateHeaderEntries = ignoreDuplicateHeaderEntries;
validate();
}
@@ -1034,6 +1039,15 @@ public boolean getTrim() {
return trim;
}
+ /**
+ * Returns whether this format tolerates duplicate header entries.
+ * When {@code true}, the duplicate-name check on the configured header is skipped.
+ *
+ * @return {@code true} if duplicate header entries are ignored, {@code false} otherwise.
+ * @since 1.7
+ */
+ public boolean getIgnoreDuplicateHeaderEntries() {
+ return ignoreDuplicateHeaderEntries;
+ }
+
@Override
public int hashCode() {
final int prime = 31;
@@ -1542,7 +1556,7 @@ private void validate() throws IllegalArgumentException {
}
// validate header
- if (header != null) {
+ if (header != null && !ignoreDuplicateHeaderEntries) {
final Set dupCheck = new HashSet<>();
for (final String hdr : header) {
if (!dupCheck.add(hdr)) {
@@ -1575,7 +1589,8 @@ public CSVFormat withAllowMissingColumnNames() {
public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1590,7 +1605,8 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam
public CSVFormat withAutoFlush(final boolean autoFlush) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1625,7 +1641,8 @@ public CSVFormat withCommentMarker(final Character commentMarker) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1643,7 +1660,8 @@ public CSVFormat withDelimiter(final char delimiter) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1674,7 +1692,8 @@ public CSVFormat withEscape(final Character escape) {
}
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
- allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1829,7 +1848,8 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio
public CSVFormat withHeader(final String... header) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1850,7 +1870,8 @@ public CSVFormat withHeader(final String... header) {
public CSVFormat withHeaderComments(final Object... headerComments) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1875,7 +1896,8 @@ public CSVFormat withIgnoreEmptyLines() {
public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1901,7 +1923,8 @@ public CSVFormat withIgnoreHeaderCase() {
public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1926,7 +1949,8 @@ public CSVFormat withIgnoreSurroundingSpaces() {
public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1945,7 +1969,8 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac
public CSVFormat withNullString(final String nullString) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1976,7 +2001,8 @@ public CSVFormat withQuote(final Character quoteChar) {
}
return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
- allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -1990,7 +2016,8 @@ public CSVFormat withQuote(final Character quoteChar) {
public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -2028,7 +2055,8 @@ public CSVFormat withRecordSeparator(final char recordSeparator) {
public CSVFormat withRecordSeparator(final String recordSeparator) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -2055,7 +2083,8 @@ public CSVFormat withSkipHeaderRecord() {
public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -2096,7 +2125,8 @@ public CSVFormat withTrailingDelimiter() {
public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
/**
@@ -2121,6 +2151,39 @@ public CSVFormat withTrim() {
public CSVFormat withTrim(final boolean trim) {
return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
- skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} that ignores duplicate header entries.
+ * The header is checked for duplicates every time a format is created, so this method must be called before a
+ * header that may contain duplicates is set, e.g.
+ * {@code CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().withHeader("A", "B", "A")}.
+ *
+ * @return A new {@code CSVFormat} that will ignore duplicate header entries.
+ * @see #withIgnoreDuplicateHeaderEntries(boolean)
+ * @since 1.7
+ */
+ public CSVFormat withIgnoreDuplicateHeaderEntries() {
+ return this.withIgnoreDuplicateHeaderEntries(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the given policy for duplicate header entries.
+ * The header is checked for duplicates every time a format is created, so when passing {@code true} this
+ * method must be called before a header that may contain duplicates is set, e.g.
+ * {@code CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries(true).withHeader("A", "B", "A")}.
+ *
+ * @param ignoreDuplicateHeaderEntries
+ * {@code true} to accept duplicate header entries,
+ * {@code false} to keep requiring unique header entries.
+ * @return A new {@code CSVFormat} that will ignore duplicate header entries if specified as {@code true}
+ * @since 1.7
+ */
+ public CSVFormat withIgnoreDuplicateHeaderEntries(final boolean ignoreDuplicateHeaderEntries) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ ignoreDuplicateHeaderEntries);
}
}
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index 6e121e8086..8c18d0045c 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -33,12 +33,15 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.Set;
import java.util.TreeMap;
+import java.util.TreeSet;
/**
* Parses CSV files according to the specified format.
@@ -282,7 +285,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor
private final CSVFormat format;
/** A mapping of column names to column indices */
- private final Map headerMap;
+ private final Map> headerMap;
private final Lexer lexer;
@@ -414,9 +417,52 @@ public String getFirstEndOfLine() {
* The map keys are column names. The map values are 0-based indices.
*
* @return a copy of the header map that iterates in column order.
+ * @throws IllegalStateException if the format ignores duplicate header entries (use {@link #getDuplicateHeaderMap()})
*/
public Map getHeaderMap() {
- return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap);
+ if (!this.format.getIgnoreDuplicateHeaderEntries()) {
+ if (this.headerMap != null) {
+ final LinkedHashMap headerMap = new LinkedHashMap<>();
+ for (final Map.Entry> entry : this.headerMap.entrySet()) {
+ headerMap.put(entry.getKey(), entry.getValue().iterator().next());
+ }
+ return headerMap;
+ } else {
+ return null;
+ }
+ } else {
+ throw new IllegalStateException("The current parser format is not supported unique header entries. To "
+ + "support this functionality need to exclude the ignore duplicate header entries from CSV format");
+ }
+ }
+
+ /**
+ * Returns a copy of the header map in which every column name maps to all of the column indices that carry it.
+ * A name that occurs once maps to a single-element set; a name that occurs several times maps to the
+ * ascending set of all its 0-based column indices. For example:
+ * <br>
+ * {@code original header -> [A, B, C, B]}
+ * <br>
+ * {@code duplicate header map -> {A}:[0], {B}:[1, 3], {C}:[2]}
+ *
+ * @return a copy of the header map, iterating in the order of the parser's internal header map, or
+ * {@code null} if the parser has no header map.
+ * @throws IllegalStateException if the format is not set up to ignore duplicate header entries
+ * @since 1.7
+ */
+ public Map> getDuplicateHeaderMap() {
+ if (this.format.getIgnoreDuplicateHeaderEntries()) {
+ if (this.headerMap != null) {
+ final Map> headerMap = new LinkedHashMap<>();
+ for (final Map.Entry> entry : this.headerMap.entrySet()) {
+ headerMap.put(entry.getKey(), new TreeSet<>(entry.getValue())); // copy, so callers cannot alter parser state
+ }
+ return headerMap;
+ } else {
+ return null;
+ }
+ } else {
+ throw new IllegalStateException("The current parser format is not supported duplicate header entries. "
+ + "To support this functionality set up CSV format with ignore duplicate header entries");
+ }
}
/**
@@ -460,13 +506,13 @@ public List getRecords() throws IOException {
* @return null if the format has no header.
* @throws IOException if there is a problem reading the header or skipping the first record
*/
- private Map initializeHeader() throws IOException {
- Map hdrMap = null;
+ private Map> initializeHeader() throws IOException {
+ Map> hdrMap = null;
final String[] formatHeader = this.format.getHeader();
if (formatHeader != null) {
hdrMap = this.format.getIgnoreHeaderCase() ?
- new TreeMap(String.CASE_INSENSITIVE_ORDER) :
- new LinkedHashMap();
+ new TreeMap>(String.CASE_INSENSITIVE_ORDER) :
+ new LinkedHashMap>();
String[] headerRecord = null;
if (formatHeader.length == 0) {
@@ -488,11 +534,17 @@ private Map initializeHeader() throws IOException {
final String header = headerRecord[i];
final boolean containsHeader = hdrMap.containsKey(header);
final boolean emptyHeader = header == null || header.trim().isEmpty();
- if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) {
+ if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())
+ && !this.format.getIgnoreDuplicateHeaderEntries()) {
throw new IllegalArgumentException("The header contains a duplicate name: \"" + header +
"\" in " + Arrays.toString(headerRecord));
}
- hdrMap.put(header, Integer.valueOf(i));
+ final Set headerIndexes = hdrMap.get(header);
+ if (headerIndexes == null) {
+ hdrMap.put(header, new TreeSet<>(Collections.singleton(i)));
+ } else {
+ headerIndexes.add(i);
+ }
}
}
}
diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java
index 34a3ba2127..c20ac20a41 100644
--- a/src/main/java/org/apache/commons/csv/CSVRecord.java
+++ b/src/main/java/org/apache/commons/csv/CSVRecord.java
@@ -19,11 +19,13 @@
import java.io.Serializable;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Set;
/**
* A CSV record parsed from a CSV file.
@@ -40,7 +42,7 @@ public final class CSVRecord implements Serializable, Iterable {
private final String comment;
/** The column name to index mapping. */
- private final Map mapping;
+ private final Map> mapping;
/** The record number. */
private final long recordNumber;
@@ -48,8 +50,8 @@ public final class CSVRecord implements Serializable, Iterable {
/** The values of the record */
private final String[] values;
- CSVRecord(final String[] values, final Map mapping, final String comment, final long recordNumber,
- final long characterPosition) {
+ CSVRecord(final String[] values, final Map> mapping, final String comment, final long recordNumber,
+ final long characterPosition) {
this.recordNumber = recordNumber;
this.values = values != null ? values : EMPTY_STRING_ARRAY;
this.mapping = mapping;
@@ -89,26 +91,81 @@ public String get(final int i) {
* if no header mapping was provided
* @throws IllegalArgumentException
* if {@code name} is not mapped or if the record is inconsistent
+ * @see #get(String, int)
* @see #isConsistent()
* @see CSVFormat#withNullString(String)
*/
public String get(final String name) {
+ return this.get(name, 0);
+ }
+
+ /**
+ * Returns the value of one of the columns that share the given name.
+ *
+ * @param name the name of the (possibly duplicated) column to be retrieved.
+ * @param order the 0-based position of the wanted column among the columns named {@code name}
+ * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
+ * @throws IllegalStateException
+ * if no header mapping was provided
+ * @throws IllegalArgumentException
+ * if {@code name} is not mapped, if {@code order} is negative (reported as an unmapped name) or not
+ * less than the number of columns named {@code name}, or if the record is inconsistent
+ * @since 1.7
+ */
+ public String get(final String name, final int order) {
if (mapping == null) {
throw new IllegalStateException(
- "No header mapping was specified, the record values can't be accessed by name");
+ "No header mapping was specified, the record values can't be accessed by name");
+ }
+ Integer index = null;
+ int orderPointer = 0;
+ final Set indexes = mapping.get(name);
+ if (indexes != null) {
+ final Iterator iterator = indexes.iterator();
+ while (iterator.hasNext() && orderPointer <= order) { // advance to occurrence number 'order'
+ index = iterator.next();
+ orderPointer++;
+ }
}
- final Integer index = mapping.get(name);
if (index == null) {
throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
- mapping.keySet()));
+ mapping.keySet()));
+ }
+ if (orderPointer <= order) { // the name has fewer occurrences than requested
+ throw new IllegalArgumentException(String.format(
+ "Order for duplicate header '%s' is %d but CSVRecord only has %d headers with same name!",
+ name, order, orderPointer));
}
try {
return values[index.intValue()];
} catch (final ArrayIndexOutOfBoundsException e) {
throw new IllegalArgumentException(String.format(
- "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
- Integer.valueOf(values.length)));
+ "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
+ Integer.valueOf(values.length)));
+ }
+ }
+
+ /**
+ * Returns how many columns of the header mapping share the given name.
+ *
+ * @param name the name of the column.
+ * @return the number of column indices mapped to {@code name}: 1 for a unique name, more for a duplicated one
+ * @throws IllegalStateException
+ * if no header mapping was provided
+ * @throws IllegalArgumentException
+ * if {@code name} is not mapped
+ * @since 1.7
+ */
+ public int getDuplicatesNumber(final String name) {
+ if (mapping == null) {
+ throw new IllegalStateException(
+ "No header mapping was specified, the record values can't be accessed by name");
+ }
+ final Set indexes = mapping.get(name);
+ if (indexes == null) {
+ throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
+ mapping.keySet()));
}
+ return indexes.size();
}
/**
@@ -194,7 +251,7 @@ public boolean isMapped(final String name) {
* @return whether a given columns is mapped and has a value
*/
public boolean isSet(final String name) {
- return isMapped(name) && mapping.get(name).intValue() < values.length;
+ return isMapped(name) && mapping.get(name).iterator().next() < values.length;
}
/**
@@ -218,8 +275,8 @@ > M putIn(final M map) {
if (mapping == null) {
return map;
}
- for (final Entry entry : mapping.entrySet()) {
- final int col = entry.getValue().intValue();
+ for (final Entry> entry : mapping.entrySet()) {
+ final int col = entry.getValue().iterator().next();
if (col < values.length) {
map.put(entry.getKey(), values[col]);
}
diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
index 641ae500cd..85d8d3548b 100644
--- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java
@@ -1096,4 +1096,10 @@ public void testWithSystemRecordSeparator() throws Exception {
assertEquals(System.getProperty("line.separator"), formatWithRecordSeparator.getRecordSeparator());
}
+ @Test // the flag set through either overload must be reported back by the getter
+ public void testWithIgnoreDuplicateColumnHeaders() throws Exception {
+ assertFalse(CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries(false).getIgnoreDuplicateHeaderEntries());
+ assertTrue(CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().getIgnoreDuplicateHeaderEntries());
+ }
+
}
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 0802078d08..fbd3f90cc3 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -43,10 +43,13 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.TreeSet;
import org.apache.commons.io.input.BOMInputStream;
import org.junit.Assert;
@@ -484,6 +487,60 @@ public void testGetHeaderMap() throws Exception {
}
}
+ @Test(expected = IllegalStateException.class)
+ public void testGetHeaderMapWithIgnoreDuplicate() throws Exception {
+ try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
+ CSVFormat.DEFAULT.withHeader("A", "B", "C").withIgnoreDuplicateHeaderEntries())) {
+ parser.getHeaderMap(); // rejected because the format ignores duplicate header entries
+ }
+ }
+
+ @Test
+ public void testGetDuplicateHeaderMap() throws Exception {
+ try (final CSVParser parser = CSVParser.parse("a,b,c,d\n1,2,3,4\nw,x,y,z",
+ CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().withHeader("A", "B", "C", "B"))) {
+ final Map> headerMap = parser.getDuplicateHeaderMap();
+ final Iterator columnNames = headerMap.keySet().iterator();
+ // Each distinct header name appears exactly once, in the order of its first column.
+ Assert.assertEquals("A", columnNames.next());
+ Assert.assertEquals("B", columnNames.next());
+ Assert.assertEquals("C", columnNames.next());
+ Assert.assertFalse(columnNames.hasNext());
+ Assert.assertEquals(headerMap.get("B"), new TreeSet<>(Arrays.asList(1, 3))); // "B" names columns 1 and 3
+ Assert.assertEquals(1, headerMap.get("A").size());
+ Assert.assertEquals(1, headerMap.get("C").size());
+ final Iterator records = parser.iterator();
+
+ // All three input lines are expected as records, so reading the header map must not consume any input.
+ for (int i = 0; i < 3; i++) {
+ assertTrue(records.hasNext());
+ final CSVRecord record = records.next();
+ assertEquals(record.get(0), record.get("A"));
+ assertEquals(record.get(1), record.get("B"));
+ assertEquals(record.get(2), record.get("C"));
+ assertEquals(record.get(3), record.get("B", 1)); // second column named "B"
+ }
+
+ assertFalse(records.hasNext());
+ }
+ }
+
+ @Test
+ public void testGetDuplicateHeaderMapWithoutHeader() throws IOException {
+ try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
+ CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries())) {
+ Assert.assertNull(parser.getDuplicateHeaderMap()); // no header configured, so there is no map to copy
+ }
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testGetDuplicateHeaderMapWithoutIgnoreDuplicate() throws IOException {
+ try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z",
+ CSVFormat.DEFAULT.withHeader("A", "B", "C"))) {
+ parser.getDuplicateHeaderMap(); // rejected because the format does not ignore duplicate header entries
+ }
+ }
+
@Test
public void testGetLine() throws IOException {
try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) {
diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
index 6347cc51a4..6347a4d338 100644
--- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java
@@ -24,10 +24,13 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
+import java.util.Set;
import java.util.TreeMap;
+import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import org.junit.Assert;
@@ -39,18 +42,22 @@ public class CSVRecordTest {
private enum EnumFixture { UNKNOWN_COLUMN }
private String[] values;
- private CSVRecord record, recordWithHeader;
- private Map header;
+ private CSVRecord record, recordWithHeader, recordWithDuplicateHeader;
+ private Map> header;
@Before
public void setUp() throws Exception {
values = new String[] { "A", "B", "C" };
record = new CSVRecord(values, null, null, 0, -1);
header = new HashMap<>();
- header.put("first", Integer.valueOf(0));
- header.put("second", Integer.valueOf(1));
- header.put("third", Integer.valueOf(2));
+ header.put("first", Collections.singleton(0));
+ header.put("second", Collections.singleton(1));
+ header.put("third", Collections.singleton(2));
recordWithHeader = new CSVRecord(values, header, null, 0, -1);
+ final Map> duplicateHeader = new HashMap<>();
+ duplicateHeader.put("first", new TreeSet<>(Arrays.asList(0, 2)));
+ duplicateHeader.put("second", Collections.singleton(1));
+ recordWithDuplicateHeader = new CSVRecord(values, duplicateHeader, null, 0, -1);
}
@Test
@@ -67,12 +74,55 @@ public void testGetString() {
assertEquals(values[2], recordWithHeader.get("third"));
}
+ @Test
+ public void testGetByOrderString() {
+ assertEquals(values[0], recordWithDuplicateHeader.get("first", 0)); // setUp maps "first" to columns 0 and 2
+ assertEquals(values[1], recordWithDuplicateHeader.get("second"));
+ assertEquals(values[2], recordWithDuplicateHeader.get("first", 1)); // second occurrence of "first"
+ }
+
+ @Test
+ public void testGetDuplicatesNumber() {
+ assertEquals(2, recordWithDuplicateHeader.getDuplicatesNumber("first")); // two columns are named "first"
+ assertEquals(1, recordWithDuplicateHeader.getDuplicatesNumber("second"));
+ }
+
@Test(expected = IllegalArgumentException.class)
public void testGetStringInconsistentRecord() {
- header.put("fourth", Integer.valueOf(4));
+ header.put("fourth", Collections.singleton(4));
recordWithHeader.get("fourth");
}
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetStringNotExistsOrder() {
+ recordWithDuplicateHeader.get("first", 3); // "first" has only two occurrences (orders 0 and 1)
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetStringNegativeOrder() {
+ recordWithDuplicateHeader.get("first", -1); // a negative order never matches an occurrence
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetByOrderStringUnmappedName() {
+ recordWithDuplicateHeader.get("fifth", 1); // "fifth" is not a header of this record
+ }
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testGetDuplicatesUnmappedName() {
+ recordWithDuplicateHeader.getDuplicatesNumber("fifth"); // "fifth" is not a header of this record
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void getByOrderStringNoHeader() {
+ record.get("first", 1); // 'record' is created in setUp without a header mapping
+ }
+
+ @Test(expected = IllegalStateException.class)
+ public void testGetDuplicatesNoHeader() {
+ record.getDuplicatesNumber("first"); // 'record' is created in setUp without a header mapping
+ }
+
@Test(expected = IllegalStateException.class)
public void testGetStringNoHeader() {
record.get("first");
@@ -103,7 +153,7 @@ public void testIsConsistent() {
assertTrue(record.isConsistent());
assertTrue(recordWithHeader.isConsistent());
- header.put("fourth", Integer.valueOf(4));
+ header.put("fourth", Collections.singleton(4));
assertFalse(recordWithHeader.isConsistent());
}