diff --git a/src/main/java/org/apache/commons/csv/CSVFormat.java b/src/main/java/org/apache/commons/csv/CSVFormat.java index 9de9c80c94..ac2105fb4d 100644 --- a/src/main/java/org/apache/commons/csv/CSVFormat.java +++ b/src/main/java/org/apache/commons/csv/CSVFormat.java @@ -264,7 +264,7 @@ public CSVFormat getFormat() { * @see Predefined#Default */ public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, - null, null, null, false, false, false, false, false, false); + null, null, null, false, false, false, false, false, false, false); /** * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is @@ -398,7 +398,7 @@ public CSVFormat getFormat() { .withQuoteMode(QuoteMode.MINIMAL) .withSkipHeaderRecord(false); // @formatter:off - + /** * Default MongoDB TSV format used by the {@code mongoexport} operation. *

@@ -434,7 +434,7 @@ public CSVFormat getFormat() { .withQuoteMode(QuoteMode.MINIMAL) .withSkipHeaderRecord(false); // @formatter:off - + /** * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. * @@ -670,7 +670,7 @@ private static boolean isLineBreak(final Character c) { */ public static CSVFormat newFormat(final char delimiter) { return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, - false, false, false, false); + false, false, false, false, false); } /** @@ -719,6 +719,8 @@ public static CSVFormat valueOf(final String format) { private final boolean autoFlush; + private final boolean ignoreDuplicateHeaderEntries; + /** * Creates a customized CSV format. * @@ -755,6 +757,8 @@ public static CSVFormat valueOf(final String format) { * @param trailingDelimiter * TODO * @param autoFlush + * @param ignoreDuplicateHeaderEntries + * {@code true} when duplicate header entries should be ignored * @throws IllegalArgumentException * if the delimiter is a line break character */ @@ -763,7 +767,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, - final boolean trailingDelimiter, final boolean autoFlush) { + final boolean trailingDelimiter, final boolean autoFlush, final boolean ignoreDuplicateHeaderEntries) { this.delimiter = delimiter; this.quoteCharacter = quoteChar; this.quoteMode = quoteMode; @@ -781,6 +785,7 @@ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMo this.trailingDelimiter = trailingDelimiter; this.trim = trim; this.autoFlush = autoFlush; + this.ignoreDuplicateHeaderEntries = ignoreDuplicateHeaderEntries; validate(); } @@ -1034,6 
+1039,15 @@ public boolean getTrim() { return trim; } + /** + * Returns whether to ignore duplicate header entries. + * + * @return whether to ignore duplicate header entries. + */ + public boolean getIgnoreDuplicateHeaderEntries() { + return ignoreDuplicateHeaderEntries; + } + @Override public int hashCode() { final int prime = 31; @@ -1542,7 +1556,7 @@ private void validate() throws IllegalArgumentException { } // validate header - if (header != null) { + if (header != null && !ignoreDuplicateHeaderEntries) { final Set dupCheck = new HashSet<>(); for (final String hdr : header) { if (!dupCheck.add(hdr)) { @@ -1575,7 +1589,8 @@ public CSVFormat withAllowMissingColumnNames() { public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1590,7 +1605,8 @@ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNam public CSVFormat withAutoFlush(final boolean autoFlush) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1625,7 +1641,8 @@ public CSVFormat withCommentMarker(final Character commentMarker) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, 
ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1643,7 +1660,8 @@ public CSVFormat withDelimiter(final char delimiter) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1674,7 +1692,8 @@ public CSVFormat withEscape(final Character escape) { } return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1829,7 +1848,8 @@ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLExceptio public CSVFormat withHeader(final String... header) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1850,7 +1870,8 @@ public CSVFormat withHeader(final String... header) { public CSVFormat withHeaderComments(final Object... 
headerComments) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1875,7 +1896,8 @@ public CSVFormat withIgnoreEmptyLines() { public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1901,7 +1923,8 @@ public CSVFormat withIgnoreHeaderCase() { public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1926,7 +1949,8 @@ public CSVFormat withIgnoreSurroundingSpaces() { public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, 
autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1945,7 +1969,8 @@ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpac public CSVFormat withNullString(final String nullString) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1976,7 +2001,8 @@ public CSVFormat withQuote(final Character quoteChar) { } return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, - allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -1990,7 +2016,8 @@ public CSVFormat withQuote(final Character quoteChar) { public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -2028,7 +2055,8 @@ public CSVFormat withRecordSeparator(final char recordSeparator) { public CSVFormat withRecordSeparator(final String recordSeparator) { return new CSVFormat(delimiter, quoteCharacter, 
quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -2055,7 +2083,8 @@ public CSVFormat withSkipHeaderRecord() { public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -2096,7 +2125,8 @@ public CSVFormat withTrailingDelimiter() { public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } /** @@ -2121,6 +2151,39 @@ public CSVFormat withTrim() { public CSVFormat withTrim(final boolean trim) { return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, - skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush); + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + 
ignoreDuplicateHeaderEntries); + } + + /** + * Returns a new {@code CSVFormat} with the ignore-duplicate-header-entries option set to {@code true}. + * Must be called earlier in the chain than any call that sets a header which may contain duplicate + * header entries, e.g. {@code CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().withFirstRecordAsHeader()}. + * + * @return A new {@code CSVFormat} that will ignore duplicate header entries. + * @see #withIgnoreDuplicateHeaderEntries(boolean) + * @since 1.7 + */ + public CSVFormat withIgnoreDuplicateHeaderEntries() { + return this.withIgnoreDuplicateHeaderEntries(true); + } + + /** + * Returns a new {@code CSVFormat} with the given setting for ignoring duplicate header entries. + * When {@code true}, must be called earlier in the chain than any call that sets a header which may + * contain duplicate header entries, e.g. + * {@code CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().withFirstRecordAsHeader()}. + * + * @param ignoreDuplicateHeaderEntries + * {@code true} to accept duplicate header entries, + * {@code false} to keep the requirement for unique header entries.
+ * @return A new {@code CSVFormat} that will ignore duplicate header entries if specified as {@code true} + * @since 1.7 + */ + public CSVFormat withIgnoreDuplicateHeaderEntries(final boolean ignoreDuplicateHeaderEntries) { + return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, + ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, + skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, + ignoreDuplicateHeaderEntries); } } diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 6e121e8086..8c18d0045c 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -33,12 +33,15 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; /** * Parses CSV files according to the specified format. @@ -282,7 +285,7 @@ public static CSVParser parse(final URL url, final Charset charset, final CSVFor private final CSVFormat format; /** A mapping of column names to column indices */ - private final Map headerMap; + private final Map> headerMap; private final Lexer lexer; @@ -414,9 +417,52 @@ public String getFirstEndOfLine() { * The map keys are column names. The map values are 0-based indices. *

* @return a copy of the header map that iterates in column order. + * @throws IllegalStateException if the format is not supported unique header entries */ public Map getHeaderMap() { - return this.headerMap == null ? null : new LinkedHashMap<>(this.headerMap); + if (!this.format.getIgnoreDuplicateHeaderEntries()) { + if (this.headerMap != null) { + final LinkedHashMap headerMap = new LinkedHashMap<>(); + for (final Map.Entry> entry : this.headerMap.entrySet()) { + headerMap.put(entry.getKey(), entry.getValue().iterator().next()); + } + return headerMap; + } else { + return null; + } + } else { + throw new IllegalStateException("The current parser format is not supported unique header entries. To " + + "support this functionality need to exclude the ignore duplicate header entries from CSV format"); + } + } + + /** + * Returns a copy of the header map with keys in the iteration order of the parser's internal header map. + * A header name that occurs once maps to a set containing its single column index. + * A duplicated header name maps to the sorted set of all its column indices. For example: + *

+ * original header -> [A, B, C, B] + *
duplicate header map -> {A}:[0], {B}:[1, 3], {C}:[2]
+ * + * @return a copy of the header map, or {@code null} if the parser has no header map. + * @throws IllegalStateException if the format does not ignore duplicate header entries + * @since 1.7 + */ + public Map> getDuplicateHeaderMap() { + if (this.format.getIgnoreDuplicateHeaderEntries()) { + if (this.headerMap != null) { + final Map> headerMap = new LinkedHashMap<>(); + for (final Map.Entry> entry : this.headerMap.entrySet()) { + headerMap.put(entry.getKey(), new TreeSet<>(entry.getValue())); + } + return headerMap; + } else { + return null; + } + } else { + throw new IllegalStateException("The current parser format is not supported duplicate header entries. " + + "To support this functionality set up CSV format with ignore duplicate header entries"); + } } /** @@ -460,13 +506,13 @@ public List getRecords() throws IOException { * @return null if the format has no header. * @throws IOException if there is a problem reading the header or skipping the first record */ - private Map initializeHeader() throws IOException { - Map hdrMap = null; + private Map> initializeHeader() throws IOException { + Map> hdrMap = null; final String[] formatHeader = this.format.getHeader(); if (formatHeader != null) { hdrMap = this.format.getIgnoreHeaderCase() ? 
- new TreeMap(String.CASE_INSENSITIVE_ORDER) : - new LinkedHashMap(); + new TreeMap>(String.CASE_INSENSITIVE_ORDER) : + new LinkedHashMap>(); String[] headerRecord = null; if (formatHeader.length == 0) { @@ -488,11 +534,17 @@ private Map initializeHeader() throws IOException { final String header = headerRecord[i]; final boolean containsHeader = hdrMap.containsKey(header); final boolean emptyHeader = header == null || header.trim().isEmpty(); - if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames())) { + if (containsHeader && (!emptyHeader || !this.format.getAllowMissingColumnNames()) + && !this.format.getIgnoreDuplicateHeaderEntries()) { throw new IllegalArgumentException("The header contains a duplicate name: \"" + header + "\" in " + Arrays.toString(headerRecord)); } - hdrMap.put(header, Integer.valueOf(i)); + final Set headerIndexes = hdrMap.get(header); + if (headerIndexes == null) { + hdrMap.put(header, new TreeSet<>(Collections.singleton(i))); + } else { + headerIndexes.add(i); + } } } } diff --git a/src/main/java/org/apache/commons/csv/CSVRecord.java b/src/main/java/org/apache/commons/csv/CSVRecord.java index 34a3ba2127..c20ac20a41 100644 --- a/src/main/java/org/apache/commons/csv/CSVRecord.java +++ b/src/main/java/org/apache/commons/csv/CSVRecord.java @@ -19,11 +19,13 @@ import java.io.Serializable; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; /** * A CSV record parsed from a CSV file. @@ -40,7 +42,7 @@ public final class CSVRecord implements Serializable, Iterable { private final String comment; /** The column name to index mapping. */ - private final Map mapping; + private final Map> mapping; /** The record number. 
*/ private final long recordNumber; @@ -48,8 +50,8 @@ public final class CSVRecord implements Serializable, Iterable { /** The values of the record */ private final String[] values; - CSVRecord(final String[] values, final Map mapping, final String comment, final long recordNumber, - final long characterPosition) { + CSVRecord(final String[] values, final Map> mapping, final String comment, final long recordNumber, + final long characterPosition) { this.recordNumber = recordNumber; this.values = values != null ? values : EMPTY_STRING_ARRAY; this.mapping = mapping; @@ -89,26 +91,81 @@ public String get(final int i) { * if no header mapping was provided * @throws IllegalArgumentException * if {@code name} is not mapped or if the record is inconsistent + * @see #get(String, int) * @see #isConsistent() * @see CSVFormat#withNullString(String) */ public String get(final String name) { + return this.get(name, 0); + } + + /** + * Returns the value of the column found at position {@code order} among the columns with the given name. + * + * @param name the name of the (possibly duplicated) column to be retrieved. + * @param order the 0-based position among the columns sharing {@code name}; {@code 0} selects the first one + * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
+ * @throws IllegalStateException + * if no header mapping was provided + * @throws IllegalArgumentException + * if {@code name} is not mapped, if no column exists at {@code order}, or if the record is inconsistent + * @since 1.7 + */ + public String get(final String name, final int order) { if (mapping == null) { throw new IllegalStateException( - "No header mapping was specified, the record values can't be accessed by name"); + "No header mapping was specified, the record values can't be accessed by name"); + } + Integer index = null; + int orderPointer = 0; + final Set indexes = mapping.get(name); + if (indexes != null) { + final Iterator iterator = indexes.iterator(); + while (iterator.hasNext() && orderPointer <= order) { + index = iterator.next(); + orderPointer++; + } } - final Integer index = mapping.get(name); if (index == null) { throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, - mapping.keySet())); + mapping.keySet())); + } + if (orderPointer <= order) { + throw new IllegalArgumentException(String.format( + "Order for duplicate header '%s' is %d but CSVRecord only has %d headers with same name!", + name, order, orderPointer)); } try { return values[index.intValue()]; } catch (final ArrayIndexOutOfBoundsException e) { throw new IllegalArgumentException(String.format( - "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, - Integer.valueOf(values.length))); + "Index for header '%s' is %d but CSVRecord only has %d values!", name, index, + Integer.valueOf(values.length))); + } + } + + /** + * Returns the number of columns that share the given name. + * + * @param name the name of the column.
+ * @return {@code 1} if the column name is unique, otherwise the number of columns that carry the name + * @throws IllegalStateException + * if no header mapping was provided + * @throws IllegalArgumentException + * if {@code name} is not mapped + * @since 1.7 + */ + public int getDuplicatesNumber(final String name) { + if (mapping == null) { + throw new IllegalStateException( + "No header mapping was specified, the record values can't be accessed by name"); + } + final Set indexes = mapping.get(name); + if (indexes == null) { + throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name, + mapping.keySet())); } + return indexes.size(); } /** @@ -194,7 +251,7 @@ public boolean isMapped(final String name) { * @return whether a given columns is mapped and has a value */ public boolean isSet(final String name) { - return isMapped(name) && mapping.get(name).intValue() < values.length; + return isMapped(name) && mapping.get(name).iterator().next() < values.length; } /** @@ -218,8 +275,8 @@ > M putIn(final M map) { if (mapping == null) { return map; } - for (final Entry entry : mapping.entrySet()) { - final int col = entry.getValue().intValue(); + for (final Entry> entry : mapping.entrySet()) { + final int col = entry.getValue().iterator().next(); if (col < values.length) { map.put(entry.getKey(), values[col]); } diff --git a/src/test/java/org/apache/commons/csv/CSVFormatTest.java b/src/test/java/org/apache/commons/csv/CSVFormatTest.java index 641ae500cd..85d8d3548b 100644 --- a/src/test/java/org/apache/commons/csv/CSVFormatTest.java +++ b/src/test/java/org/apache/commons/csv/CSVFormatTest.java @@ -1096,4 +1096,10 @@ public void testWithSystemRecordSeparator() throws Exception { assertEquals(System.getProperty("line.separator"), formatWithRecordSeparator.getRecordSeparator()); } + @Test + public void testWithIgnoreDuplicateColumnHeaders() throws Exception { + 
assertFalse(CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries(false).getIgnoreDuplicateHeaderEntries()); + assertTrue(CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().getIgnoreDuplicateHeaderEntries()); + } + } diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java index 0802078d08..fbd3f90cc3 100644 --- a/src/test/java/org/apache/commons/csv/CSVParserTest.java +++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java @@ -43,10 +43,13 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Set; +import java.util.TreeSet; import org.apache.commons.io.input.BOMInputStream; import org.junit.Assert; @@ -484,6 +487,60 @@ public void testGetHeaderMap() throws Exception { } } + @Test(expected = IllegalStateException.class) + public void testGetHeaderMapWithIgnoreDuplicate() throws Exception { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", + CSVFormat.DEFAULT.withHeader("A", "B", "C").withIgnoreDuplicateHeaderEntries())) { + parser.getHeaderMap(); + } + } + + @Test + public void testGetDuplicateHeaderMap() throws Exception { + try (final CSVParser parser = CSVParser.parse("a,b,c,d\n1,2,3,4\nw,x,y,z", + CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries().withHeader("A", "B", "C", "B"))) { + final Map> headerMap = parser.getDuplicateHeaderMap(); + final Iterator columnNames = headerMap.keySet().iterator(); + // Unique headers are iterated in column order. 
+ Assert.assertEquals("A", columnNames.next()); + Assert.assertEquals("B", columnNames.next()); + Assert.assertEquals("C", columnNames.next()); + Assert.assertFalse(columnNames.hasNext()); + Assert.assertEquals(headerMap.get("B"), new TreeSet<>(Arrays.asList(1, 3))); + Assert.assertEquals(1, headerMap.get("A").size()); + Assert.assertEquals(1, headerMap.get("C").size()); + final Iterator records = parser.iterator(); + + // Parse to make sure getDuplicateHeaderMap did not have a side-effect. + for (int i = 0; i < 3; i++) { + assertTrue(records.hasNext()); + final CSVRecord record = records.next(); + assertEquals(record.get(0), record.get("A")); + assertEquals(record.get(1), record.get("B")); + assertEquals(record.get(2), record.get("C")); + assertEquals(record.get(3), record.get("B", 1)); + } + + assertFalse(records.hasNext()); + } + } + + @Test + public void testGetDuplicateHeaderMapWithoutHeader() throws IOException { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", + CSVFormat.DEFAULT.withIgnoreDuplicateHeaderEntries())) { + Assert.assertNull(parser.getDuplicateHeaderMap()); + } + } + + @Test(expected = IllegalStateException.class) + public void testGetDuplicateHeaderMapWithoutIgnoreDuplicate() throws IOException { + try (final CSVParser parser = CSVParser.parse("a,b,c\n1,2,3\nx,y,z", + CSVFormat.DEFAULT.withHeader("A", "B", "C"))) { + parser.getDuplicateHeaderMap(); + } + } + @Test public void testGetLine() throws IOException { try (final CSVParser parser = CSVParser.parse(CSV_INPUT, CSVFormat.DEFAULT.withIgnoreSurroundingSpaces())) { diff --git a/src/test/java/org/apache/commons/csv/CSVRecordTest.java b/src/test/java/org/apache/commons/csv/CSVRecordTest.java index 6347cc51a4..6347a4d338 100644 --- a/src/test/java/org/apache/commons/csv/CSVRecordTest.java +++ b/src/test/java/org/apache/commons/csv/CSVRecordTest.java @@ -24,10 +24,13 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import 
java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.Set; import java.util.TreeMap; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import org.junit.Assert; @@ -39,18 +42,22 @@ public class CSVRecordTest { private enum EnumFixture { UNKNOWN_COLUMN } private String[] values; - private CSVRecord record, recordWithHeader; - private Map header; + private CSVRecord record, recordWithHeader, recordWithDuplicateHeader; + private Map> header; @Before public void setUp() throws Exception { values = new String[] { "A", "B", "C" }; record = new CSVRecord(values, null, null, 0, -1); header = new HashMap<>(); - header.put("first", Integer.valueOf(0)); - header.put("second", Integer.valueOf(1)); - header.put("third", Integer.valueOf(2)); + header.put("first", Collections.singleton(0)); + header.put("second", Collections.singleton(1)); + header.put("third", Collections.singleton(2)); recordWithHeader = new CSVRecord(values, header, null, 0, -1); + final Map> duplicateHeader = new HashMap<>(); + duplicateHeader.put("first", new TreeSet<>(Arrays.asList(0, 2))); + duplicateHeader.put("second", Collections.singleton(1)); + recordWithDuplicateHeader = new CSVRecord(values, duplicateHeader, null, 0, -1); } @Test @@ -67,12 +74,55 @@ public void testGetString() { assertEquals(values[2], recordWithHeader.get("third")); } + @Test + public void testGetByOrderString() { + assertEquals(values[0], recordWithDuplicateHeader.get("first", 0)); + assertEquals(values[1], recordWithDuplicateHeader.get("second")); + assertEquals(values[2], recordWithDuplicateHeader.get("first", 1)); + } + + @Test + public void testGetDuplicatesNumber() { + assertEquals(2, recordWithDuplicateHeader.getDuplicatesNumber("first")); + assertEquals(1, recordWithDuplicateHeader.getDuplicatesNumber("second")); + } + @Test(expected = IllegalArgumentException.class) public void testGetStringInconsistentRecord() { - header.put("fourth", Integer.valueOf(4)); + 
header.put("fourth", Collections.singleton(4)); recordWithHeader.get("fourth"); } + @Test(expected = IllegalArgumentException.class) + public void testGetStringNotExistsOrder() { + recordWithDuplicateHeader.get("first", 3); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetStringNegativeOrder() { + recordWithDuplicateHeader.get("first", -1); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetByOrderStringUnmappedName() { + recordWithDuplicateHeader.get("fifth", 1); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetDuplicatesUnmappedName() { + recordWithDuplicateHeader.getDuplicatesNumber("fifth"); + } + + @Test(expected = IllegalStateException.class) + public void getByOrderStringNoHeader() { + record.get("first", 1); + } + + @Test(expected = IllegalStateException.class) + public void testGetDuplicatesNoHeader() { + record.getDuplicatesNumber("first"); + } + @Test(expected = IllegalStateException.class) public void testGetStringNoHeader() { record.get("first"); @@ -103,7 +153,7 @@ public void testIsConsistent() { assertTrue(record.isConsistent()); assertTrue(recordWithHeader.isConsistent()); - header.put("fourth", Integer.valueOf(4)); + header.put("fourth", Collections.singleton(4)); assertFalse(recordWithHeader.isConsistent()); }