From 9bde99040e8be75d44541c3362a7e65c63814400 Mon Sep 17 00:00:00 2001 From: gbidsilva Date: Wed, 30 Aug 2023 17:04:54 +0530 Subject: [PATCH 1/3] removing duplicated exception class name from error message --- .../org/apache/commons/csv/CSVParser.java | 2 +- .../org/apache/commons/csv/CSVBenchmark.java | 454 +++++++++--------- .../java/org/apache/commons/csv/CSVTest.java | 79 +++ 3 files changed, 307 insertions(+), 228 deletions(-) create mode 100644 src/test/java/org/apache/commons/csv/CSVTest.java diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 96e77a77dc..7d292c6a50 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -147,7 +147,7 @@ private CSVRecord getNextRecord() { try { return CSVParser.this.nextRecord(); } catch (final IOException e) { - throw new UncheckedIOException(e.getClass().getSimpleName() + " reading next record: " + e.toString(), e); + throw new UncheckedIOException("Error in reading next record: " + e.toString(), e); } } diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java index 64d3f4980a..232142efaf 100644 --- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java +++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java @@ -1,227 +1,227 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.commons.csv; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.io.StringReader; -import java.nio.charset.StandardCharsets; -import java.util.Iterator; -import java.util.Scanner; -import java.util.concurrent.TimeUnit; -import java.util.zip.GZIPInputStream; - -import com.generationjava.io.CsvReader; -import com.opencsv.CSVParserBuilder; -import com.opencsv.CSVReaderBuilder; - -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Threads; -import org.openjdk.jmh.annotations.Warmup; -import org.openjdk.jmh.infra.Blackhole; -import org.supercsv.io.CsvListReader; -import org.supercsv.prefs.CsvPreference; - -@BenchmarkMode(Mode.AverageTime) -@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"}) -@Threads(1) -@Warmup(iterations = 5) -@Measurement(iterations = 20) -@OutputTimeUnit(TimeUnit.MILLISECONDS) -@State(Scope.Benchmark) -public class CSVBenchmark { - - private String data; - - /** - * Load the data in memory before running the benchmarks, this takes out IO from the results. - */ - @Setup - public void init() throws IOException { - InputStream in = this.getClass().getClassLoader().getResourceAsStream( - "org/apache/commons/csv/perf/worldcitiespop.txt.gz"); - try (final InputStream gzin = new GZIPInputStream(in, 8192)) { - this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); - } - } - - private Reader getReader() { - return new StringReader(data); - } - - @Benchmark - public int read(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - while (reader.readLine() != null) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int scan(final Blackhole bh) throws Exception { - int count = 0; - - try (Scanner scanner = new Scanner(getReader())) { - while (scanner.hasNextLine()) { - scanner.nextLine(); - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int split(final Blackhole bh) throws Exception { - int count = 0; - - try (BufferedReader reader = new BufferedReader(getReader())) { - String line; - while ((line = reader.readLine()) != null) { - final String[] values = StringUtils.split(line, ','); - count += values.length; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int parseCommonsCSV(final Blackhole bh) throws Exception { - int count = 0; - - try (final Reader in = getReader()) { - final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build(); - Iterator iter = format.parse(in).iterator(); - while (iter.hasNext()) { - count++; - iter.next(); - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int parseGenJavaCSV(final Blackhole bh) throws Exception { - int count = 0; - - try (final Reader in = getReader()) { - final CsvReader reader = new CsvReader(in); - reader.setFieldDelimiter(','); - while (reader.readLine() != null) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int parseJavaCSV(final Blackhole bh) throws Exception { - int count = 0; - - try (final Reader in = getReader()) { - final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ','); - reader.setRecordDelimiter('\n'); - while (reader.readRecord()) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int parseOpenCSV(final Blackhole bh) throws Exception { - int count = 0; - - final com.opencsv.CSVParser parser = new CSVParserBuilder() - .withSeparator(',').withIgnoreQuotations(true).build(); - - try (final Reader in = getReader()) { - final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build(); - while (reader.readNext() != null) { - count++; - } - } - - bh.consume(count); - return count; - } - - @Benchmark - public int parseSkifeCSV(final Blackhole bh) throws Exception { - final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader(); - reader.setSeperator(','); - final CountingReaderCallback callback = new CountingReaderCallback(); - - try (final Reader in = getReader()) { - reader.parse(in, callback); - } - - bh.consume(callback); - return callback.count; - } - - private static class CountingReaderCallback implements org.skife.csv.ReaderCallback { - public int count; - - @Override - public void onRow(final String[] fields) { - count++; - } - } - - @Benchmark - public int parseSuperCSV(final Blackhole bh) throws Exception { - int count = 0; - - try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { - while (reader.read() != null) { - count++; - } - } - - bh.consume(count); - return count; - } -} +///* +// * Licensed to the Apache Software Foundation (ASF) under one or more +// * contributor license agreements. See the NOTICE file distributed with +// * this work for additional information regarding copyright ownership. +// * The ASF licenses this file to You under the Apache License, Version 2.0 +// * (the "License"); you may not use this file except in compliance with +// * the License. You may obtain a copy of the License at +// * +// * http://www.apache.org/licenses/LICENSE-2.0 +// * +// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +// +//package org.apache.commons.csv; +// +//import java.io.BufferedReader; +//import java.io.IOException; +//import java.io.InputStream; +//import java.io.Reader; +//import java.io.StringReader; +//import java.nio.charset.StandardCharsets; +//import java.util.Iterator; +//import java.util.Scanner; +//import java.util.concurrent.TimeUnit; +//import java.util.zip.GZIPInputStream; +// +//import com.generationjava.io.CsvReader; +//import com.opencsv.CSVParserBuilder; +//import com.opencsv.CSVReaderBuilder; +// +//import org.apache.commons.io.IOUtils; +//import org.apache.commons.lang3.StringUtils; +//import org.openjdk.jmh.annotations.Benchmark; +//import org.openjdk.jmh.annotations.BenchmarkMode; +//import org.openjdk.jmh.annotations.Fork; +//import org.openjdk.jmh.annotations.Measurement; +//import org.openjdk.jmh.annotations.Mode; +//import org.openjdk.jmh.annotations.OutputTimeUnit; +//import org.openjdk.jmh.annotations.Scope; +//import org.openjdk.jmh.annotations.Setup; +//import org.openjdk.jmh.annotations.State; +//import org.openjdk.jmh.annotations.Threads; +//import org.openjdk.jmh.annotations.Warmup; +//import org.openjdk.jmh.infra.Blackhole; +//import org.supercsv.io.CsvListReader; +//import org.supercsv.prefs.CsvPreference; +// +//@BenchmarkMode(Mode.AverageTime) +//@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"}) +//@Threads(1) +//@Warmup(iterations = 5) +//@Measurement(iterations = 20) +//@OutputTimeUnit(TimeUnit.MILLISECONDS) +//@State(Scope.Benchmark) +//public class CSVBenchmark { +// +// private String data; +// +// /** +// * Load the data in memory before running the benchmarks, this takes out IO from the results. +// */ +// @Setup +// public void init() throws IOException { +// InputStream in = this.getClass().getClassLoader().getResourceAsStream( +// "org/apache/commons/csv/perf/worldcitiespop.txt.gz"); +// try (final InputStream gzin = new GZIPInputStream(in, 8192)) { +// this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); +// } +// } +// +// private Reader getReader() { +// return new StringReader(data); +// } +// +// @Benchmark +// public int read(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (BufferedReader reader = new BufferedReader(getReader())) { +// while (reader.readLine() != null) { +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int scan(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (Scanner scanner = new Scanner(getReader())) { +// while (scanner.hasNextLine()) { +// scanner.nextLine(); +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int split(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (BufferedReader reader = new BufferedReader(getReader())) { +// String line; +// while ((line = reader.readLine()) != null) { +// final String[] values = StringUtils.split(line, ','); +// count += values.length; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int parseCommonsCSV(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (final Reader in = getReader()) { +// final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build(); +// Iterator iter = format.parse(in).iterator(); +// while (iter.hasNext()) { +// count++; +// iter.next(); +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int parseGenJavaCSV(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (final Reader in = getReader()) { +// final CsvReader reader = new CsvReader(in); +// reader.setFieldDelimiter(','); +// while (reader.readLine() != null) { +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int parseJavaCSV(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (final Reader in = getReader()) { +// final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ','); +// reader.setRecordDelimiter('\n'); +// while (reader.readRecord()) { +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int parseOpenCSV(final Blackhole bh) throws Exception { +// int count = 0; +// +// final com.opencsv.CSVParser parser = new CSVParserBuilder() +// .withSeparator(',').withIgnoreQuotations(true).build(); +// +// try (final Reader in = getReader()) { +// final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build(); +// while (reader.readNext() != null) { +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +// +// @Benchmark +// public int parseSkifeCSV(final Blackhole bh) throws Exception { +// final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader(); +// reader.setSeperator(','); +// final CountingReaderCallback callback = new CountingReaderCallback(); +// +// try (final Reader in = getReader()) { +// reader.parse(in, callback); +// } +// +// bh.consume(callback); +// return callback.count; +// } +// +// private static class CountingReaderCallback implements org.skife.csv.ReaderCallback { +// public int count; +// +// @Override +// public void onRow(final String[] fields) { +// count++; +// } +// } +// +// @Benchmark +// public int parseSuperCSV(final Blackhole bh) throws Exception { +// int count = 0; +// +// try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { +// while (reader.read() != null) { +// count++; +// } +// } +// +// bh.consume(count); +// return count; +// } +//} diff --git a/src/test/java/org/apache/commons/csv/CSVTest.java b/src/test/java/org/apache/commons/csv/CSVTest.java new file mode 100644 index 0000000000..5e01c3c17c --- /dev/null +++ b/src/test/java/org/apache/commons/csv/CSVTest.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.commons.csv; + +import org.junit.jupiter.api.Test; + +import java.io.FileReader; +import java.io.Reader; +import java.io.StringReader; + +public class CSVTest { + + // @Test + public void readCSVFileSimpleOne() { + // happy path one + try { + // change your csv file path properly + Reader in = new FileReader("D:\\code\\apache\\csv\\samples\\basicCsvSample-1\\src\\main\\resources\\longCsvFile.csv"); + + CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setHeader() + .setSkipHeaderRecord(true) + .build(); + + Iterable records = csvFormat.parse(in); // return a CSVParser - which is an Iterable + + for (CSVRecord record : records) { + String firstName = record.get("firstname"); + String lastName = record.get("lastname"); + String age = record.get("age"); + String email = record.get("email"); + System.out.println("FirstName: " + firstName + ", LastName: "+ lastName +", Age: " + age + ", Email: " + email); + } + } catch (Exception e) { + System.out.println("An error occurred"); + e.printStackTrace(); + } + } + + @Test + public void testFaultyCSVshouldThrowErrorWithDetailedMessage(){ + + String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\n" + + "rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10"; + + try { + StringReader stringReader = new StringReader(csvContent); + CSVFormat csvFormat = CSVFormat.DEFAULT.builder() + .setHeader() + .setSkipHeaderRecord(true) + .build(); + + Iterable records = csvFormat.parse(stringReader); + + for (CSVRecord record : records) { + System.out.println(record.get(0) + " " + record.get(1) + " " + record.get(2) + " " + record.get(3) + " " + record.get(4) + " " + record.get(5) + " " + record.get(6) + " " + record.get(7) + " " + record.get(8) + " " + record.get(9)); + } + } catch (Exception e) { + System.out.println("An error occurred"); + e.printStackTrace(); + } + } +} From eaba3415351e279611730e258e69011ad9c989fc Mon Sep 17 00:00:00 2001 From: gbidsilva Date: Wed, 30 Aug 2023 17:08:12 +0530 Subject: [PATCH 2/3] removing unwanted files --- .../org/apache/commons/csv/CSVBenchmark.java | 454 +++++++++--------- .../java/org/apache/commons/csv/CSVTest.java | 79 --- 2 files changed, 227 insertions(+), 306 deletions(-) delete mode 100644 src/test/java/org/apache/commons/csv/CSVTest.java diff --git a/src/test/java/org/apache/commons/csv/CSVBenchmark.java b/src/test/java/org/apache/commons/csv/CSVBenchmark.java index 232142efaf..64d3f4980a 100644 --- a/src/test/java/org/apache/commons/csv/CSVBenchmark.java +++ b/src/test/java/org/apache/commons/csv/CSVBenchmark.java @@ -1,227 +1,227 @@ -///* -// * Licensed to the Apache Software Foundation (ASF) under one or more -// * contributor license agreements. See the NOTICE file distributed with -// * this work for additional information regarding copyright ownership. -// * The ASF licenses this file to You under the Apache License, Version 2.0 -// * (the "License"); you may not use this file except in compliance with -// * the License. You may obtain a copy of the License at -// * -// * http://www.apache.org/licenses/LICENSE-2.0 -// * -// * Unless required by applicable law or agreed to in writing, software -// * distributed under the License is distributed on an "AS IS" BASIS, -// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// * See the License for the specific language governing permissions and -// * limitations under the License. -// */ -// -//package org.apache.commons.csv; -// -//import java.io.BufferedReader; -//import java.io.IOException; -//import java.io.InputStream; -//import java.io.Reader; -//import java.io.StringReader; -//import java.nio.charset.StandardCharsets; -//import java.util.Iterator; -//import java.util.Scanner; -//import java.util.concurrent.TimeUnit; -//import java.util.zip.GZIPInputStream; -// -//import com.generationjava.io.CsvReader; -//import com.opencsv.CSVParserBuilder; -//import com.opencsv.CSVReaderBuilder; -// -//import org.apache.commons.io.IOUtils; -//import org.apache.commons.lang3.StringUtils; -//import org.openjdk.jmh.annotations.Benchmark; -//import org.openjdk.jmh.annotations.BenchmarkMode; -//import org.openjdk.jmh.annotations.Fork; -//import org.openjdk.jmh.annotations.Measurement; -//import org.openjdk.jmh.annotations.Mode; -//import org.openjdk.jmh.annotations.OutputTimeUnit; -//import org.openjdk.jmh.annotations.Scope; -//import org.openjdk.jmh.annotations.Setup; -//import org.openjdk.jmh.annotations.State; -//import org.openjdk.jmh.annotations.Threads; -//import org.openjdk.jmh.annotations.Warmup; -//import org.openjdk.jmh.infra.Blackhole; -//import org.supercsv.io.CsvListReader; -//import org.supercsv.prefs.CsvPreference; -// -//@BenchmarkMode(Mode.AverageTime) -//@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"}) -//@Threads(1) -//@Warmup(iterations = 5) -//@Measurement(iterations = 20) -//@OutputTimeUnit(TimeUnit.MILLISECONDS) -//@State(Scope.Benchmark) -//public class CSVBenchmark { -// -// private String data; -// -// /** -// * Load the data in memory before running the benchmarks, this takes out IO from the results. -// */ -// @Setup -// public void init() throws IOException { -// InputStream in = this.getClass().getClassLoader().getResourceAsStream( -// "org/apache/commons/csv/perf/worldcitiespop.txt.gz"); -// try (final InputStream gzin = new GZIPInputStream(in, 8192)) { -// this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); -// } -// } -// -// private Reader getReader() { -// return new StringReader(data); -// } -// -// @Benchmark -// public int read(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (BufferedReader reader = new BufferedReader(getReader())) { -// while (reader.readLine() != null) { -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int scan(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (Scanner scanner = new Scanner(getReader())) { -// while (scanner.hasNextLine()) { -// scanner.nextLine(); -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int split(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (BufferedReader reader = new BufferedReader(getReader())) { -// String line; -// while ((line = reader.readLine()) != null) { -// final String[] values = StringUtils.split(line, ','); -// count += values.length; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int parseCommonsCSV(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (final Reader in = getReader()) { -// final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build(); -// Iterator iter = format.parse(in).iterator(); -// while (iter.hasNext()) { -// count++; -// iter.next(); -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int parseGenJavaCSV(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (final Reader in = getReader()) { -// final CsvReader reader = new CsvReader(in); -// reader.setFieldDelimiter(','); -// while (reader.readLine() != null) { -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int parseJavaCSV(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (final Reader in = getReader()) { -// final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ','); -// reader.setRecordDelimiter('\n'); -// while (reader.readRecord()) { -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int parseOpenCSV(final Blackhole bh) throws Exception { -// int count = 0; -// -// final com.opencsv.CSVParser parser = new CSVParserBuilder() -// .withSeparator(',').withIgnoreQuotations(true).build(); -// -// try (final Reader in = getReader()) { -// final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build(); -// while (reader.readNext() != null) { -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -// -// @Benchmark -// public int parseSkifeCSV(final Blackhole bh) throws Exception { -// final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader(); -// reader.setSeperator(','); -// final CountingReaderCallback callback = new CountingReaderCallback(); -// -// try (final Reader in = getReader()) { -// reader.parse(in, callback); -// } -// -// bh.consume(callback); -// return callback.count; -// } -// -// private static class CountingReaderCallback implements org.skife.csv.ReaderCallback { -// public int count; -// -// @Override -// public void onRow(final String[] fields) { -// count++; -// } -// } -// -// @Benchmark -// public int parseSuperCSV(final Blackhole bh) throws Exception { -// int count = 0; -// -// try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { -// while (reader.read() != null) { -// count++; -// } -// } -// -// bh.consume(count); -// return count; -// } -//} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.csv; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import java.util.Iterator; +import java.util.Scanner; +import java.util.concurrent.TimeUnit; +import java.util.zip.GZIPInputStream; + +import com.generationjava.io.CsvReader; +import com.opencsv.CSVParserBuilder; +import com.opencsv.CSVReaderBuilder; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.supercsv.io.CsvListReader; +import org.supercsv.prefs.CsvPreference; + +@BenchmarkMode(Mode.AverageTime) +@Fork(value = 1, jvmArgs = {"-server", "-Xms1024M", "-Xmx1024M"}) +@Threads(1) +@Warmup(iterations = 5) +@Measurement(iterations = 20) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +public class CSVBenchmark { + + private String data; + + /** + * Load the data in memory before running the benchmarks, this takes out IO from the results. + */ + @Setup + public void init() throws IOException { + InputStream in = this.getClass().getClassLoader().getResourceAsStream( + "org/apache/commons/csv/perf/worldcitiespop.txt.gz"); + try (final InputStream gzin = new GZIPInputStream(in, 8192)) { + this.data = IOUtils.toString(gzin, StandardCharsets.ISO_8859_1); + } + } + + private Reader getReader() { + return new StringReader(data); + } + + @Benchmark + public int read(final Blackhole bh) throws Exception { + int count = 0; + + try (BufferedReader reader = new BufferedReader(getReader())) { + while (reader.readLine() != null) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int scan(final Blackhole bh) throws Exception { + int count = 0; + + try (Scanner scanner = new Scanner(getReader())) { + while (scanner.hasNextLine()) { + scanner.nextLine(); + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int split(final Blackhole bh) throws Exception { + int count = 0; + + try (BufferedReader reader = new BufferedReader(getReader())) { + String line; + while ((line = reader.readLine()) != null) { + final String[] values = StringUtils.split(line, ','); + count += values.length; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int parseCommonsCSV(final Blackhole bh) throws Exception { + int count = 0; + + try (final Reader in = getReader()) { + final CSVFormat format = CSVFormat.Builder.create().setSkipHeaderRecord(true).build(); + Iterator iter = format.parse(in).iterator(); + while (iter.hasNext()) { + count++; + iter.next(); + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int parseGenJavaCSV(final Blackhole bh) throws Exception { + int count = 0; + + try (final Reader in = getReader()) { + final CsvReader reader = new CsvReader(in); + reader.setFieldDelimiter(','); + while (reader.readLine() != null) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int parseJavaCSV(final Blackhole bh) throws Exception { + int count = 0; + + try (final Reader in = getReader()) { + final com.csvreader.CsvReader reader = new com.csvreader.CsvReader(in, ','); + reader.setRecordDelimiter('\n'); + while (reader.readRecord()) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int parseOpenCSV(final Blackhole bh) throws Exception { + int count = 0; + + final com.opencsv.CSVParser parser = new CSVParserBuilder() + .withSeparator(',').withIgnoreQuotations(true).build(); + + try (final Reader in = getReader()) { + final com.opencsv.CSVReader reader = new CSVReaderBuilder(in).withSkipLines(1).withCSVParser(parser).build(); + while (reader.readNext() != null) { + count++; + } + } + + bh.consume(count); + return count; + } + + @Benchmark + public int parseSkifeCSV(final Blackhole bh) throws Exception { + final org.skife.csv.CSVReader reader = new org.skife.csv.SimpleReader(); + reader.setSeperator(','); + final CountingReaderCallback callback = new CountingReaderCallback(); + + try (final Reader in = getReader()) { + reader.parse(in, callback); + } + + bh.consume(callback); + return callback.count; + } + + private static class CountingReaderCallback implements org.skife.csv.ReaderCallback { + public int count; + + @Override + public void onRow(final String[] fields) { + count++; + } + } + + @Benchmark + public int parseSuperCSV(final Blackhole bh) throws Exception { + int count = 0; + + try (final CsvListReader reader = new CsvListReader(getReader(), CsvPreference.STANDARD_PREFERENCE)) { + while (reader.read() != null) { + count++; + } + } + + bh.consume(count); + return count; + } +} diff --git a/src/test/java/org/apache/commons/csv/CSVTest.java b/src/test/java/org/apache/commons/csv/CSVTest.java deleted file mode 100644 index 5e01c3c17c..0000000000 --- a/src/test/java/org/apache/commons/csv/CSVTest.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.commons.csv; - -import org.junit.jupiter.api.Test; - -import java.io.FileReader; -import java.io.Reader; -import java.io.StringReader; - -public class CSVTest { - - // @Test - public void readCSVFileSimpleOne() { - // happy path one - try { - // change your csv file path properly - Reader in = new FileReader("D:\\code\\apache\\csv\\samples\\basicCsvSample-1\\src\\main\\resources\\longCsvFile.csv"); - - CSVFormat csvFormat = CSVFormat.DEFAULT.builder() - .setHeader() - .setSkipHeaderRecord(true) - .build(); - - Iterable records = csvFormat.parse(in); // return a CSVParser - which is an Iterable - - for (CSVRecord record : records) { - String firstName = record.get("firstname"); - String lastName = record.get("lastname"); - String age = record.get("age"); - String email = record.get("email"); - System.out.println("FirstName: " + firstName + ", LastName: "+ lastName +", Age: " + age + ", Email: " + email); - } - } catch (Exception e) { - System.out.println("An error occurred"); - e.printStackTrace(); - } - } - - @Test - public void testFaultyCSVshouldThrowErrorWithDetailedMessage(){ - - String csvContent = "col1,col2,col3,col4,col5,col6,col7,col8,col9,col10\n" + - "rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,\"\"rec9\"\",rec10"; - - try { - StringReader stringReader = new StringReader(csvContent); - CSVFormat csvFormat = CSVFormat.DEFAULT.builder() - .setHeader() - .setSkipHeaderRecord(true) - .build(); - - Iterable records = csvFormat.parse(stringReader); - - for (CSVRecord record : records) { - System.out.println(record.get(0) + " " + record.get(1) + " " + record.get(2) + " " + record.get(3) + " " + record.get(4) + " " + record.get(5) + " " + record.get(6) + " " + record.get(7) + " " + record.get(8) + " " + record.get(9)); - } - } catch (Exception e) { - System.out.println("An error occurred"); - e.printStackTrace(); - } - } -} From dbc418b9e79369d4a4b440340322b45192d10aed Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Wed, 30 Aug 2023 08:36:48 -0400 Subject: [PATCH 3/3] Update CSVParser.java --- src/main/java/org/apache/commons/csv/CSVParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java index 7d292c6a50..8679367c27 100644 --- a/src/main/java/org/apache/commons/csv/CSVParser.java +++ b/src/main/java/org/apache/commons/csv/CSVParser.java @@ -147,7 +147,7 @@ private CSVRecord getNextRecord() { try { return CSVParser.this.nextRecord(); } catch (final IOException e) { - throw new UncheckedIOException("Error in reading next record: " + e.toString(), e); + throw new UncheckedIOException("Exception reading next record: " + e.toString(), e); } }