Skip to content

Commit 0abcfad

Browse files
committed
Handle any arbitrary line endings
Handle any arbitrary line endings, as long as they are some combination of carriage return and newline. Fixes #223.
1 parent 25a4f7a commit 0abcfad

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

include/internal/basic_csv_parser.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ namespace csv {
139139
case ParseFlags::NEWLINE:
140140
this->data_pos++;
141141

142-
// Catches CRLF (or LFLF)
143-
if (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE)
142+
// Catches CRLF (or LFLF, CRCRLF, or any other nonsensical combination of newlines)
143+
while (this->data_pos < in.size() && parse_flag(in[this->data_pos]) == ParseFlags::NEWLINE)
144144
this->data_pos++;
145145

146146
// End of record -> Write record

tests/test_read_csv.cpp

+55-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ TEST_CASE( "Test Parse Flags", "[test_parse_flags]" ) {
1616
}
1717

1818
// Test Main Functions
19-
TEST_CASE( "Test Reading CSV From Direct Input", "[read_csv_direct]" ) {
19+
TEST_CASE("Test Reading CSV From Direct Input", "[read_csv_direct]" ) {
2020
SECTION("Expected Results") {
2121
auto rows = "A,B,C\r\n" // Header row
2222
"123,234,345\r\n"
@@ -181,6 +181,60 @@ TEST_CASE( "Test leading and trailing escaped quote", "[read_csv_quote]" ) {
181181
}
182182
//! [Parse Example]
183183

184+
// Verify the CSV parser can handle any arbitrary line endings composed of carriage return & newline
185+
TEST_CASE("Cursed Newlines", "[read_csv_cursed_newline]") {
186+
auto row_str = GENERATE(as<std::string> {},
187+
(
188+
// Windows style
189+
"A,B,C\r\n" // Header row
190+
"123,234,345\r\n"
191+
"1,2,3\r\n"
192+
"4,5,6",
193+
194+
// Unix style
195+
"A,B,C\n" // Header row
196+
"123,234,345\n"
197+
"1,2,3\n"
198+
"4,5,6",
199+
200+
// Eww brother what is that...
201+
"A,B,C\r\r\n" // Header row
202+
"123,234,345\r\r\n"
203+
"1,2,3\r\r\n"
204+
"4,5,6",
205+
206+
// Doubled-up Windows style (ridiculous, but I'm sure it exists somewhere)
207+
"A,B,C\r\n\r\n" // Header row
208+
"123,234,345\r\n\r\n"
209+
"1,2,3\r\n\r\n"
210+
"4,5,6"
211+
)
212+
);
213+
214+
// Set CSVFormat to KEEP all rows, even empty ones (because there shouldn't be any)
215+
CSVFormat format;
216+
format.header_row(0).variable_columns(VariableColumnPolicy::KEEP);
217+
auto rows = parse(row_str, format);
218+
219+
CSVRow row;
220+
rows.read_row(row);
221+
vector<string> first_row = { "123", "234", "345" };
222+
REQUIRE(vector<string>(row) == first_row);
223+
REQUIRE(row["A"] == "123");
224+
REQUIRE(row["B"] == "234");
225+
REQUIRE(row["C"] == "345");
226+
227+
rows.read_row(row);
228+
vector<string> second_row = { "1", "2", "3" };
229+
REQUIRE(vector<string>(row) == second_row);
230+
231+
rows.read_row(row);
232+
vector<string> third_row = { "4", "5", "6" };
233+
REQUIRE(vector<string>(row) == third_row);
234+
235+
REQUIRE(rows.n_rows() == 3);
236+
}
237+
184238
TEST_CASE("Test Whitespace Trimming", "[read_csv_trim]") {
185239
auto row_str = GENERATE(as<std::string> {},
186240
"A,B,C\r\n" // Header row

0 commit comments

Comments
 (0)