Skip to content

Commit

Permalink
feat(csv): support fieldsPerRecord in CsvParseStream (#5600)
Browse files Browse the repository at this point in the history
Although the constructor of `CsvParseStream` accepts `fieldsPerRecord` option
(see https://jsr.io/@std/[email protected]/doc/~/CsvParseStreamOptions) that
ensures that every record has the specified (or inferred from the first row)
number of fields, this option has no effect at all in the current
implementation. To fix this issue, this patch implements the `fieldsPerRecord`
logic in `CsvParseStream` together with sufficient amount of test cases.
  • Loading branch information
magurotuna authored Aug 1, 2024
1 parent 577fd9a commit b85d219
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 16 deletions.
38 changes: 38 additions & 0 deletions csv/parse_stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,17 @@ export class CsvParseStream<
#lineIndex = 0;
#isFirstRow = true;

// The number of fields per record that is either inferred from the first row
// (when options.fieldsPerRecord = 0), or set by the caller (when
// options.fieldsPerRecord > 0).
//
// Each possible variant means the following:
// "ANY": Variable number of fields is allowed.
// "UNINITIALIZED": The first row has not been read yet. Once it's read, the
// number of fields will be set.
// <number>: The number of fields per record that every record must follow.
#fieldsPerRecord: "ANY" | "UNINITIALIZED" | number;

#headers: readonly string[] = [];

/** Construct a new instance.
Expand All @@ -147,6 +158,18 @@ export class CsvParseStream<
...options,
};

if (
this.#options.fieldsPerRecord === undefined ||
this.#options.fieldsPerRecord < 0
) {
this.#fieldsPerRecord = "ANY";
} else if (this.#options.fieldsPerRecord === 0) {
this.#fieldsPerRecord = "UNINITIALIZED";
} else {
// TODO: Should we check if it's a valid integer?
this.#fieldsPerRecord = this.#options.fieldsPerRecord;
}

this.#lines = new TextDelimiterStream("\n");
this.#lineReader = new StreamLineReader(this.#lines.readable.getReader());
this.#readable = new ReadableStream({
Expand Down Expand Up @@ -198,6 +221,21 @@ export class CsvParseStream<
if (this.#options.skipFirstRow) {
return this.#pull(controller);
}

if (this.#fieldsPerRecord === "UNINITIALIZED") {
this.#fieldsPerRecord = record.length;
}
}

if (
typeof this.#fieldsPerRecord === "number" &&
record.length !== this.#fieldsPerRecord
) {
throw new SyntaxError(
`record on line ${
this.#lineIndex + 1
}: expected ${this.#fieldsPerRecord} fields but got ${record.length}`,
);
}

this.#lineIndex++;
Expand Down
93 changes: 77 additions & 16 deletions csv/parse_stream_test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license.
import { CsvParseStream } from "./parse_stream.ts";
import type { CsvParseStreamOptions } from "./parse_stream.ts";
import { assertEquals, assertRejects } from "@std/assert";
import { assert, assertEquals, assertRejects } from "@std/assert";
import type { AssertTrue, IsExact } from "@std/testing/types";
import { fromFileUrl, join } from "@std/path";
import { delay } from "@std/async/delay";
Expand Down Expand Up @@ -83,8 +83,11 @@ Deno.test({
{
name: "Separator is undefined",
input: "a;b;c\n",
errorMessage: "Separator is required",
separator: undefined,
error: {
klass: TypeError,
msg: "Separator is required",
},
},
{
name: "MultiLine",
Expand Down Expand Up @@ -128,13 +131,52 @@ field"`,
],
},
{
name: "FieldCount",
name: "fieldsPerRecord - variable number of fields is allowed",
input: "a,b,c\nd,e",
output: [
["a", "b", "c"],
["d", "e"],
],
},
{
name: "fieldsPerRecord = -42 - variable number of fields is allowed",
input: "a,b,c\nd,e",
output: [
["a", "b", "c"],
["d", "e"],
],
fieldsPerRecord: -42,
},
{
name:
"fieldsPerRecord = 0 - the number of fields is inferred from the first row",
input: "a,b,c\nd,e,f",
output: [
["a", "b", "c"],
["d", "e", "f"],
],
fieldsPerRecord: 0,
},
{
name:
"fieldsPerRecord = 0 - inferred number of fields does not match subsequent rows",
input: "a,b,c\nd,e",
fieldsPerRecord: 0,
error: {
klass: SyntaxError,
msg: "record on line 2: expected 3 fields but got 2",
},
},
{
name:
"fieldsPerRecord = 3 - SyntaxError is thrown when the number of fields is not 2",
input: "a,b,c\nd,e",
fieldsPerRecord: 3,
error: {
klass: SyntaxError,
msg: "record on line 2: expected 3 fields but got 2",
},
},
{
name: "TrailingCommaEOF",
input: "a,b,c,",
Expand Down Expand Up @@ -312,18 +354,29 @@ x,,,
input: "a,b,c\nd,e",
skipFirstRow: true,
columns: ["foo", "bar", "baz"],
errorMessage:
"Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2",
error: {
klass: Error,
msg:
"Error number of fields line: 1\nNumber of fields found: 3\nExpected number of fields: 2",
},
},
{
name: "bad quote in bare field",
input: `a "word",1,2,3`,
errorMessage: "Error line: 1\nBad quoting",
error: {
klass: SyntaxError,
msg:
'record on line 1; parse error on line 0, column 2: bare " in non-quoted-field',
},
},
{
name: "bad quote in quoted field",
input: `"wo"rd",1,2,3`,
errorMessage: "Error line: 1\nBad quoting",
error: {
klass: SyntaxError,
msg:
'record on line 1; parse error on line 0, column 3: extraneous or missing " in quoted-field',
},
},
{
name: "lazy quote",
Expand All @@ -341,18 +394,21 @@ x,,,
if ("comment" in testCase) {
options.comment = testCase.comment;
}
if ("skipFirstRow" in testCase) {
options.skipFirstRow = testCase.skipFirstRow;
}
if ("columns" in testCase) {
options.columns = testCase.columns;
}
if ("trimLeadingSpace" in testCase) {
options.trimLeadingSpace = testCase.trimLeadingSpace;
}
if ("lazyQuotes" in testCase) {
options.lazyQuotes = testCase.lazyQuotes;
}
if ("fieldsPerRecord" in testCase) {
options.fieldsPerRecord = testCase.fieldsPerRecord;
}
if ("skipFirstRow" in testCase) {
options.skipFirstRow = testCase.skipFirstRow;
}
if ("columns" in testCase) {
options.columns = testCase.columns;
}

const readable = ReadableStream.from(testCase.input)
.pipeThrough(new CsvParseStream(options));
Expand All @@ -361,9 +417,14 @@ x,,,
const actual = await Array.fromAsync(readable);
assertEquals(actual, testCase.output);
} else {
await assertRejects(async () => {
for await (const _ of readable);
}, testCase.errorMessage);
assert(testCase.error);
await assertRejects(
async () => {
for await (const _ of readable);
},
testCase.error.klass,
testCase.error.msg,
);
}
});
}
Expand Down

0 comments on commit b85d219

Please sign in to comment.