-
Notifications
You must be signed in to change notification settings - Fork 3
/
index.js
140 lines (113 loc) · 3.25 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Character codes compared against `String.prototype.charCodeAt` results.
const C_COMMA = 44; // ','
const C_NEWLINE = 10; // '\n'
const C_QUOTE = 34; // '"'

/**
 * Iterate through a CSV. Returns all fields as string. Each row can be of varied length.
 *
 * Parsing rules, as implemented below:
 * - Fields are separated by `,` and rows by `\n` only (a `\r` is kept as field data).
 * - A field that starts with `"` is quoted: it may contain commas and newlines, and `""`
 *   stands for one literal quote. Parsing is lenient: a closing quote followed by anything
 *   other than `,`, `\n`, `"` or end of input is kept as a literal quote, and an
 *   unterminated quoted field runs to the end of the input.
 * - A comma always introduces another field, so `"a,"` yields `['a', '']` and `"a,\nb"`
 *   yields `['a', '']` then `['b']`.
 * - The row in progress is always yielded at the end of input, even when it is empty;
 *   therefore `""` yields one empty row and `"a\n"` yields `['a']` followed by `[]`.
 *
 * @param {string} source
 * @return {Generator<string[], void, void>}
 */
export function* iter(source) {
  const length = source.length;

  /** @type {string[]} */
  const row = [];

  // Current read position in `source`.
  let i = 0;

  // Cached position of a newline found by an earlier search; only refreshed once `i`
  // has moved past it, so a long row is not rescanned for every field.
  let newline = -1;

  // True right after a separating comma has been consumed: a field is owed even if the
  // next character is a newline or the input has ended.
  let fieldOwed = false;

  // Index of the next `c` at or after `i`, or `length` when there is none.
  const nextIndex = (/** @type {string} */ c) => {
    const found = source.indexOf(c, i);
    return found < 0 ? length : found;
  };

  for (;;) {
    // At most one field is consumed per pass through this loop.
    if (!fieldOwed) {
      if (source.charCodeAt(i) === C_NEWLINE) {
        // splice(0) hands out the finished fields as a new array and empties `row`.
        yield row.splice(0);
        ++i;
      }
      if (i >= length) {
        break;
      }
    }

    let field = '';
    if (source.charCodeAt(i) === C_QUOTE) {
      // Quoted field, consumed in segments that each end at a `"`.
      for (;;) {
        ++i; // step over the quote that opens this segment
        const close = nextIndex('"');
        field += source.slice(i, close);
        i = close + 1; // position just after the closing quote
        if (i >= length) {
          break; // end of input
        }
        const code = source.charCodeAt(i);
        if (code === C_COMMA || code === C_NEWLINE) {
          break; // end of field
        }
        if (code !== C_QUOTE) {
          // Stray quote inside the field: step back so the next pass treats it as the
          // segment opener and keeps the text that follows.
          --i;
        }
        // Either an escaped `""` pair or a stray quote: both contribute one literal quote.
        field += '"';
      }
    } else {
      // Plain field: ends at the nearest comma or newline, or at the end of input.
      if (i > newline) {
        newline = nextIndex('\n');
      }
      const comma = nextIndex(',');
      const end = comma > newline ? newline : comma;
      field = source.slice(i, end);
      i = end;
    }
    row.push(field);

    // Consume the separating comma, if any, and remember that another field must follow.
    fieldOwed = source.charCodeAt(i) === C_COMMA;
    if (fieldOwed) {
      ++i;
    }
  }

  yield row;
}
/**
 * Parse a whole CSV eagerly by draining `iter` into an array of rows.
 *
 * @param {string} source CSV text
 * @return {string[][]} every row produced by `iter(source)`, in order
 */
const parse = (source) => Array.from(iter(source));
// A value must be quoted when it contains a double quote, a newline or a comma.
const needsQuoteRegexp = /["\n,]/;
// Matches every double quote so each one can be doubled inside a quoted value.
const globalQuote = /"/g;

/**
 * Render a single value as a CSV field.
 *
 * The value is stringified by concatenation with `''`; the result is returned as-is
 * unless it contains `"`, `\n` or `,`, in which case it is wrapped in double quotes
 * with every inner quote doubled.
 *
 * @param {any} raw
 * @return {string}
 */
const r = (raw) => {
  // Concatenation (not String() or a template literal) keeps the original coercion rules.
  const text = raw + '';
  return needsQuoteRegexp.test(text)
    ? `"${text.replace(globalQuote, '""')}"`
    : text;
};
/**
 * Builds a CSV from raw data. Every value is stringified (and quoted when needed) by `r`
 * before rendering; fields are joined with `,` and rows with `\n`.
 *
 * @param {any[][]} raw rows of values
 * @return {string}
 */
const build = (raw) => {
  // .join() is used rather than implicit array stringification, which the original
  // author measured as about 50% slower.
  const renderRow = (/** @type {any[]} */ row) => row.map(r).join(',');
  return raw.map(renderRow).join('\n');
};
export { build, parse };