Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions gradle/verification-metadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,16 @@
<sha256 value="86a53da582f1ed7874a13f20a6aceb2a578d38e2e50ed22c967e7b28b8126318" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.dataformat" name="jackson-dataformat-csv" version="2.17.2">
<artifact name="jackson-dataformat-csv-2.17.2.jar">
<sha256 value="9e6cfdb34463abdd005314db819aedeba58d4aa45ff88d74396bddf467440cf1" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.dataformat" name="jackson-dataformat-csv" version="2.19.2">
<artifact name="jackson-dataformat-csv-2.19.2.jar">
<sha256 value="00a733ae8d1eadefefbef22dd2a2565b665e687864a11db3fad3487bb9448905" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.fasterxml.jackson.dataformat" name="jackson-dataformat-smile" version="2.14.2">
<artifact name="jackson-dataformat-smile-2.14.2.jar">
<sha256 value="9c279bb29770de09289c14cf8862dd195112687cd7fde815919f54a9157ce213" origin="Generated by Gradle"/>
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugin/esql-datasource-csv/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dependencies {

testImplementation project(':test:framework')
testImplementation(testArtifact(project(xpackModule('core'))))
testImplementation project(xpackModule('esql:qa:testFixtures'))
}

tasks.named("dependencyLicenses").configure {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@
* <td>—</td><td>{@code Array()} type notation</td></tr>
* </table>
*
* <h2>Bracket multi-value syntax</h2>
* When {@code multi_value_syntax} is {@code brackets}, array-like values support:
* <ul>
* <li>{@code [a,b,c]} — unquoted elements</li>
* <li>{@code ["a","b","c"]} — quoted elements (quotes stripped)</li>
* <li>{@code [a,"b,c"]} — mixed; commas inside quotes are literal</li>
* </ul>
* <p>With a comma delimiter, a cell like {@code [hello,world]} is treated as one column:
* commas inside {@code [...]} are not column delimiters.
*
* <h2>Error handling</h2>
* Controlled by {@link ErrorPolicy} and its {@link ErrorPolicy.Mode}:
* <table>
Expand Down Expand Up @@ -444,10 +454,17 @@ private List<Attribute> parseSchema(String schemaLine) {
return attributes;
}

/**
* Parse CSV type names to ESQL DataType. Small numeric types (SHORT, BYTE, FLOAT, etc.)
* are widened to INTEGER/DOUBLE since the planner expects widened types.
*/
private DataType parseDataType(String typeName) {
return switch (typeName) {
String upper = typeName.toUpperCase(Locale.ROOT);
return switch (upper) {
case "SHORT", "BYTE" -> DataType.INTEGER;
case "INTEGER", "INT", "I" -> DataType.INTEGER;
case "LONG", "L" -> DataType.LONG;
case "FLOAT", "HALF_FLOAT", "SCALED_FLOAT" -> DataType.DOUBLE;
case "DOUBLE", "D" -> DataType.DOUBLE;
case "KEYWORD", "K", "STRING", "S" -> DataType.KEYWORD;
case "TEXT", "TXT" -> DataType.TEXT;
Expand Down Expand Up @@ -583,29 +600,36 @@ private Page readNextBatch() throws IOException {
}
initProjection();

CsvSchema csvSchema = CsvSchema.emptySchema()
.withColumnSeparator(options.delimiter())
.withQuoteChar(options.quoteChar())
.withEscapeChar(options.escapeChar())
.withNullValue(options.nullValue());
csvIterator = sharedCsvMapper.readerFor(List.class).with(csvSchema).readValues(reader);
boolean useBracketAwareParsing = bracketMultiValues && options.delimiter() == ',';
if (useBracketAwareParsing == false) {
CsvSchema csvSchema = CsvSchema.emptySchema()
.withColumnSeparator(options.delimiter())
.withQuoteChar(options.quoteChar())
.withEscapeChar(options.escapeChar())
.withNullValue(options.nullValue());
csvIterator = sharedCsvMapper.readerFor(List.class).with(csvSchema).readValues(reader);
}
}
while (true) {
List<String[]> rows = new ArrayList<>();
while (rows.size() < batchSize && csvIterator.hasNext()) {
List<?> rowList = csvIterator.next();
String[] row = new String[rowList.size()];
for (int i = 0; i < rowList.size(); i++) {
Object val = rowList.get(i);
row[i] = val != null ? val.toString() : null;
}
if (hasCommentFilter && row.length > 0 && row[0] != null) {
String trimmedFirstCell = row[0].trim();
if (trimmedFirstCell.startsWith(options.commentPrefix())) {
continue;
if (bracketMultiValues && options.delimiter() == ',') {
rows = readRowsBracketAware(batchSize);
} else {
while (rows.size() < batchSize && csvIterator.hasNext()) {
List<?> rowList = csvIterator.next();
String[] row = new String[rowList.size()];
for (int i = 0; i < rowList.size(); i++) {
Object val = rowList.get(i);
row[i] = val != null ? val.toString() : null;
}
if (hasCommentFilter && row.length > 0 && row[0] != null) {
String trimmedFirstCell = row[0].trim();
if (trimmedFirstCell.startsWith(options.commentPrefix())) {
continue;
}
}
rows.add(row);
}
rows.add(row);
}

if (rows.isEmpty()) {
Expand All @@ -619,6 +643,129 @@ private Page readNextBatch() throws IOException {
}
}

/**
 * Reads up to {@code batchSize} rows from {@code reader} with the bracket-aware splitter
 * instead of the Jackson iterator.
 * <p>
 * The first physical line of each record is trimmed; blank lines and, when the comment filter
 * is enabled, lines starting with the configured comment prefix are skipped. While the
 * accumulated record still has an unclosed quote, following physical lines are appended
 * (joined with {@code '\n'}, untrimmed and not comment-filtered) so that quoted fields may
 * span several lines. Each completed record is handed to {@link #splitLineBracketAware(String)}.
 *
 * @param batchSize maximum number of rows to return
 * @return the rows read; empty when the reader is exhausted
 * @throws IOException if reading from the underlying reader fails
 */
private List<String[]> readRowsBracketAware(int batchSize) throws IOException {
    final List<String[]> batch = new ArrayList<>();
    while (batch.size() < batchSize) {
        String firstLine = reader.readLine();
        if (firstLine == null) {
            // End of input.
            break;
        }
        firstLine = firstLine.trim();
        if (firstLine.isEmpty()) {
            continue;
        }
        if (hasCommentFilter && firstLine.startsWith(options.commentPrefix())) {
            continue;
        }
        final StringBuilder record = new StringBuilder(firstLine);
        // Keep pulling physical lines while a quoted field is still open.
        while (hasUnclosedQuote(record.toString(), options.quoteChar())) {
            final String continuation = reader.readLine();
            if (continuation == null) {
                // Input ended inside a quoted field; the splitter reports the error.
                break;
            }
            record.append('\n').append(continuation);
        }
        batch.add(splitLineBracketAware(record.toString()));
    }
    return batch;
}

/**
 * Tells whether {@code s} ends inside a quoted section, i.e. whether more input is needed
 * to close a quoted field.
 * <p>
 * A doubled quote character contributes two quote characters and never changes the
 * open/closed state, so the answer is simply whether the number of quote characters in
 * {@code s} is odd. Escape characters are not taken into account.
 *
 * @param s     the text scanned so far
 * @param quote the quote character
 * @return {@code true} if a quote is still open at the end of {@code s}
 */
private static boolean hasUnclosedQuote(String s, char quote) {
    int quoteCount = 0;
    final int length = s.length();
    for (int pos = 0; pos < length; pos++) {
        if (s.charAt(pos) == quote) {
            quoteCount++;
        }
    }
    return (quoteCount & 1) == 1;
}

/**
 * Splits one logical CSV line into cells, treating quoted fields and {@code [..,..,..]} cells
 * as single values.
 * <ul>
 * <li>Inside quotes: a doubled quote yields one literal quote, an escape character directly
 * followed by the delimiter yields the delimiter, everything else is copied verbatim. The
 * surrounding quote characters are not part of the cell.</li>
 * <li>A {@code [} that is the first character of a cell opens a bracket cell. It is copied
 * verbatim (brackets included, untrimmed) up to the first {@code ]}; quotes are not
 * interpreted inside it. Spaces after the {@code ]} and one following delimiter are consumed.</li>
 * <li>Outside quotes and brackets a delimiter ends the cell unless the preceding character is
 * the escape character; in that case the delimiter is kept in the cell (the escape character,
 * already copied, stays in the cell text as well).</li>
 * <li>Non-bracket cells are trimmed.</li>
 * </ul>
 * An empty final cell (a trailing delimiter, or an empty quoted field at the end of the line)
 * is emitted as an empty string, so the row keeps one entry per column instead of silently
 * coming up one cell short.
 *
 * @param line the logical line (may contain {@code '\n'} inside quoted fields)
 * @return the cells of the line, in order
 * @throws EsqlIllegalArgumentException if a quoted field or a bracket cell is not closed
 */
private String[] splitLineBracketAware(String line) {
    List<String> entries = new ArrayList<>();
    char delim = options.delimiter();
    char quote = options.quoteChar();
    char esc = options.escapeChar();
    StringBuilder current = new StringBuilder();
    boolean inQuotes = false;
    boolean inBrackets = false;
    // True when the cell being built must be emitted at end of line even if it is empty:
    // a separator was just consumed, or a quoted field was opened for this cell.
    boolean cellPending = false;
    int i = 0;
    while (i < line.length()) {
        char c = line.charAt(i);
        if (inQuotes) {
            if (c == quote) {
                if (i + 1 < line.length() && line.charAt(i + 1) == quote) {
                    // Doubled quote inside a quoted field is a literal quote.
                    current.append(quote);
                    i += 2;
                    continue;
                }
                inQuotes = false;
            } else if (c == esc && i + 1 < line.length() && line.charAt(i + 1) == delim) {
                // Escaped delimiter inside quotes: keep only the delimiter.
                current.append(delim);
                i += 2;
                continue;
            } else {
                current.append(c);
            }
            i++;
        } else if (inBrackets) {
            current.append(c);
            if (c == ']') {
                inBrackets = false;
                // Bracket cells are kept verbatim so multi-value conversion can parse them.
                entries.add(current.toString());
                current = new StringBuilder();
                i++;
                while (i < line.length() && line.charAt(i) == ' ') {
                    i++;
                }
                if (i < line.length() && line.charAt(i) == delim) {
                    i++;
                    // A separator after the bracket cell announces another cell.
                    cellPending = true;
                }
                continue;
            }
            i++;
        } else if (c == quote) {
            inQuotes = true;
            // Even an empty quoted field ("") is a cell of its own.
            cellPending = true;
            i++;
        } else if (c == '[' && current.length() == 0) {
            inBrackets = true;
            // The bracket cell is emitted when it closes, not at end of line.
            cellPending = false;
            current.append(c);
            i++;
        } else if (c == delim) {
            if (i > 0 && line.charAt(i - 1) == esc) {
                current.append(c);
            } else {
                entries.add(current.toString().trim());
                current = new StringBuilder();
                cellPending = true;
            }
            i++;
        } else {
            current.append(c);
            i++;
        }
    }
    if (inQuotes) {
        throw new EsqlIllegalArgumentException("Unclosed quoted field in line [{}]", line);
    }
    if (inBrackets) {
        throw new EsqlIllegalArgumentException("Unclosed bracket cell in line [{}]", line);
    }
    if (current.length() > 0 || cellPending) {
        entries.add(current.toString().trim());
    }
    return entries.toArray(String[]::new);
}

private void initProjection() {
int schemaSize = schema.size();
if (projectedColumns == null || projectedColumns.isEmpty()) {
Expand Down Expand Up @@ -768,16 +915,30 @@ private List<String> splitBracketContent(String content) {
List<String> result = new ArrayList<>();
StringBuilder current = new StringBuilder();
char esc = options.escapeChar();
char quote = options.quoteChar();
boolean inQuotes = false;
int i = 0;
while (i < content.length()) {
char c = content.charAt(i);
if (c == esc && i + 1 < content.length() && content.charAt(i + 1) == ',') {
current.append(',');
i += 2;
} else if (c == ',') {
if (c == quote) {
if (inQuotes) {
if (i + 1 < content.length() && content.charAt(i + 1) == quote) {
current.append(quote);
i += 2;
continue;
}
inQuotes = false;
} else {
inQuotes = true;
}
i++;
} else if (c == ',' && inQuotes == false) {
result.add(current.toString().trim());
current = new StringBuilder();
i++;
} else if (c == esc && inQuotes == false && i + 1 < content.length() && content.charAt(i + 1) == ',') {
current.append(',');
i += 2;
} else {
current.append(c);
i++;
Expand All @@ -794,6 +955,10 @@ private Object parseElement(String value, DataType dataType) {
if (hasCustomNullValue && value.equals(nullValueStr)) {
return null;
}
value = unquoteElement(value);
if (value.isEmpty()) {
return null;
}
return switch (dataType) {
case INTEGER -> tryParseInt(value);
case LONG -> tryParseLong(value);
Expand All @@ -809,6 +974,19 @@ private Object parseElement(String value, DataType dataType) {
};
}

/**
 * Strips one pair of enclosing quote characters from {@code value}, if present.
 * <p>
 * When the value is at least two characters long and both its first and last character are
 * the configured quote character, the outer quotes are removed and every doubled quote in the
 * remaining text is collapsed to a single quote. Any other value is returned unchanged.
 *
 * @param value the raw element text
 * @return the unquoted text, or {@code value} itself when it is not wrapped in quotes
 */
private String unquoteElement(String value) {
    final char q = options.quoteChar();
    final int lastIndex = value.length() - 1;
    final boolean wrapped = lastIndex >= 1 && value.charAt(0) == q && value.charAt(lastIndex) == q;
    if (wrapped == false) {
        return value;
    }
    final String singleQuote = String.valueOf(q);
    final String doubledQuote = singleQuote + q;
    return value.substring(1, lastIndex).replace(doubledQuote, singleQuote);
}

private Object tryParseInt(String value) {
try {
return Integer.parseInt(value);
Expand Down Expand Up @@ -936,6 +1114,9 @@ private Class<?> javaClassForDataType(DataType dataType) {
}

private static boolean looksNumeric(String value) {
if (value == null || value.isEmpty()) {
return false;
}
int start = (value.charAt(0) == '-') ? 1 : 0;
if (start >= value.length()) {
return false;
Expand Down
Loading
Loading