diff --git a/x-pack/plugin/esql-datasource-ndjson/README.md b/x-pack/plugin/esql-datasource-ndjson/README.md new file mode 100644 index 0000000000000..18ea6228bfbe1 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/README.md @@ -0,0 +1,36 @@ +# ESQL NDJSON Data Source Plugin + +Provides NDJSON (newline-delimited JSON) format support for ESQL external data sources. + +## Features + +- Schema inference from the first 100 non-empty lines +- Type conflict resolution to KEYWORD +- Ignores blank lines; logs warnings for malformed lines (does not fail the file) +- Supports `.ndjson` and `.jsonl` extensions + +## Usage + +Once installed, ESQL will use this plugin for files ending in `.ndjson` or `.jsonl`: + +```sql +FROM "https://example.com/data/events.ndjson" +| WHERE status = "ok" +| LIMIT 100 +``` + +## Limitations / TODO + +Pages only contain blocks, not attributes. There's no way to know what +these blocks represent (name, type, nullability, etc.) +`SourceMetadata` should be provided to `FormatReader.read()` + +1. Avoid rediscovering the schema +2. Ensure data is interpreted as expected by the schema +3. Blocks are laid out in the page in the order they appear in the metadata + +## Misc + +- `employees.ndjson` created by running `CsvTestsDataLoader` and extracted using + `curl 'http://localhost:9200/employees/_search?size=1000' | jq -c '.hits.hits[] | ._source'` + and manually convert "string booleans" to actual booleans. diff --git a/x-pack/plugin/esql-datasource-ndjson/build.gradle b/x-pack/plugin/esql-datasource-ndjson/build.gradle new file mode 100644 index 0000000000000..551edea124c81 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/build.gradle @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +apply plugin: 'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.publish' + +esplugin { + name = 'esql-datasource-ndjson' + description = 'NDJSON (newline-delimited JSON) format support for ESQL external data sources' + classname = 'org.elasticsearch.xpack.esql.datasource.ndjson.NdJsonDataSourcePlugin' + extendedPlugins = ['x-pack-esql'] +} + +base { + archivesName = 'esql-datasource-ndjson' +} + +dependencies { + // SPI interfaces from ESQL core + compileOnly project(path: xpackModule('esql')) + compileOnly project(path: xpackModule('esql-core')) + compileOnly project(path: xpackModule('core')) + compileOnly project(':server') + compileOnly project(xpackModule('esql:compute')) + + implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + + testImplementation project(':test:framework') + testImplementation('commons-io:commons-io:2.15.1') + testImplementation(testArtifact(project(xpackModule('core')))) +} diff --git a/x-pack/plugin/esql-datasource-ndjson/qa/build.gradle b/x-pack/plugin/esql-datasource-ndjson/qa/build.gradle new file mode 100644 index 0000000000000..8a89007498215 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/qa/build.gradle @@ -0,0 +1,62 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +apply plugin: 'elasticsearch.internal-java-rest-test' +apply plugin: org.elasticsearch.gradle.internal.precommit.CheckstylePrecommitPlugin +apply plugin: org.elasticsearch.gradle.internal.precommit.ForbiddenApisPrecommitPlugin +apply plugin: org.elasticsearch.gradle.internal.precommit.ForbiddenPatternsPrecommitPlugin +apply plugin: org.elasticsearch.gradle.internal.precommit.FilePermissionsPrecommitPlugin +apply plugin: org.elasticsearch.gradle.internal.precommit.LoggerUsagePrecommitPlugin +apply plugin: org.elasticsearch.gradle.internal.precommit.TestingConventionsPrecommitPlugin + +dependencies { + // Test fixtures and spec reader infrastructure + javaRestTestImplementation project(xpackModule('esql:qa:testFixtures')) + javaRestTestImplementation project(xpackModule('esql:qa:server')) + javaRestTestImplementation project(xpackModule('esql')) + javaRestTestImplementation(project(path: xpackModule('esql'), configuration: 'testRuntimeElements')) + + // S3 fixture infrastructure for mocking S3 operations + javaRestTestImplementation project(':test:fixtures:s3-fixture') + javaRestTestImplementation project(':test:fixtures:aws-fixture-utils') + + // Repository S3 module for cluster + clusterModules project(':modules:repository-s3') + + // The datasource plugin under test + clusterPlugins project(xpackModule('esql-datasource-ndjson')) + clusterPlugins project(xpackModule('esql-datasource-http')) + clusterPlugins project(xpackModule('esql-datasource-s3')) +} + +// The test fixtures are included directly in this module's javaRestTest/resources directory + +tasks.named('javaRestTest') { + usesDefaultDistribution("to be triaged") + maxParallelForks = 1 + + // Increase timeouts for S3 operations which may take longer than standard queries + systemProperty 'tests.rest.client_timeout', '60' + systemProperty 'tests.rest.socket_timeout', '60' + + // Enable more verbose logging for debugging + testLogging { + events = ["passed", "skipped", "failed"] + exceptionFormat = "full" + showStandardStreams = false + } +} + +restResources { + restApi { + include '_common', 'bulk', 'get', 'indices', 'esql', 'xpack', 'cluster', 'capabilities', 'index' + } + restTests { + includeXpack 'esql' + } +} + diff --git a/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/Clusters.java b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/Clusters.java new file mode 100644 index 0000000000000..2d6ebbce17192 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/Clusters.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.ndjson; + +import org.elasticsearch.core.PathUtils; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.LocalClusterConfigProvider; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; + +import java.net.URISyntaxException; +import java.net.URL; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.datasources.S3FixtureUtils.ACCESS_KEY; +import static org.elasticsearch.xpack.esql.datasources.S3FixtureUtils.SECRET_KEY; + +/** + * Cluster configuration for NDJSON integration tests. + */ +public class Clusters { + + public static ElasticsearchCluster testCluster(Supplier s3EndpointSupplier, LocalClusterConfigProvider configProvider) { + return ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) + .shared(true) + // Enable S3 repository plugin for S3 access + .module("repository-s3") + // Basic cluster settings + .setting("xpack.security.enabled", "false") + .setting("xpack.license.self_generated.type", "trial") + // Disable ML to avoid native code loading issues in some environments + .setting("xpack.ml.enabled", "false") + // Allow the LOCAL storage backend to read fixture files from the test resources directory. + // The esql-datasource-http plugin's entitlement policy uses shared_repo for file read access. + .setting("path.repo", fixturesPath()) + // S3 client configuration for accessing the S3HttpFixture + .setting("s3.client.default.endpoint", s3EndpointSupplier) + // S3 credentials must be stored in keystore, not as regular settings + .keystore("s3.client.default.access_key", ACCESS_KEY) + .keystore("s3.client.default.secret_key", SECRET_KEY) + // Disable SSL for HTTP fixture + .setting("s3.client.default.protocol", "http") + // Disable AWS SDK profile file loading by pointing to non-existent files + // This prevents the SDK from trying to read ~/.aws/credentials and ~/.aws/config + // which would violate Elasticsearch entitlements + .environment("AWS_CONFIG_FILE", "/dev/null/aws/config") + .environment("AWS_SHARED_CREDENTIALS_FILE", "/dev/null/aws/credentials") + // Apply any additional configuration + .apply(() -> configProvider) + .build(); + } + + public static ElasticsearchCluster testCluster(Supplier s3EndpointSupplier) { + return testCluster(s3EndpointSupplier, config -> {}); + } + + private static String fixturesPath() { + URL resourceUrl = Clusters.class.getResource("/iceberg-fixtures"); + if (resourceUrl != null && resourceUrl.getProtocol().equals("file")) { + try { + return PathUtils.get(resourceUrl.toURI()).toAbsolutePath().toString(); + } catch (URISyntaxException e) { + throw new IllegalStateException("Failed to resolve fixtures path", e); + } + } + // Fall back to a safe default; LOCAL tests will fail gracefully + return "/tmp"; + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/NdJsonFormatSpecIT.java b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/NdJsonFormatSpecIT.java new file mode 100644 index 0000000000000..97a9e53367ef1 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/ndjson/NdJsonFormatSpecIT.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.ndjson; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.CsvSpecReader.CsvTestCase; +import org.elasticsearch.xpack.esql.qa.rest.AbstractExternalSourceSpecTestCase; +import org.junit.ClassRule; + +import java.util.List; + +/** + * Parameterized integration tests for standalone NDJSON files. + * Each csv-spec test is run against every configured storage backend (S3, HTTP, LOCAL). + */ +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class NdJsonFormatSpecIT extends AbstractExternalSourceSpecTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(() -> s3Fixture.getAddress()); + + public NdJsonFormatSpecIT( + String fileName, + String groupName, + String testName, + Integer lineNumber, + CsvTestCase testCase, + String instructions, + StorageBackend storageBackend + ) { + super(fileName, groupName, testName, lineNumber, testCase, instructions, storageBackend, "ndjson"); + } + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @ParametersFactory(argumentFormatting = "csv-spec:%2$s.%3$s [%7$s]") + public static List readScriptSpec() throws Exception { + return readExternalSpecTests("/external-ndjson.csv-spec"); + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/external-ndjson.csv-spec b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/external-ndjson.csv-spec new file mode 100644 index 0000000000000..c51aa8d0903a0 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/external-ndjson.csv-spec @@ -0,0 +1,168 @@ +// Tests for NDJSON files, whose schema is inferred, contrarily to Parquet or even our CSV dialect. +// Adapted from `external-basic.csv-spec`. +// Uses {{employees}} template that gets replaced with the actual path based on storage backend and format + +readAllEmployees +EXTERNAL "{{employees}}" +| KEEP emp_no, first_name, last_name, birth_date, gender, hire_date, languages, height, salary, still_hired +| SORT emp_no +| LIMIT 5; + +emp_no:integer | first_name:keyword | last_name:keyword | birth_date:date | gender:keyword | hire_date:date | languages:integer | height:double | salary:integer | still_hired:boolean +10001 | "Georgi" | "Facello" | 1953-09-02T00:00:00.000Z | "M" | 1986-06-26T00:00:00.000Z | 2 | 2.03 | 57305 | true +10002 | "Bezalel" | "Simmel" | 1964-06-02T00:00:00.000Z | "F" | 1985-11-21T00:00:00.000Z | 5 | 2.08 | 56371 | true +10003 | "Parto" | "Bamford" | 1959-12-03T00:00:00.000Z | "M" | 1986-08-28T00:00:00.000Z | 4 | 1.83 | 61805 | false +10004 | "Chirstian" | "Koblick" | 1954-05-01T00:00:00.000Z | "M" | 1986-12-01T00:00:00.000Z | 5 | 1.78 | 36174 | true +10005 | "Kyoichi" | "Maliniak" | 1955-01-21T00:00:00.000Z | "M" | 1989-09-12T00:00:00.000Z | 1 | 2.05 | 63528 | true +; + +selectSpecificColumns +EXTERNAL "{{employees}}" +| KEEP emp_no, first_name, last_name, salary +| SORT emp_no +| LIMIT 5; + +emp_no:integer | first_name:keyword | last_name:keyword | salary:integer +10001 | "Georgi" | "Facello" | 57305 +10002 | "Bezalel" | "Simmel" | 56371 +10003 | "Parto" | "Bamford" | 61805 +10004 | "Chirstian" | "Koblick" | 36174 +10005 | "Kyoichi" | "Maliniak" | 63528 +; + +filterByEmployeeNumber +EXTERNAL "{{employees}}" +| WHERE emp_no == 10001 +| KEEP emp_no, first_name, last_name; + +emp_no:integer | first_name:keyword | last_name:keyword +10001 | "Georgi" | "Facello" +; + +filterBySalaryRange +EXTERNAL "{{employees}}" +| WHERE salary > 60000 AND salary < 70000 +| KEEP emp_no, first_name, salary +| SORT emp_no +| LIMIT 5; + +emp_no:integer | first_name:keyword | salary:integer +10003 | "Parto" | 61805 +10005 | "Kyoichi" | 63528 +10006 | "Anneke" | 60335 +10009 | "Sumant" | 66174 +10016 | "Kazuhito" | 61358 +; + +filterByGender +EXTERNAL "{{employees}}" +| WHERE gender == "F" +| KEEP emp_no, first_name, last_name, gender +| SORT emp_no +| LIMIT 3; + +emp_no:integer | first_name:keyword | last_name:keyword | gender:keyword +10002 | "Bezalel" | "Simmel" | "F" +10006 | "Anneke" | "Preusig" | "F" +10007 | "Tzvetan" | "Zielinski" | "F" +; + +filterByEmploymentStatus +EXTERNAL "{{employees}}" +| WHERE still_hired == false +| KEEP emp_no, first_name, last_name, still_hired +| SORT emp_no +| LIMIT 3; + +emp_no:integer | first_name:keyword | last_name:keyword | still_hired:boolean +10003 | "Parto" | "Bamford" | false +10006 | "Anneke" | "Preusig" | false +10009 | "Sumant" | "Peac" | false +; + +aggregateCount +EXTERNAL "{{employees}}" +| STATS count = COUNT(*); + +count:long +100 +; + +aggregateByGender +EXTERNAL "{{employees}}" +| STATS count = COUNT(*) BY gender +| SORT gender; + +count:long | gender:keyword +33 | "F" +57 | "M" +10 | null +; + +aggregateAverageSalary +EXTERNAL "{{employees}}" +| STATS avg_salary = AVG(salary); + +avg_salary:double +48248.55 +; + +aggregateSalaryStats +EXTERNAL "{{employees}}" +| STATS min_salary = MIN(salary), max_salary = MAX(salary), avg_salary = AVG(salary); + +min_salary:integer | max_salary:integer | avg_salary:double +25324 | 74999 | 48248.55 +; + +aggregateSalaryByGender +EXTERNAL "{{employees}}" +| STATS avg_salary = AVG(salary), count = COUNT(*) BY gender +| SORT gender; + +avg_salary:double | count:long | gender:keyword +50490.78787878788 | 33 | "F" +46860.59649122807 | 57 | "M" +48760.5 | 10 | null +; + +filterAndSort +EXTERNAL "{{employees}}" +| WHERE salary > 70000 +| KEEP emp_no, first_name, salary +| SORT salary DESC +| LIMIT 5; + +emp_no:integer | first_name:keyword | salary:integer +10029 | "Otmar" | 74999 +10045 | "Moss" | 74970 +10007 | "Tzvetan" | 74572 +10027 | "Divier" | 73851 +10019 | "Lillian" | 73717 +; + +evalComputedColumn +EXTERNAL "{{employees}}" +| EVAL annual_bonus = salary * 0.1 +| KEEP emp_no, first_name, salary, annual_bonus +| SORT emp_no +| LIMIT 3; + +emp_no:integer | first_name:keyword | salary:integer | annual_bonus:double +10001 | "Georgi" | 57305 | 5730.5 +10002 | "Bezalel" | 56371 | 5637.1 +10003 | "Parto" | 61805 | 6180.5 +; + +complexQuery +EXTERNAL "{{employees}}" +| WHERE still_hired == true AND salary > 55000 +| EVAL salary_category = CASE(salary < 60000, "standard", salary < 70000, "senior", "principal") +| STATS count = COUNT(*), avg_salary = AVG(salary) BY salary_category +| SORT salary_category; + +count:long | avg_salary:double | salary_category:keyword +2 | 74075.0 | "principal" +5 | 67017.0 | "senior" +4 | 56789.25 | "standard" +; diff --git a/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/iceberg-fixtures/standalone/employees.ndjson b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/iceberg-fixtures/standalone/employees.ndjson new file mode 100644 index 0000000000000..f279e8a44b2df --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/qa/src/javaRestTest/resources/iceberg-fixtures/standalone/employees.ndjson @@ -0,0 +1,100 @@ +{"birth_date":"1953-09-02T00:00:00Z","emp_no":10001,"first_name":"Georgi","gender":"M","hire_date":"1986-06-26T00:00:00Z","languages":2,"last_name":"Facello","salary":57305,"height":2.03,"still_hired":true,"avg_worked_seconds":268728049,"job_positions":["Senior Python Developer","Accountant"],"is_rehired":[false,true],"salary_change":1.19} +{"birth_date":"1964-06-02T00:00:00Z","emp_no":10002,"first_name":"Bezalel","gender":"F","hire_date":"1985-11-21T00:00:00Z","languages":5,"last_name":"Simmel","salary":56371,"height":2.08,"still_hired":true,"avg_worked_seconds":328922887,"job_positions":"Senior Team Lead","is_rehired":[false,false],"salary_change":[-7.23,11.17]} +{"birth_date":"1959-12-03T00:00:00Z","emp_no":10003,"first_name":"Parto","gender":"M","hire_date":"1986-08-28T00:00:00Z","languages":4,"last_name":"Bamford","salary":61805,"height":1.83,"still_hired":false,"avg_worked_seconds":200296405,"salary_change":[14.68,12.82]} +{"birth_date":"1954-05-01T00:00:00Z","emp_no":10004,"first_name":"Chirstian","gender":"M","hire_date":"1986-12-01T00:00:00Z","languages":5,"last_name":"Koblick","salary":36174,"height":1.78,"still_hired":true,"avg_worked_seconds":311267831,"job_positions":["Reporting Analyst","Tech Lead","Head Human Resources","Support Engineer"],"is_rehired":true,"salary_change":[3.65,-0.35,1.13,13.48]} +{"birth_date":"1955-01-21T00:00:00Z","emp_no":10005,"first_name":"Kyoichi","gender":"M","hire_date":"1989-09-12T00:00:00Z","languages":1,"last_name":"Maliniak","salary":63528,"height":2.05,"still_hired":true,"avg_worked_seconds":244294991,"is_rehired":[false,false,false,true],"salary_change":[-2.14,13.07]} +{"birth_date":"1953-04-20T00:00:00Z","emp_no":10006,"first_name":"Anneke","gender":"F","hire_date":"1989-06-02T00:00:00Z","languages":3,"last_name":"Preusig","salary":60335,"height":1.56,"still_hired":false,"avg_worked_seconds":372957040,"job_positions":["Tech Lead","Principal Support Engineer","Senior Team Lead"],"salary_change":-3.90} +{"birth_date":"1957-05-23T00:00:00Z","emp_no":10007,"first_name":"Tzvetan","gender":"F","hire_date":"1989-02-10T00:00:00Z","languages":4,"last_name":"Zielinski","salary":74572,"height":1.70,"still_hired":true,"avg_worked_seconds":393084805,"is_rehired":[true,false,true,false],"salary_change":[-7.06,1.99,0.57]} +{"birth_date":"1958-02-19T00:00:00Z","emp_no":10008,"first_name":"Saniya","gender":"M","hire_date":"1994-09-15T00:00:00Z","languages":2,"last_name":"Kalloufi","salary":43906,"height":2.10,"still_hired":true,"avg_worked_seconds":283074758,"job_positions":["Senior Python Developer","Junior Developer","Purchase Manager","Internship"],"is_rehired":[true,false],"salary_change":[12.68,3.54,0.75,-2.92]} +{"birth_date":"1952-04-19T00:00:00Z","emp_no":10009,"first_name":"Sumant","gender":"F","hire_date":"1985-02-18T00:00:00Z","languages":1,"last_name":"Peac","salary":66174,"height":1.85,"still_hired":false,"avg_worked_seconds":236805489,"job_positions":["Senior Python Developer","Internship"]} +{"birth_date":"1963-06-01T00:00:00Z","emp_no":10010,"first_name":"Duangkaew","hire_date":"1989-08-24T00:00:00Z","languages":4,"last_name":"Piveteau","salary":45797,"height":1.70,"still_hired":false,"avg_worked_seconds":315236372,"job_positions":["Architect","Reporting Analyst","Tech Lead","Purchase Manager"],"is_rehired":[true,true,false,false],"salary_change":[5.05,-6.77,4.69,12.15]} +{"birth_date":"1953-11-07T00:00:00Z","emp_no":10011,"first_name":"Mary","hire_date":"1990-01-22T00:00:00Z","languages":5,"last_name":"Sluis","salary":31120,"height":1.50,"still_hired":true,"avg_worked_seconds":239615525,"job_positions":["Architect","Reporting Analyst","Tech Lead","Senior Team Lead"],"is_rehired":[true,true],"salary_change":[10.35,-7.82,8.73,3.48]} +{"birth_date":"1960-10-04T00:00:00Z","emp_no":10012,"first_name":"Patricio","hire_date":"1992-12-18T00:00:00Z","languages":5,"last_name":"Bridgland","salary":48942,"height":1.97,"still_hired":false,"avg_worked_seconds":365510850,"job_positions":["Head Human Resources","Accountant"],"is_rehired":[false,true,true,false],"salary_change":0.04} +{"birth_date":"1963-06-07T00:00:00Z","emp_no":10013,"first_name":"Eberhardt","hire_date":"1985-10-20T00:00:00Z","languages":1,"last_name":"Terkki","salary":48735,"height":1.94,"still_hired":true,"avg_worked_seconds":253864340,"job_positions":"Reporting Analyst","is_rehired":[true,true]} +{"birth_date":"1956-02-12T00:00:00Z","emp_no":10014,"first_name":"Berni","hire_date":"1987-03-11T00:00:00Z","languages":5,"last_name":"Genin","salary":37137,"height":1.99,"still_hired":false,"avg_worked_seconds":225049139,"job_positions":["Reporting Analyst","Data Scientist","Head Human Resources"],"salary_change":[-1.89,9.07]} +{"birth_date":"1959-08-19T00:00:00Z","emp_no":10015,"first_name":"Guoxiang","hire_date":"1987-07-02T00:00:00Z","languages":5,"last_name":"Nooteboom","salary":25324,"height":1.66,"still_hired":true,"avg_worked_seconds":390266432,"job_positions":["Principal Support Engineer","Junior Developer","Head Human Resources","Support Engineer"],"is_rehired":[true,false,false,false],"salary_change":[14.25,12.40]} +{"birth_date":"1961-05-02T00:00:00Z","emp_no":10016,"first_name":"Kazuhito","hire_date":"1995-01-27T00:00:00Z","languages":2,"last_name":"Cappelletti","salary":61358,"height":1.54,"still_hired":false,"avg_worked_seconds":253029411,"job_positions":["Reporting Analyst","Python Developer","Accountant","Purchase Manager"],"is_rehired":[false,false],"salary_change":[-5.18,7.69]} +{"birth_date":"1958-07-06T00:00:00Z","emp_no":10017,"first_name":"Cristinel","hire_date":"1993-08-03T00:00:00Z","languages":2,"last_name":"Bouloucos","salary":58715,"height":1.74,"still_hired":false,"avg_worked_seconds":236703986,"job_positions":["Data Scientist","Head Human Resources","Purchase Manager"],"is_rehired":[true,false,true,true],"salary_change":-6.33} +{"birth_date":"1954-06-19T00:00:00Z","emp_no":10018,"first_name":"Kazuhide","hire_date":"1987-04-03T00:00:00Z","languages":2,"last_name":"Peha","salary":56760,"height":1.97,"still_hired":false,"avg_worked_seconds":309604079,"job_positions":"Junior Developer","is_rehired":[false,false,true,true],"salary_change":[-1.64,11.51,-5.32]} +{"birth_date":"1953-01-23T00:00:00Z","emp_no":10019,"first_name":"Lillian","hire_date":"1999-04-30T00:00:00Z","languages":1,"last_name":"Haddadi","salary":73717,"height":2.06,"still_hired":false,"avg_worked_seconds":342855721,"job_positions":"Purchase Manager","is_rehired":[false,false],"salary_change":[-6.84,8.42,-7.26]} +{"birth_date":"1952-12-24T00:00:00Z","emp_no":10020,"first_name":"Mayuko","gender":"M","hire_date":"1991-01-26T00:00:00Z","last_name":"Warwick","salary":40031,"height":1.41,"still_hired":false,"avg_worked_seconds":373309605,"job_positions":"Tech Lead","is_rehired":[true,true,false],"salary_change":-5.81} +{"birth_date":"1960-02-20T00:00:00Z","emp_no":10021,"first_name":"Ramzi","gender":"M","hire_date":"1988-02-10T00:00:00Z","last_name":"Erde","salary":60408,"height":1.47,"still_hired":false,"avg_worked_seconds":287654610,"job_positions":"Support Engineer","is_rehired":true} +{"birth_date":"1952-07-08T00:00:00Z","emp_no":10022,"first_name":"Shahaf","gender":"M","hire_date":"1995-08-22T00:00:00Z","last_name":"Famili","salary":48233,"height":1.82,"still_hired":false,"avg_worked_seconds":233521306,"job_positions":["Reporting Analyst","Data Scientist","Python Developer","Internship"],"is_rehired":[true,false],"salary_change":[12.09,2.85]} +{"birth_date":"1953-09-29T00:00:00Z","emp_no":10023,"first_name":"Bojan","gender":"F","hire_date":"1989-12-17T00:00:00Z","last_name":"Montemayor","salary":47896,"height":1.75,"still_hired":true,"avg_worked_seconds":330870342,"job_positions":["Accountant","Support Engineer","Purchase Manager"],"is_rehired":[true,true,false],"salary_change":[14.63,0.80]} +{"birth_date":"1958-09-05T00:00:00Z","emp_no":10024,"first_name":"Suzette","gender":"F","hire_date":"1997-05-19T00:00:00Z","last_name":"Pettey","salary":64675,"height":2.08,"still_hired":true,"avg_worked_seconds":367717671,"job_positions":"Junior Developer","is_rehired":[true,true,true,true]} +{"birth_date":"1958-10-31T00:00:00Z","emp_no":10025,"first_name":"Prasadram","gender":"M","hire_date":"1987-08-17T00:00:00Z","last_name":"Heyers","salary":47411,"height":1.87,"still_hired":false,"avg_worked_seconds":371270797,"job_positions":"Accountant","is_rehired":[true,false],"salary_change":[-4.33,-2.90,12.06,-3.46]} +{"birth_date":"1953-04-03T00:00:00Z","emp_no":10026,"first_name":"Yongqiao","gender":"M","hire_date":"1995-03-20T00:00:00Z","last_name":"Berztiss","salary":28336,"height":2.10,"still_hired":true,"avg_worked_seconds":359208133,"job_positions":"Reporting Analyst","is_rehired":[false,true],"salary_change":[-7.37,10.62,11.20]} +{"birth_date":"1962-07-10T00:00:00Z","emp_no":10027,"first_name":"Divier","gender":"F","hire_date":"1989-07-07T00:00:00Z","last_name":"Reistad","salary":73851,"height":1.53,"still_hired":false,"avg_worked_seconds":374037782,"job_positions":"Senior Python Developer","is_rehired":false} +{"birth_date":"1963-11-26T00:00:00Z","emp_no":10028,"first_name":"Domenick","gender":"M","hire_date":"1991-10-22T00:00:00Z","last_name":"Tempesti","salary":39356,"height":2.07,"still_hired":true,"avg_worked_seconds":226435054,"job_positions":["Tech Lead","Python Developer","Accountant","Internship"],"is_rehired":[true,false,false,true]} +{"birth_date":"1956-12-13T00:00:00Z","emp_no":10029,"first_name":"Otmar","gender":"M","hire_date":"1985-11-20T00:00:00Z","last_name":"Herbst","salary":74999,"height":1.99,"still_hired":false,"avg_worked_seconds":257694181,"job_positions":["Senior Python Developer","Data Scientist","Principal Support Engineer"],"is_rehired":true,"salary_change":[-0.32,-1.90,-8.19]} +{"birth_date":"1958-07-14T00:00:00Z","emp_no":10030,"gender":"M","hire_date":"1994-02-17T00:00:00Z","languages":3,"last_name":"Demeyer","salary":67492,"height":1.92,"still_hired":false,"avg_worked_seconds":394597613,"job_positions":["Tech Lead","Data Scientist","Senior Team Lead"],"is_rehired":[true,false,false],"salary_change":-0.40} +{"birth_date":"1959-01-27T00:00:00Z","emp_no":10031,"gender":"M","hire_date":"1991-09-01T00:00:00Z","languages":4,"last_name":"Joslin","salary":37716,"height":1.68,"still_hired":false,"avg_worked_seconds":348545109,"job_positions":["Architect","Senior Python Developer","Purchase Manager","Senior Team Lead"],"is_rehired":false} +{"birth_date":"1960-08-09T00:00:00Z","emp_no":10032,"gender":"F","hire_date":"1990-06-20T00:00:00Z","languages":3,"last_name":"Reistad","salary":62233,"height":2.10,"still_hired":false,"avg_worked_seconds":277622619,"job_positions":["Architect","Senior Python Developer","Junior Developer","Purchase Manager"],"is_rehired":[false,false],"salary_change":[9.32,-4.92]} +{"birth_date":"1956-11-14T00:00:00Z","emp_no":10033,"gender":"M","hire_date":"1987-03-18T00:00:00Z","languages":1,"last_name":"Merlo","salary":70011,"height":1.63,"still_hired":false,"avg_worked_seconds":208374744,"is_rehired":true} +{"birth_date":"1962-12-29T00:00:00Z","emp_no":10034,"gender":"M","hire_date":"1988-09-21T00:00:00Z","languages":1,"last_name":"Swan","salary":39878,"height":1.46,"still_hired":false,"avg_worked_seconds":214393176,"job_positions":["Business Analyst","Data Scientist","Python Developer","Accountant"],"is_rehired":false,"salary_change":-8.46} +{"birth_date":"1953-02-08T00:00:00Z","emp_no":10035,"gender":"M","hire_date":"1988-09-05T00:00:00Z","languages":5,"last_name":"Chappelet","salary":25945,"height":1.81,"still_hired":false,"avg_worked_seconds":203838153,"job_positions":["Senior Python Developer","Data Scientist"],"is_rehired":false,"salary_change":[-2.54,-6.58]} +{"birth_date":"1959-08-10T00:00:00Z","emp_no":10036,"gender":"M","hire_date":"1992-01-03T00:00:00Z","languages":4,"last_name":"Portugali","salary":60781,"height":1.61,"still_hired":false,"avg_worked_seconds":305493131,"job_positions":"Senior Python Developer","is_rehired":[true,false,false]} +{"birth_date":"1963-07-22T00:00:00Z","emp_no":10037,"gender":"M","hire_date":"1990-12-05T00:00:00Z","languages":2,"last_name":"Makrucki","salary":37691,"height":2.00,"still_hired":true,"avg_worked_seconds":359217000,"job_positions":["Senior Python Developer","Tech Lead","Accountant"],"is_rehired":false,"salary_change":-7.08} +{"birth_date":"1960-07-20T00:00:00Z","emp_no":10038,"gender":"M","hire_date":"1989-09-20T00:00:00Z","languages":4,"last_name":"Lortz","salary":35222,"height":1.53,"still_hired":true,"avg_worked_seconds":314036411,"job_positions":["Senior Python Developer","Python Developer","Support Engineer"]} +{"birth_date":"1959-10-01T00:00:00Z","emp_no":10039,"gender":"M","hire_date":"1988-01-19T00:00:00Z","languages":2,"last_name":"Brender","salary":36051,"height":1.55,"still_hired":false,"avg_worked_seconds":243221262,"job_positions":["Business Analyst","Python Developer","Principal Support Engineer"],"is_rehired":[true,true],"salary_change":-6.90} +{"emp_no":10040,"first_name":"Weiyi","gender":"F","hire_date":"1993-02-14T00:00:00Z","languages":4,"last_name":"Meriste","salary":37112,"height":1.90,"still_hired":false,"avg_worked_seconds":244478622,"job_positions":"Principal Support Engineer","is_rehired":[true,false,true,true],"salary_change":[6.97,14.74,-8.94,1.92]} +{"emp_no":10041,"first_name":"Uri","gender":"F","hire_date":"1989-11-12T00:00:00Z","languages":1,"last_name":"Lenart","salary":56415,"height":1.75,"still_hired":false,"avg_worked_seconds":287789442,"job_positions":["Data Scientist","Head Human Resources","Internship","Senior Team Lead"],"salary_change":[9.21,0.05,7.29,-2.94]} +{"emp_no":10042,"first_name":"Magy","gender":"F","hire_date":"1993-03-21T00:00:00Z","languages":3,"last_name":"Stamatiou","salary":30404,"height":1.44,"still_hired":true,"avg_worked_seconds":246355863,"job_positions":["Architect","Business Analyst","Junior Developer","Internship"],"salary_change":[-9.28,9.42]} +{"emp_no":10043,"first_name":"Yishay","gender":"M","hire_date":"1990-10-20T00:00:00Z","languages":1,"last_name":"Tzvieli","salary":34341,"height":1.52,"still_hired":true,"avg_worked_seconds":287222180,"job_positions":["Data Scientist","Python Developer","Support Engineer"],"is_rehired":[false,true,true],"salary_change":[-5.17,4.62,7.42]} +{"emp_no":10044,"first_name":"Mingsen","gender":"F","hire_date":"1994-05-21T00:00:00Z","languages":1,"last_name":"Casley","salary":39728,"height":2.06,"still_hired":false,"avg_worked_seconds":387408356,"job_positions":["Tech Lead","Principal Support Engineer","Accountant","Support Engineer"],"is_rehired":[true,true],"salary_change":8.09} +{"emp_no":10045,"first_name":"Moss","gender":"M","hire_date":"1989-09-02T00:00:00Z","languages":3,"last_name":"Shanbhogue","salary":74970,"height":1.70,"still_hired":false,"avg_worked_seconds":371418933,"job_positions":["Principal Support Engineer","Junior Developer","Accountant","Purchase Manager"],"is_rehired":[true,false]} +{"emp_no":10046,"first_name":"Lucien","gender":"M","hire_date":"1992-06-20T00:00:00Z","languages":4,"last_name":"Rosenbaum","salary":50064,"height":1.52,"still_hired":true,"avg_worked_seconds":302353405,"job_positions":["Principal Support Engineer","Junior Developer","Head Human Resources","Internship"],"is_rehired":[true,true,false,true],"salary_change":2.39} +{"emp_no":10047,"first_name":"Zvonko","gender":"M","hire_date":"1989-03-31T00:00:00Z","languages":4,"last_name":"Nyanchama","salary":42716,"height":1.52,"still_hired":true,"avg_worked_seconds":306369346,"job_positions":["Architect","Data Scientist","Principal Support Engineer","Senior Team Lead"],"is_rehired":true,"salary_change":[-6.36,12.12]} +{"emp_no":10048,"first_name":"Florian","gender":"M","hire_date":"1985-02-24T00:00:00Z","languages":3,"last_name":"Syrotiuk","salary":26436,"height":2.00,"still_hired":false,"avg_worked_seconds":248451647,"job_positions":"Internship","is_rehired":[true,true]} +{"emp_no":10049,"first_name":"Basil","gender":"F","hire_date":"1992-05-04T00:00:00Z","languages":5,"last_name":"Tramer","salary":37853,"height":1.52,"still_hired":true,"avg_worked_seconds":320725709,"job_positions":["Senior Python Developer","Business Analyst"],"salary_change":-1.05} +{"birth_date":"1958-05-21T00:00:00Z","emp_no":10050,"first_name":"Yinghua","gender":"M","hire_date":"1990-12-25T00:00:00Z","languages":2,"last_name":"Dredge","salary":43026,"height":1.96,"still_hired":true,"avg_worked_seconds":242731798,"job_positions":["Reporting Analyst","Junior Developer","Accountant","Support Engineer"],"is_rehired":true,"salary_change":[8.70,10.94]} +{"birth_date":"1953-07-28T00:00:00Z","emp_no":10051,"first_name":"Hidefumi","gender":"M","hire_date":"1992-10-15T00:00:00Z","languages":3,"last_name":"Caine","salary":58121,"height":1.89,"still_hired":true,"avg_worked_seconds":374753122,"job_positions":["Business Analyst","Accountant","Purchase Manager"]} +{"birth_date":"1961-02-26T00:00:00Z","emp_no":10052,"first_name":"Heping","gender":"M","hire_date":"1988-05-21T00:00:00Z","languages":1,"last_name":"Nitsch","salary":55360,"height":1.79,"still_hired":true,"avg_worked_seconds":299654717,"is_rehired":[true,true,false],"salary_change":[-0.55,-1.89,-4.22,-6.03]} +{"birth_date":"1954-09-13T00:00:00Z","emp_no":10053,"first_name":"Sanjiv","gender":"F","hire_date":"1986-02-04T00:00:00Z","languages":3,"last_name":"Zschoche","salary":54462,"height":1.58,"still_hired":false,"avg_worked_seconds":368103911,"job_positions":"Support Engineer","is_rehired":[true,false,true,false],"salary_change":[-7.67,-3.25]} +{"birth_date":"1957-04-04T00:00:00Z","emp_no":10054,"first_name":"Mayumi","gender":"M","hire_date":"1995-03-13T00:00:00Z","languages":4,"last_name":"Schueller","salary":65367,"height":1.82,"still_hired":false,"avg_worked_seconds":297441693,"job_positions":"Principal Support Engineer","is_rehired":[false,false]} +{"birth_date":"1956-06-06T00:00:00Z","emp_no":10055,"first_name":"Georgy","gender":"M","hire_date":"1992-04-27T00:00:00Z","languages":5,"last_name":"Dredge","salary":49281,"height":2.04,"still_hired":false,"avg_worked_seconds":283157844,"job_positions":["Senior Python Developer","Head Human Resources","Internship","Support Engineer"],"is_rehired":[false,false,true],"salary_change":[7.34,12.99,3.17]} +{"birth_date":"1961-09-01T00:00:00Z","emp_no":10056,"first_name":"Brendon","gender":"F","hire_date":"1990-02-01T00:00:00Z","languages":2,"last_name":"Bernini","salary":33370,"height":1.57,"still_hired":true,"avg_worked_seconds":349086555,"job_positions":"Senior Team Lead","is_rehired":[true,false,false],"salary_change":[10.99,-5.17]} +{"birth_date":"1954-05-30T00:00:00Z","emp_no":10057,"first_name":"Ebbe","gender":"F","hire_date":"1992-01-15T00:00:00Z","languages":4,"last_name":"Callaway","salary":27215,"height":1.59,"still_hired":true,"avg_worked_seconds":324356269,"job_positions":["Python Developer","Head Human Resources"],"salary_change":[-6.73,-2.43,-5.27,1.03]} +{"birth_date":"1954-10-01T00:00:00Z","emp_no":10058,"first_name":"Berhard","gender":"M","hire_date":"1987-04-13T00:00:00Z","languages":3,"last_name":"McFarlin","salary":38376,"height":1.83,"still_hired":false,"avg_worked_seconds":268378108,"job_positions":"Principal Support Engineer","salary_change":-4.89} +{"birth_date":"1953-09-19T00:00:00Z","emp_no":10059,"first_name":"Alejandro","gender":"F","hire_date":"1991-06-26T00:00:00Z","languages":2,"last_name":"McAlpine","salary":44307,"height":1.48,"still_hired":false,"avg_worked_seconds":237368465,"job_positions":["Architect","Principal Support Engineer","Purchase Manager","Senior Team Lead"],"is_rehired":false,"salary_change":[5.53,13.38,-4.69,6.27]} +{"birth_date":"1961-10-15T00:00:00Z","emp_no":10060,"first_name":"Breannda","gender":"M","hire_date":"1987-11-02T00:00:00Z","languages":2,"last_name":"Billingsley","salary":29175,"height":1.42,"still_hired":true,"avg_worked_seconds":341158890,"job_positions":["Business Analyst","Data Scientist","Senior Team Lead"],"is_rehired":[false,false,true,false],"salary_change":[-1.76,-0.85]} +{"birth_date":"1962-10-19T00:00:00Z","emp_no":10061,"first_name":"Tse","gender":"M","hire_date":"1985-09-17T00:00:00Z","languages":1,"last_name":"Herber","salary":49095,"height":1.45,"still_hired":false,"avg_worked_seconds":327550310,"job_positions":["Purchase Manager","Senior Team Lead"],"is_rehired":[false,true],"salary_change":[14.39,-2.58,-0.95]} +{"birth_date":"1961-11-02T00:00:00Z","emp_no":10062,"first_name":"Anoosh","gender":"M","hire_date":"1991-08-30T00:00:00Z","languages":3,"last_name":"Peyn","salary":65030,"height":1.70,"still_hired":false,"avg_worked_seconds":203989706,"job_positions":["Python Developer","Senior Team Lead"],"is_rehired":[false,true,true],"salary_change":-1.17} +{"birth_date":"1952-08-06T00:00:00Z","emp_no":10063,"first_name":"Gino","gender":"F","hire_date":"1989-04-08T00:00:00Z","languages":3,"last_name":"Leonhardt","salary":52121,"height":1.78,"still_hired":true,"avg_worked_seconds":214068302,"is_rehired":true} +{"birth_date":"1959-04-07T00:00:00Z","emp_no":10064,"first_name":"Udi","gender":"M","hire_date":"1985-11-20T00:00:00Z","languages":5,"last_name":"Jansch","salary":33956,"height":1.93,"still_hired":false,"avg_worked_seconds":307364077,"job_positions":"Purchase Manager","is_rehired":[false,false,true,false],"salary_change":[-8.66,-2.52]} +{"birth_date":"1963-04-14T00:00:00Z","emp_no":10065,"first_name":"Satosi","gender":"M","hire_date":"1988-05-18T00:00:00Z","languages":2,"last_name":"Awdeh","salary":50249,"height":1.59,"still_hired":false,"avg_worked_seconds":372660279,"job_positions":["Business Analyst","Data Scientist","Principal Support Engineer"],"is_rehired":[false,true],"salary_change":[-1.47,14.44,-9.81]} +{"birth_date":"1952-11-13T00:00:00Z","emp_no":10066,"first_name":"Kwee","gender":"M","hire_date":"1986-02-26T00:00:00Z","languages":5,"last_name":"Schusler","salary":31897,"height":2.10,"still_hired":true,"avg_worked_seconds":360906451,"job_positions":["Senior Python Developer","Data Scientist","Accountant","Internship"],"is_rehired":[true,true,true],"salary_change":5.94} +{"birth_date":"1953-01-07T00:00:00Z","emp_no":10067,"first_name":"Claudi","gender":"M","hire_date":"1987-03-04T00:00:00Z","languages":2,"last_name":"Stavenow","salary":52044,"height":1.77,"still_hired":true,"avg_worked_seconds":347664141,"job_positions":["Tech Lead","Principal Support Engineer"],"is_rehired":[false,false],"salary_change":[8.72,4.44]} +{"birth_date":"1962-11-26T00:00:00Z","emp_no":10068,"first_name":"Charlene","gender":"M","hire_date":"1987-08-07T00:00:00Z","languages":3,"last_name":"Brattka","salary":28941,"height":1.58,"still_hired":true,"avg_worked_seconds":233999584,"job_positions":"Architect","is_rehired":true,"salary_change":[3.43,-5.61,-5.29]} +{"birth_date":"1960-09-06T00:00:00Z","emp_no":10069,"first_name":"Margareta","gender":"F","hire_date":"1989-11-05T00:00:00Z","languages":5,"last_name":"Bierman","salary":41933,"height":1.77,"still_hired":true,"avg_worked_seconds":366512352,"job_positions":["Business Analyst","Junior Developer","Purchase Manager","Support Engineer"],"is_rehired":false,"salary_change":[-3.34,-6.33,6.23,-0.31]} +{"birth_date":"1955-08-20T00:00:00Z","emp_no":10070,"first_name":"Reuven","gender":"M","hire_date":"1985-10-14T00:00:00Z","languages":3,"last_name":"Garigliano","salary":54329,"height":1.77,"still_hired":true,"avg_worked_seconds":347188604,"is_rehired":[true,true,true],"salary_change":-5.90} +{"birth_date":"1958-01-21T00:00:00Z","emp_no":10071,"first_name":"Hisao","gender":"M","hire_date":"1987-10-01T00:00:00Z","languages":2,"last_name":"Lipner","salary":40612,"height":2.07,"still_hired":false,"avg_worked_seconds":306671693,"job_positions":["Business Analyst","Reporting Analyst","Senior Team Lead"],"is_rehired":[false,false,false],"salary_change":-2.69} +{"birth_date":"1952-05-15T00:00:00Z","emp_no":10072,"first_name":"Hironoby","gender":"F","hire_date":"1988-07-21T00:00:00Z","languages":5,"last_name":"Sidou","salary":54518,"height":1.82,"still_hired":true,"avg_worked_seconds":209506065,"job_positions":["Architect","Tech Lead","Python Developer","Senior Team Lead"],"is_rehired":[false,false,true,false],"salary_change":[11.21,-2.30,2.22,-5.44]} +{"birth_date":"1954-02-23T00:00:00Z","emp_no":10073,"first_name":"Shir","gender":"M","hire_date":"1991-12-01T00:00:00Z","languages":4,"last_name":"McClurg","salary":32568,"height":1.66,"still_hired":false,"avg_worked_seconds":314930367,"job_positions":["Principal Support Engineer","Python Developer","Junior Developer","Purchase Manager"],"is_rehired":[true,false],"salary_change":-5.67} +{"birth_date":"1955-08-28T00:00:00Z","emp_no":10074,"first_name":"Mokhtar","gender":"F","hire_date":"1990-08-13T00:00:00Z","languages":5,"last_name":"Bernatsky","salary":38992,"height":1.64,"still_hired":true,"avg_worked_seconds":382397583,"job_positions":["Senior Python Developer","Python Developer"],"is_rehired":[true,false,false,true],"salary_change":[6.70,1.98,-5.64,2.96]} +{"birth_date":"1960-03-09T00:00:00Z","emp_no":10075,"first_name":"Gao","gender":"F","hire_date":"1987-03-19T00:00:00Z","languages":5,"last_name":"Dolinsky","salary":51956,"height":1.94,"still_hired":false,"avg_worked_seconds":370238919,"job_positions":"Purchase Manager","is_rehired":true,"salary_change":[9.63,-3.29,8.42]} +{"birth_date":"1952-06-13T00:00:00Z","emp_no":10076,"first_name":"Erez","gender":"F","hire_date":"1985-07-09T00:00:00Z","languages":3,"last_name":"Ritzmann","salary":62405,"height":1.83,"still_hired":false,"avg_worked_seconds":376240317,"job_positions":["Architect","Senior Python Developer"],"is_rehired":false,"salary_change":[-6.90,-1.30,8.75]} +{"birth_date":"1964-04-18T00:00:00Z","emp_no":10077,"first_name":"Mona","gender":"M","hire_date":"1990-03-02T00:00:00Z","languages":5,"last_name":"Azuma","salary":46595,"height":1.68,"still_hired":false,"avg_worked_seconds":351960222,"job_positions":"Internship","salary_change":-0.01} +{"birth_date":"1959-12-25T00:00:00Z","emp_no":10078,"first_name":"Danel","gender":"F","hire_date":"1987-05-26T00:00:00Z","languages":2,"last_name":"Mondadori","salary":69904,"height":1.81,"still_hired":true,"avg_worked_seconds":377116038,"job_positions":["Architect","Principal Support Engineer","Internship"],"is_rehired":true,"salary_change":[-7.88,9.98,12.52]} +{"birth_date":"1961-10-05T00:00:00Z","emp_no":10079,"first_name":"Kshitij","gender":"F","hire_date":"1986-03-27T00:00:00Z","languages":2,"last_name":"Gils","salary":32263,"height":1.59,"still_hired":false,"avg_worked_seconds":320953330,"is_rehired":false,"salary_change":7.58} +{"birth_date":"1957-12-03T00:00:00Z","emp_no":10080,"first_name":"Premal","gender":"M","hire_date":"1985-11-19T00:00:00Z","languages":5,"last_name":"Baek","salary":52833,"height":1.80,"still_hired":false,"avg_worked_seconds":239266137,"job_positions":"Senior Python Developer","salary_change":[-4.35,7.36,5.56]} +{"birth_date":"1960-12-17T00:00:00Z","emp_no":10081,"first_name":"Zhongwei","gender":"M","hire_date":"1986-10-30T00:00:00Z","languages":2,"last_name":"Rosen","salary":50128,"height":1.44,"still_hired":true,"avg_worked_seconds":321375511,"job_positions":["Accountant","Internship"],"is_rehired":[false,false,false]} +{"birth_date":"1963-09-09T00:00:00Z","emp_no":10082,"first_name":"Parviz","gender":"M","hire_date":"1990-01-03T00:00:00Z","languages":4,"last_name":"Lortz","salary":49818,"height":1.61,"still_hired":false,"avg_worked_seconds":232522994,"job_positions":"Principal Support Engineer","is_rehired":false,"salary_change":[1.19,-3.39]} +{"birth_date":"1959-07-23T00:00:00Z","emp_no":10083,"first_name":"Vishv","gender":"M","hire_date":"1987-03-31T00:00:00Z","languages":1,"last_name":"Zockler","salary":39110,"height":1.42,"still_hired":false,"avg_worked_seconds":331236443,"job_positions":"Head Human Resources"} +{"birth_date":"1960-05-25T00:00:00Z","emp_no":10084,"first_name":"Tuval","gender":"M","hire_date":"1995-12-15T00:00:00Z","languages":1,"last_name":"Kalloufi","salary":28035,"height":1.51,"still_hired":true,"avg_worked_seconds":359067056,"job_positions":"Principal Support Engineer","is_rehired":false} +{"birth_date":"1962-11-07T00:00:00Z","emp_no":10085,"first_name":"Kenroku","gender":"M","hire_date":"1994-04-09T00:00:00Z","languages":5,"last_name":"Malabarba","salary":35742,"height":2.01,"still_hired":true,"avg_worked_seconds":353404008,"job_positions":["Senior Python Developer","Business Analyst","Tech Lead","Accountant"],"salary_change":[11.67,6.75,8.40]} +{"birth_date":"1962-11-19T00:00:00Z","emp_no":10086,"first_name":"Somnath","gender":"M","hire_date":"1990-02-16T00:00:00Z","languages":1,"last_name":"Foote","salary":68547,"height":1.74,"still_hired":true,"avg_worked_seconds":328580163,"job_positions":"Senior Python Developer","is_rehired":[false,true],"salary_change":13.61} +{"birth_date":"1959-07-23T00:00:00Z","emp_no":10087,"first_name":"Xinglin","gender":"F","hire_date":"1986-09-08T00:00:00Z","languages":5,"last_name":"Eugenio","salary":32272,"height":1.74,"still_hired":true,"avg_worked_seconds":305782871,"job_positions":["Junior Developer","Internship"],"is_rehired":[false,false],"salary_change":-2.05} +{"birth_date":"1954-02-25T00:00:00Z","emp_no":10088,"first_name":"Jungsoon","gender":"F","hire_date":"1988-09-02T00:00:00Z","languages":5,"last_name":"Syrzycki","salary":39638,"height":1.91,"still_hired":false,"avg_worked_seconds":330714423,"job_positions":["Reporting Analyst","Business Analyst","Tech Lead"],"is_rehired":true} +{"birth_date":"1963-03-21T00:00:00Z","emp_no":10089,"first_name":"Sudharsan","gender":"F","hire_date":"1986-08-12T00:00:00Z","languages":4,"last_name":"Flasterstein","salary":43602,"height":1.57,"still_hired":true,"avg_worked_seconds":232951673,"job_positions":["Junior Developer","Accountant"],"is_rehired":[true,false,false,false]} +{"birth_date":"1961-05-30T00:00:00Z","emp_no":10090,"first_name":"Kendra","gender":"M","hire_date":"1986-03-14T00:00:00Z","languages":2,"last_name":"Hofting","salary":44956,"height":2.03,"still_hired":true,"avg_worked_seconds":212460105,"is_rehired":[false,false,false,true],"salary_change":[7.15,-1.85,3.60]} +{"birth_date":"1955-10-04T00:00:00Z","emp_no":10091,"first_name":"Amabile","gender":"M","hire_date":"1992-11-18T00:00:00Z","languages":3,"last_name":"Gomatam","salary":38645,"height":2.09,"still_hired":true,"avg_worked_seconds":242582807,"job_positions":["Reporting Analyst","Python Developer"],"is_rehired":[true,true,false,false],"salary_change":[-9.23,7.50,5.85,5.19]} +{"birth_date":"1964-10-18T00:00:00Z","emp_no":10092,"first_name":"Valdiodio","gender":"F","hire_date":"1989-09-22T00:00:00Z","languages":1,"last_name":"Niizuma","salary":25976,"height":1.75,"still_hired":false,"avg_worked_seconds":313407352,"job_positions":["Junior Developer","Accountant"],"is_rehired":[false,false,true,true],"salary_change":[8.78,0.39,-6.77,8.30]} +{"birth_date":"1964-06-11T00:00:00Z","emp_no":10093,"first_name":"Sailaja","gender":"M","hire_date":"1996-11-05T00:00:00Z","languages":3,"last_name":"Desikan","salary":45656,"height":1.69,"still_hired":false,"avg_worked_seconds":315904921,"job_positions":["Reporting Analyst","Tech Lead","Principal Support Engineer","Purchase Manager"],"salary_change":-0.88} +{"birth_date":"1957-05-25T00:00:00Z","emp_no":10094,"first_name":"Arumugam","gender":"F","hire_date":"1987-04-18T00:00:00Z","languages":5,"last_name":"Ossenbruggen","salary":66817,"height":2.10,"still_hired":false,"avg_worked_seconds":332920135,"job_positions":["Senior Python Developer","Principal Support Engineer","Accountant"],"is_rehired":[true,false,true],"salary_change":[2.22,7.92]} +{"birth_date":"1965-01-03T00:00:00Z","emp_no":10095,"first_name":"Hilari","gender":"M","hire_date":"1986-07-15T00:00:00Z","languages":4,"last_name":"Morton","salary":37702,"height":1.55,"still_hired":false,"avg_worked_seconds":321850475,"is_rehired":[true,true,false,false],"salary_change":[-3.93,-6.66]} +{"birth_date":"1954-09-16T00:00:00Z","emp_no":10096,"first_name":"Jayson","gender":"M","hire_date":"1990-01-14T00:00:00Z","languages":4,"last_name":"Mandell","salary":43889,"height":1.94,"still_hired":false,"avg_worked_seconds":204381503,"job_positions":["Architect","Reporting Analyst"],"is_rehired":[false,false,false]} +{"birth_date":"1952-02-27T00:00:00Z","emp_no":10097,"first_name":"Remzi","gender":"M","hire_date":"1990-09-15T00:00:00Z","languages":3,"last_name":"Waschkowski","salary":71165,"height":1.53,"still_hired":false,"avg_worked_seconds":206258084,"job_positions":["Reporting Analyst","Tech Lead"],"is_rehired":[true,false],"salary_change":-1.12} +{"birth_date":"1961-09-23T00:00:00Z","emp_no":10098,"first_name":"Sreekrishna","gender":"F","hire_date":"1985-05-13T00:00:00Z","languages":4,"last_name":"Servieres","salary":44817,"height":2.00,"still_hired":false,"avg_worked_seconds":272392146,"job_positions":["Architect","Internship","Senior Team Lead"],"is_rehired":false,"salary_change":[-2.83,8.31,4.38]} +{"birth_date":"1956-05-25T00:00:00Z","emp_no":10099,"first_name":"Valter","gender":"F","hire_date":"1988-10-18T00:00:00Z","languages":2,"last_name":"Sullins","salary":73578,"height":1.81,"still_hired":true,"avg_worked_seconds":377713748,"is_rehired":[true,true],"salary_change":[10.71,14.26,-8.78,-3.98]} +{"birth_date":"1953-04-21T00:00:00Z","emp_no":10100,"first_name":"Hironobu","gender":"F","hire_date":"1987-09-21T00:00:00Z","languages":4,"last_name":"Haraldson","salary":68431,"height":1.77,"still_hired":true,"avg_worked_seconds":223910853,"job_positions":"Purchase Manager","is_rehired":[false,true,true,false],"salary_change":[13.97,-7.49]} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonDataSourcePlugin.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonDataSourcePlugin.java new file mode 100644 index 0000000000000..755b96c0d2add --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonDataSourcePlugin.java @@ -0,0 +1,30 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xpack.esql.datasources.spi.DataSourcePlugin; +import org.elasticsearch.xpack.esql.datasources.spi.FormatReaderFactory; + +import java.util.Map; + +/** + * Data source plugin that provides NDJSON format support for ESQL external data sources. + *

+ * The NDJSON reader performs schema inference from the first 100 non-empty lines, + * resolves type conflicts to KEYWORD, marks nested objects/arrays as UNSUPPORTED, + * and ignores malformed lines while logging warnings with their line numbers. + */ +public class NdJsonDataSourcePlugin extends Plugin implements DataSourcePlugin { + + @Override + public Map formatReaders(Settings settings) { + return Map.of("ndjson", (s, blockFactory) -> new NdJsonFormatReader(blockFactory)); + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonFormatReader.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonFormatReader.java new file mode 100644 index 0000000000000..93be314294645 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonFormatReader.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.datasources.CloseableIterator; +import org.elasticsearch.xpack.esql.datasources.spi.FormatReader; +import org.elasticsearch.xpack.esql.datasources.spi.SimpleSourceMetadata; +import org.elasticsearch.xpack.esql.datasources.spi.SourceMetadata; +import org.elasticsearch.xpack.esql.datasources.spi.StorageObject; + +import java.io.IOException; +import java.util.List; + +/** + * FormatReader implementation for NDJSON files. + */ +public class NdJsonFormatReader implements FormatReader { + + private final BlockFactory blockFactory; + + public NdJsonFormatReader(BlockFactory blockFactory) { + this.blockFactory = blockFactory; + } + + @Override + public SourceMetadata metadata(StorageObject object) throws IOException { + List schema; + try (var stream = object.newStream()) { + schema = NdJsonSchemaInferrer.inferSchema(stream); + } + return new SimpleSourceMetadata(schema, formatName(), object.path().toString()); + } + + @Override + public CloseableIterator read(StorageObject object, List projectedColumns, int batchSize) throws IOException { + return new NdJsonPageIterator(object, projectedColumns, batchSize, blockFactory); + } + + @Override + public String formatName() { + return "ndjson"; + } + + @Override + public List fileExtensions() { + return List.of(".ndjson", ".jsonl"); + } + + @Override + public void close() { + // Nothing to close at reader level + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageDecoder.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageDecoder.java new file mode 100644 index 0000000000000..6536f7bde7bb3 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageDecoder.java @@ -0,0 +1,317 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.ConstantNullBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.type.DataType; + +import java.io.Closeable; +import java.io.IOException; +import java.io.InputStream; +import java.time.Instant; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class NdJsonPageDecoder implements Closeable { + + private static final Logger LOGGER = LogManager.getLogger(NdJsonPageDecoder.class); + + private InputStream input; + private final BlockDecoder decoder; + private final int batchSize; + private final BlockFactory blockFactory; + private JsonParser parser; + private final List projectedAttributes; + + // What blocks got a value on the current line? Needed because Block.Builder doesn't provide + // the number of positions that were added. + private final BitSet blockTracker; + + NdJsonPageDecoder( + InputStream input, + List attributes, + List projectedColumns, + int batchSize, + BlockFactory blockFactory + ) throws IOException { + this.input = input; + + var projectedAttributes = attributes; + if (projectedColumns.isEmpty() == false) { + // Keep projected columns in order, adding NULL for missing columns + projectedAttributes = projectedColumns.stream() + .map( + col -> attributes.stream() + .filter(a -> a.name().equals(col)) + .findFirst() + .orElseGet(() -> NdJsonSchemaInferrer.attribute(col, DataType.NULL, false)) + ) + .toList(); + } + + this.decoder = prepareSchema(projectedAttributes); + this.batchSize = batchSize; + this.blockFactory = blockFactory; + this.projectedAttributes = projectedAttributes; + this.blockTracker = new BitSet(projectedAttributes.size()); + + this.parser = NdJsonUtils.JSON_FACTORY.createParser(input); + } + + Page decodePage() throws IOException { + var blockBuilders = new Block.Builder[this.projectedAttributes.size()]; + // Setting up builders may trip the circuit breaker. Make sure they're all always closed + try { + this.decoder.setupBuilders(blockBuilders); + + int lineCount = 0; + while (lineCount < batchSize) { + try { + if (parser.nextToken() == null) { + break; // End of stream + } + } catch (JsonParseException e) { + LOGGER.warn("Malformed NDJSON at line {}: {}", lineCount, e); + this.input = NdJsonUtils.moveToNextLine(parser, this.input); + parser = NdJsonUtils.JSON_FACTORY.createParser(this.input); + continue; + } + + lineCount++; + this.blockTracker.clear(); + + try { + decoder.decodeObject(parser); + } catch (JsonParseException e) { + LOGGER.warn("Malformed NDJSON at line {}: {}", lineCount, e); + this.input = NdJsonUtils.moveToNextLine(parser, this.input); + parser = NdJsonUtils.JSON_FACTORY.createParser(this.input); + } + + // Make sure every block got moved forward + for (int i = 0; i < blockBuilders.length; i++) { + if (blockTracker.get(i) == false) { + blockBuilders[i].appendNull(); + } + } + } + + if (lineCount == 0) { + // Done + return null; + } + + var blocks = new Block[this.projectedAttributes.size()]; + var success = false; + try { + for (int i = 0; i < blockBuilders.length; i++) { + blocks[i] = blockBuilders[i].build(); + } + success = true; + } finally { + if (success == false) { + // Some blocks may have been created + Releasables.close(blocks); + } + } + return new Page(blocks); + + } finally { + Releasables.close(blockBuilders); + } + } + + // Prepare the tree of property decoders and return the root decoder. + private BlockDecoder prepareSchema(List attributes) { + BlockDecoder root = new BlockDecoder(); + int idx = 0; + for (var attribute : attributes) { + var decoder = root; + var path = attribute.name().split("\\."); + for (var part : path) { + if (decoder.children == null) { + decoder.children = new HashMap<>(); + } + decoder = decoder.children.computeIfAbsent(part, k -> new BlockDecoder()); + } + decoder.setAttribute(attribute, idx); + idx++; + } + return root; + } + + @Override + public void close() throws IOException { + IOUtils.close(input); + input = null; + } + + // --------------------------------------------------------------------------------------------- + // A tree of decoders. Avoids path reconstruction when traversing nested objects. + private class BlockDecoder { + @Nullable + Attribute attribute; + Block.Builder blockBuilder; + int blockIdx; + Map children; + + void setAttribute(Attribute attribute, int blockIdx) { + this.attribute = attribute; + this.blockIdx = blockIdx; + } + + // Builders setup independently as we need to create new ones for each page. + void setupBuilders(Block.Builder[] blockBuilders) { + if (attribute != null) { + blockBuilder = switch (attribute.dataType()) { + // Keep in sync with NdJsonSchemaInferrer.inferValueSchema + case BOOLEAN -> blockFactory.newBooleanBlockBuilder(batchSize); + case NULL -> new ConstantNullBlock.Builder(blockFactory); + case INTEGER -> blockFactory.newIntBlockBuilder(batchSize); + case LONG -> blockFactory.newLongBlockBuilder(batchSize); + case DOUBLE -> blockFactory.newDoubleBlockBuilder(batchSize); + case KEYWORD -> blockFactory.newBytesRefBlockBuilder(batchSize); + case DATETIME -> blockFactory.newLongBlockBuilder(batchSize); // milliseconds since epoch + default -> throw new IllegalArgumentException("Unsupported data type: " + attribute.dataType()); + }; + blockBuilders[blockIdx] = blockBuilder; + } + + if (children != null) { + for (var child : children.values()) { + child.setupBuilders(blockBuilders); + } + } + } + + private void decodeObject(JsonParser parser) throws IOException { + JsonToken token = parser.currentToken(); + if (token != JsonToken.START_OBJECT) { + throw new NdJsonParseException(parser, "Expected JSON object"); + } + while ((token = parser.nextToken()) != JsonToken.END_OBJECT) { + if (token != JsonToken.FIELD_NAME) { + throw new NdJsonParseException(parser, "Expected field name in object"); + } + var childDecoder = this.children == null ? null : this.children.get(parser.currentName()); + parser.nextToken(); + if (childDecoder == null) { + // Unknown field, skip it + parser.skipChildren(); + } else { + childDecoder.decodeValue(parser); + } + } + } + + private void decodeValue(JsonParser parser) throws IOException { + JsonToken token = parser.currentToken(); + blockTracker.set(blockIdx); + if (token == JsonToken.START_ARRAY) { + this.blockBuilder.beginPositionEntry(); + while (parser.nextToken() != JsonToken.END_ARRAY) { + decodeValue(parser); + } + this.blockBuilder.endPositionEntry(); + return; + } + + if (token == JsonToken.VALUE_NULL) { + blockBuilder.appendNull(); + return; + } + + switch (attribute.dataType()) { + case BOOLEAN -> { + if (token == JsonToken.VALUE_TRUE) { + ((BooleanBlock.Builder) blockBuilder).appendBoolean(true); + } else if (token == JsonToken.VALUE_FALSE) { + ((BooleanBlock.Builder) blockBuilder).appendBoolean(false); + } else { + unexpectedValue(parser); + } + } + case NULL -> { + // NULL handled above + unexpectedValue(parser); + } + case INTEGER -> { + if (token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_NUMBER_FLOAT) { + ((IntBlock.Builder) blockBuilder).appendInt(parser.getIntValue()); + } else { + unexpectedValue(parser); + } + } + case LONG -> { + if (token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_NUMBER_FLOAT) { + ((LongBlock.Builder) blockBuilder).appendLong(parser.getLongValue()); + } else { + unexpectedValue(parser); + } + } + case DOUBLE -> { + if (token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_NUMBER_FLOAT) { + ((DoubleBlock.Builder) blockBuilder).appendDouble(parser.getDoubleValue()); + } else { + unexpectedValue(parser); + } + } + case DATETIME -> { + try { + var millis = Instant.parse(parser.getValueAsString()).toEpochMilli(); + ((LongBlock.Builder) blockBuilder).appendLong(millis); + } catch (Exception e) { + unexpectedValue(parser); + } + } + case KEYWORD -> { + // Be lenient, this is a catch-all type + var str = parser.getValueAsString(); + if (str != null) { + ((BytesRefBlock.Builder) blockBuilder).appendBytesRef(new BytesRef(str)); + } else { + unexpectedValue(parser); + } + } + default -> throw new IllegalArgumentException("Unsupported data type: " + attribute.dataType()); + } + } + + private void unexpectedValue(JsonParser parser) throws IOException { + LOGGER.warn( + "Unexpected token type: {} for attribute: {} at {}", + parser.currentToken(), + attribute.name(), + parser.getTokenLocation() + ); + // Ignore any children to keep reading other values + parser.skipChildren(); + } + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIterator.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIterator.java new file mode 100644 index 0000000000000..b7cab462ebea2 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIterator.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.datasources.CloseableIterator; +import org.elasticsearch.xpack.esql.datasources.spi.StorageObject; + +import java.io.IOException; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Iterator that reads NDJSON lines and produces ESQL Pages. + */ +final class NdJsonPageIterator implements CloseableIterator { + + private final NdJsonPageDecoder pageDecoder; + private boolean endOfFile = false; + private Page nextPage; + + NdJsonPageIterator(StorageObject object, List projectedColumns, int batchSize, BlockFactory blockFactory) throws IOException { + // FIXME: either read schema ahead of time, of buffer the stream and replay it to avoid opening it twice. + List attributes; + try (var stream = object.newStream()) { + attributes = NdJsonSchemaInferrer.inferSchema(stream); + } + + var inputStream = object.newStream(); + this.pageDecoder = new NdJsonPageDecoder(inputStream, attributes, projectedColumns, batchSize, blockFactory); + } + + @Override + public boolean hasNext() { + if (nextPage != null) { + return true; + } + if (endOfFile) { + return false; + } + try { + nextPage = pageDecoder.decodePage(); + endOfFile = nextPage == null; + return nextPage != null; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public Page next() { + if (hasNext() == false) { + throw new NoSuchElementException(); + } + Page result = nextPage; + nextPage = null; + return result; + } + + @Override + public void close() throws IOException { + IOUtils.close(pageDecoder); + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonParseException.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonParseException.java new file mode 100644 index 0000000000000..0f2968ebf8453 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonParseException.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; + +public class NdJsonParseException extends JsonParseException { + public NdJsonParseException(JsonParser p, String msg) { + super(p, msg); + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrer.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrer.java new file mode 100644 index 0000000000000..d8ffccdb7cdc3 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrer.java @@ -0,0 +1,229 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonToken; + +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Nullability; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; + +import java.io.IOException; +import java.io.InputStream; +import java.time.Instant; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * Infers schema from NDJSON files by reading the first N lines. + * - Flattens nested objects using dot notation + * - Detects arrays as multi-value fields + * - Marks fields as nullable when null values are encountered + * + * Types: KEYWORD, LONG, DOUBLE, BOOLEAN, NULL. Number types are large to be on the safe side if the sample + * only shows small values. + * + * Missing: + * - Support for union types (e.g., string or integer) + * - Support for nested arrays (does it exist in ESQL?) + * - Support for complex types (e.g., date, ip) + */ +public class NdJsonSchemaInferrer { + + private static final Logger logger = LogManager.getLogger(NdJsonSchemaInferrer.class); + private static final int DEFAULT_SAMPLE_SIZE = 100; + + /** + * Infers schema from an NDJSON input stream. + */ + public static List inferSchema(InputStream inputStream) throws IOException { + return inferSchema(inputStream, DEFAULT_SAMPLE_SIZE); + } + + /** + * Infers schema from an NDJSON input stream, reading up to maxLines. + */ + public static List inferSchema(InputStream inputStream, int maxLines) throws IOException { + FieldInfo root = new FieldInfo(); + JsonParser parser = NdJsonUtils.JSON_FACTORY.createParser(inputStream); + try { + int lineCount = 0; + while (lineCount < maxLines) { + try { + if (parser.nextToken() == null) { + break; // End of stream + } + } catch (JsonParseException e) { + logger.warn("Malformed NDJSON at line {}: {}", lineCount, e); + inputStream = NdJsonUtils.moveToNextLine(parser, inputStream); + parser = NdJsonUtils.JSON_FACTORY.createParser(inputStream); + continue; + } + + try { + inferObjectSchema(parser, root); + lineCount++; + } catch (JsonParseException e) { + logger.warn("Malformed NDJSON at line {}: {}", lineCount, e); + inputStream = NdJsonUtils.moveToNextLine(parser, inputStream); + parser = NdJsonUtils.JSON_FACTORY.createParser(inputStream); + } + } + } finally { + parser.close(); + } + + // Convert FieldInfo map to Attribute list + List attributes = new ArrayList<>(); + buildSchema(root, null, attributes); + return attributes; + } + + private static void inferObjectSchema(JsonParser parser, FieldInfo object) throws IOException { + JsonToken token = parser.currentToken(); + if (token != JsonToken.START_OBJECT) { + throw new NdJsonParseException(parser, "Expected JSON object"); + } + while ((token = parser.nextToken()) != JsonToken.END_OBJECT) { + if (token != JsonToken.FIELD_NAME) { + throw new NdJsonParseException(parser, "Expected field name in object"); + } + var child = object.getChild(parser.getCurrentName()); + parser.nextToken(); + inferValueSchema(parser, child); + } + } + + private static void inferValueSchema(JsonParser parser, FieldInfo field) throws IOException { + switch (parser.currentToken()) { + case START_ARRAY -> { + field.isArray = true; + while (parser.nextToken() != JsonToken.END_ARRAY) { + inferValueSchema(parser, field); + } + } + // Keep in sync with NdJsonPageIterator.Decoder + case START_OBJECT -> inferObjectSchema(parser, field); + case VALUE_STRING -> { + try { + Instant.parse(parser.getText()); + field.addType(DataType.DATETIME); + } catch (DateTimeParseException e) { + field.addType(DataType.KEYWORD); + } + } + case VALUE_NUMBER_INT -> { + long value = parser.getLongValue(); + if (value >= Integer.MIN_VALUE && value <= Integer.MAX_VALUE) { + field.addType(DataType.INTEGER); + } else { + field.addType(DataType.LONG); + } + } // conservative size + case VALUE_NUMBER_FLOAT -> field.addType(DataType.DOUBLE); // conservative size + case VALUE_TRUE, VALUE_FALSE -> field.addType(DataType.BOOLEAN); + case VALUE_NULL -> field.addType(DataType.NULL); + // Ignore all other events + } + } + + /** Build the list of Attribute by recursively traversing the FieldInfo tree */ + private static void buildSchema(FieldInfo field, String parentName, List attributes) { + for (Map.Entry entry : field.children.entrySet()) { + // TODO: disallow dots in names (or replace them) as it may cause issues when decoding + var name = entry.getKey(); + var info = entry.getValue(); + if (parentName != null) { + name = parentName + "." + name; + } + + DataType dataType = info.resolveType(); + if (dataType != DataType.UNSUPPORTED) { + // Unsupported is used for nested object properties + attributes.add(attribute(name, dataType, info.types.contains(DataType.NULL))); + } + + if (info.children != null) { + buildSchema(info, name, attributes); + } + } + } + + public static Attribute attribute(String name, DataType type, boolean nullable) { + return new FieldAttribute( + Source.EMPTY, + null, + null, + name, + new EsField(name, type, Map.of(), false, null), + nullable ? Nullability.TRUE : Nullability.UNKNOWN, + null, + false + ); + } + + /** + * Field type information collected during schema inference. + */ + private static class FieldInfo { + final EnumSet types = EnumSet.noneOf(DataType.class); + boolean isArray = false; + Map children = null; + + FieldInfo getChild(String name) { + // TODO: limit depth + if (children == null) { + children = new LinkedHashMap<>(); + } + return children.computeIfAbsent(name, (_name) -> new FieldInfo()); + } + + void addType(DataType type) { + types.add(type); + } + + DataType resolveType() { + if (types.isEmpty()) { + // Can happen with parent and always-empty array + return DataType.UNSUPPORTED; + } + if (types.size() == 1) { + return types.iterator().next(); + } + // Multiple types - for now, use the widest type + // TODO: Create MultiTypeEsField for proper union type support + if (types.contains(DataType.DATETIME)) { + return DataType.DATETIME; + } + if (types.contains(DataType.KEYWORD)) { + return DataType.KEYWORD; + } + if (types.contains(DataType.DOUBLE)) { + return DataType.DOUBLE; + } + if (types.contains(DataType.LONG)) { + return DataType.LONG; + } + if (types.contains(DataType.INTEGER)) { + return DataType.INTEGER; + } + return types.iterator().next(); + } + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonUtils.java b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonUtils.java new file mode 100644 index 0000000000000..1a72a02d1fd26 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonUtils.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.SequenceInputStream; + +class NdJsonUtils { + static final JsonFactory JSON_FACTORY = new JsonFactory(); + + /** + * Given a parser and the stream it reads from, restart parsing at the next line. + * @param parser the JSON parser + * @param input the stream the parser reads from + * @return a new stream to read from + */ + static InputStream moveToNextLine(JsonParser parser, InputStream input) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + parser.releaseBuffered(baos); + parser.close(); + + if (baos.size() > 0) { + if (input instanceof RecoveredStream recoveredStream) { + recoveredStream.prependReleasedBuffer(baos); + } else { + input = new RecoveredStream(baos, input); + } + } + + int c; + while ((c = input.read()) != -1) { + if (c == '\n' || c == '\r') { + break; + } + } + + return input; + } + + private static class RecoveredStream extends InputStream { + private SequenceInputStream delegate; + // Released from Jackson's internal buffers + private ByteArrayInputStream releasedStream; + // Original stream + private final InputStream baseStream; + + RecoveredStream(ByteArrayOutputStream buffer, InputStream baseStream) { + this.releasedStream = new ByteArrayInputStream(buffer.toByteArray()); + this.baseStream = baseStream; + this.delegate = new SequenceInputStream(releasedStream, baseStream); + } + + void prependReleasedBuffer(ByteArrayOutputStream buffer) throws IOException { + // Re-add any previously released bytes + releasedStream.transferTo(buffer); + this.releasedStream = new ByteArrayInputStream(buffer.toByteArray()); + this.delegate = new SequenceInputStream(releasedStream, baseStream); + } + + @Override + public int read() throws IOException { + return delegate.read(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return delegate.read(b, off, len); + } + + @Override + public int available() throws IOException { + return delegate.available(); + } + + @Override + public void close() throws IOException { + delegate.close(); + } + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/main/resources/META-INF/services/org.elasticsearch.xpack.esql.datasources.spi.DataSourcePlugin b/x-pack/plugin/esql-datasource-ndjson/src/main/resources/META-INF/services/org.elasticsearch.xpack.esql.datasources.spi.DataSourcePlugin new file mode 100644 index 0000000000000..7b3e62e3d4515 --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/main/resources/META-INF/services/org.elasticsearch.xpack.esql.datasources.spi.DataSourcePlugin @@ -0,0 +1,8 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0; you may not use this file except in compliance with the Elastic License +# 2.0. +# + +org.elasticsearch.xpack.esql.datasource.ndjson.NdJsonDataSourcePlugin diff --git a/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/BytesStorageObject.java b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/BytesStorageObject.java new file mode 100644 index 0000000000000..a28807271704f --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/BytesStorageObject.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.elasticsearch.xpack.esql.datasources.spi.StorageObject; +import org.elasticsearch.xpack.esql.datasources.spi.StoragePath; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.time.Instant; + +/** + * Byte-array backed storage object for testing + */ +public class BytesStorageObject implements StorageObject { + private final StoragePath path; + private final byte[] bytes; + + public BytesStorageObject(String path, byte[] bytes) { + this.bytes = bytes; + this.path = StoragePath.of(path); + } + + @Override + public InputStream newStream() throws IOException { + return new ByteArrayInputStream(bytes); + } + + @Override + public InputStream newStream(long position, long length) throws IOException { + return new ByteArrayInputStream(bytes, (int) position, (int) length); + } + + @Override + public long length() throws IOException { + return bytes.length; + } + + @Override + public Instant lastModified() throws IOException { + return Instant.now(); + } + + @Override + public boolean exists() throws IOException { + return true; + } + + @Override + public StoragePath path() { + return path; + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIteratorTests.java b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIteratorTests.java new file mode 100644 index 0000000000000..8130aaf1d684c --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonPageIteratorTests.java @@ -0,0 +1,114 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.apache.commons.io.IOUtils; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.NoopCircuitBreaker; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BooleanBlock; +import org.elasticsearch.compute.data.ConstantNullBlock; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.datasources.spi.SourceMetadata; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +public class NdJsonPageIteratorTests extends ESTestCase { + + private BlockFactory blockFactory; + + @Override + public void setUp() throws Exception { + super.setUp(); + blockFactory = BlockFactory.getInstance(new NoopCircuitBreaker("test-noop"), BigArrays.NON_RECYCLING_INSTANCE); + } + + public void testIterator() throws IOException { + var reader = new NdJsonFormatReader(blockFactory); + var object = new BytesStorageObject("classpath://employees.ndjson", IOUtils.resourceToByteArray("/employees.ndjson")); + + List sizes = new ArrayList<>(); + try (var iterator = reader.read(object, List.of("still_hired", "emp_no", "birth_date", "non_existing_field"), 42)) { + while (iterator.hasNext()) { + var page = iterator.next(); + assertEquals(4, page.getBlockCount()); + checkBlockSizes(page); + + // Make sure blocks are returned in the order requested, with nulls for unknown columns + assertThat(page.getBlock(0), Matchers.instanceOf(BooleanBlock.class)); + assertThat(page.getBlock(1), Matchers.instanceOf(IntBlock.class)); + assertThat(page.getBlock(2), Matchers.instanceOf(LongBlock.class)); + assertThat(page.getBlock(3), Matchers.instanceOf(ConstantNullBlock.class)); + + sizes.add(page.getBlock(0).getPositionCount()); + } + } + + assertEquals(List.of(42, 42, 16), sizes); // Total 100 + } + + public void testSampleData() throws Exception { + var reader = new NdJsonFormatReader(blockFactory); + var object = new BytesStorageObject("classpath://employees.ndjson", IOUtils.resourceToByteArray("/employees.ndjson")); + + var metadata = reader.metadata(object); + var schema = metadata.schema(); + + assertEquals("birth_date", schema.get(0).name()); + assertEquals(DataType.DATETIME, schema.get(0).dataType()); + + assertEquals("emp_no", schema.get(1).name()); + assertEquals(DataType.INTEGER, schema.get(1).dataType()); + + assertEquals("still_hired", schema.get(9).name()); + assertEquals(DataType.BOOLEAN, schema.get(9).dataType()); + + try (var iterator = reader.read(object, List.of(), 1000)) { + var page = iterator.next(); + checkBlockSizes(page); + + LongBlock birthDate = page.getBlock(blockIdx(metadata, "birth_date")); + IntBlock empNo = page.getBlock(blockIdx(metadata, "emp_no")); + BooleanBlock stillHired = page.getBlock(blockIdx(metadata, "still_hired")); + DoubleBlock height = page.getBlock(blockIdx(metadata, "height")); + + var bytesRef = new BytesRef(); + + assertEquals("1963-06-01T00:00:00Z", Instant.ofEpochMilli(birthDate.getLong(9)).toString()); + assertEquals(10010, empNo.getInt(9)); + assertFalse(stillHired.getBoolean(9)); + assertEquals(1.70, height.getDouble(9), 0.0001); + } + } + + private int blockIdx(SourceMetadata meta, String name) { + for (int i = 0; i < meta.schema().size(); i++) { + if (meta.schema().get(i).name().equals(name)) { + return i; + } + } + throw new IllegalArgumentException("Column '" + name + "' not found in metadata"); + } + + private void checkBlockSizes(Page page) { + int size = page.getPositionCount(); + for (int i = 0; i < page.getBlockCount(); i++) { + assertEquals("Block[" + i + "] position count", size, page.getBlock(i).getPositionCount()); + } + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrerTests.java b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrerTests.java new file mode 100644 index 0000000000000..311fbd2ed31ba --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/test/java/org/elasticsearch/xpack/esql/datasource/ndjson/NdJsonSchemaInferrerTests.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.datasource.ndjson; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.core.expression.Attribute; +import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.Nullability; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.type.EsField; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; + +public class NdJsonSchemaInferrerTests extends ESTestCase { + + private Attribute field(String name, DataType type, boolean nullable) { + return new FieldAttribute( + Source.EMPTY, + null, + null, + name, + new EsField(name, type, Map.of(), false, null), + nullable ? Nullability.TRUE : Nullability.UNKNOWN, + null, + false + ); + } + + private Attribute field(String name, DataType type) { + return field(name, type, false); + } + + /** + * Test case: Verifies the correct schema inference for lines containing valid flat JSON objects. + */ + public void testInferSchemaForFlatJson() throws IOException { + check(""" + {"name": "John", "age": 30} + {"name": "Jane", "age": 25} + """, field("name", DataType.KEYWORD), field("age", DataType.INTEGER)); + } + + /** + * Test case: Verifies the schema inference properly handles nested JSON objects. + */ + public void testInferSchemaForNestedJson() throws IOException { + check(""" + {"user": {"name": "John", "age": 30, "long_value": 12345678901234}} + {"user": {"name": "Jane", "age": 25}} + """, field("user.name", DataType.KEYWORD), field("user.age", DataType.INTEGER), field("user.long_value", DataType.LONG)); + } + + /** + * Test case: Ensures the method ignores empty lines and invalid JSON lines. + */ + public void testIgnoreEmptyAndInvalidLines() throws IOException { + check(""" + {"name": "John", "age": 30} + not_json + + {"name": "Jane", "age": null} + """, field("name", DataType.KEYWORD), field("age", DataType.INTEGER, true)); + } + + /** + * Test case: check line ending variations + */ + public void testLineEndingVariations() throws IOException { + check( + "{\"name\": \"John\", \"age\": 30}\nnot_json\n\n{\"name\": \"Jane\", \"age\": null}", + field("name", DataType.KEYWORD), + field("age", DataType.INTEGER, true) + ); + + check( + "{\"name\": \"John\", \"age\": 30}\nnot_json\r\r{\"name\": \"Jane\", \"age\": null}", + field("name", DataType.KEYWORD), + field("age", DataType.INTEGER, true) + ); + + check( + "{\"name\": \"John\", \"age\": 30}\nnot_json\r\n\n\r{\"name\": \"Jane\", \"age\": null}", + field("name", DataType.KEYWORD), + field("age", DataType.INTEGER, true) + ); + } + + /** + * Test case: Verifies the inference correctly handles arrays in JSON objects. + */ + public void testInferSchemaForJsonWithArrays() throws IOException { + check(""" + {"scores": [85, 90, 95]} + {"scores": [70, null]} + """, field("scores", DataType.INTEGER, true)); + } + + /** + * Test case: Ensures correct schema inference when all fields are null. + */ + public void testInferSchemaForNullFields() throws IOException { + check(""" + {"name": null, "age": null} + {"name": null, "age": null} + """, field("name", DataType.NULL, true), field("age", DataType.NULL, true)); + } + + /** + * Test case: Verifies schema inference respects the maxLines parameter. + */ + public void testInferSchemaWithMaxLinesLimit() throws IOException { + check(""" + {"name": "John", "age": 30} + {"name": "Jane", "age": 25} + {"name": "Smith", "age": 40} + """, field("name", DataType.KEYWORD), field("age", DataType.INTEGER)); + } + + /** + * Test case: Verifies the correct handling of mixed field types. + */ + public void testInferSchemaForMixedTypeFields() throws IOException { + check(""" + {"mixed": 42} + {"mixed": "text"} + {"mixed": 3.14} + """, field("mixed", DataType.KEYWORD)); + } + + private void check(String ndjson, Attribute... expected) throws IOException { + try (ByteArrayInputStream inputStream = new ByteArrayInputStream(ndjson.getBytes(StandardCharsets.UTF_8))) { + List result = NdJsonSchemaInferrer.inferSchema(inputStream); + + assertEquals(expected.length, result.size()); + for (int i = 0; i < expected.length; i++) { + String name = result.get(i).name(); + assertEquals(name + " name", expected[i].name(), name); + assertEquals(name + " type", expected[i].dataType(), result.get(i).dataType()); + assertEquals(name + " nullable", expected[i].nullable(), result.get(i).nullable()); + } + } + } +} diff --git a/x-pack/plugin/esql-datasource-ndjson/src/test/resources/employees.ndjson b/x-pack/plugin/esql-datasource-ndjson/src/test/resources/employees.ndjson new file mode 100644 index 0000000000000..f279e8a44b2df --- /dev/null +++ b/x-pack/plugin/esql-datasource-ndjson/src/test/resources/employees.ndjson @@ -0,0 +1,100 @@ +{"birth_date":"1953-09-02T00:00:00Z","emp_no":10001,"first_name":"Georgi","gender":"M","hire_date":"1986-06-26T00:00:00Z","languages":2,"last_name":"Facello","salary":57305,"height":2.03,"still_hired":true,"avg_worked_seconds":268728049,"job_positions":["Senior Python Developer","Accountant"],"is_rehired":[false,true],"salary_change":1.19} +{"birth_date":"1964-06-02T00:00:00Z","emp_no":10002,"first_name":"Bezalel","gender":"F","hire_date":"1985-11-21T00:00:00Z","languages":5,"last_name":"Simmel","salary":56371,"height":2.08,"still_hired":true,"avg_worked_seconds":328922887,"job_positions":"Senior Team Lead","is_rehired":[false,false],"salary_change":[-7.23,11.17]} +{"birth_date":"1959-12-03T00:00:00Z","emp_no":10003,"first_name":"Parto","gender":"M","hire_date":"1986-08-28T00:00:00Z","languages":4,"last_name":"Bamford","salary":61805,"height":1.83,"still_hired":false,"avg_worked_seconds":200296405,"salary_change":[14.68,12.82]} +{"birth_date":"1954-05-01T00:00:00Z","emp_no":10004,"first_name":"Chirstian","gender":"M","hire_date":"1986-12-01T00:00:00Z","languages":5,"last_name":"Koblick","salary":36174,"height":1.78,"still_hired":true,"avg_worked_seconds":311267831,"job_positions":["Reporting Analyst","Tech Lead","Head Human Resources","Support Engineer"],"is_rehired":true,"salary_change":[3.65,-0.35,1.13,13.48]} +{"birth_date":"1955-01-21T00:00:00Z","emp_no":10005,"first_name":"Kyoichi","gender":"M","hire_date":"1989-09-12T00:00:00Z","languages":1,"last_name":"Maliniak","salary":63528,"height":2.05,"still_hired":true,"avg_worked_seconds":244294991,"is_rehired":[false,false,false,true],"salary_change":[-2.14,13.07]} +{"birth_date":"1953-04-20T00:00:00Z","emp_no":10006,"first_name":"Anneke","gender":"F","hire_date":"1989-06-02T00:00:00Z","languages":3,"last_name":"Preusig","salary":60335,"height":1.56,"still_hired":false,"avg_worked_seconds":372957040,"job_positions":["Tech Lead","Principal Support Engineer","Senior Team Lead"],"salary_change":-3.90} +{"birth_date":"1957-05-23T00:00:00Z","emp_no":10007,"first_name":"Tzvetan","gender":"F","hire_date":"1989-02-10T00:00:00Z","languages":4,"last_name":"Zielinski","salary":74572,"height":1.70,"still_hired":true,"avg_worked_seconds":393084805,"is_rehired":[true,false,true,false],"salary_change":[-7.06,1.99,0.57]} +{"birth_date":"1958-02-19T00:00:00Z","emp_no":10008,"first_name":"Saniya","gender":"M","hire_date":"1994-09-15T00:00:00Z","languages":2,"last_name":"Kalloufi","salary":43906,"height":2.10,"still_hired":true,"avg_worked_seconds":283074758,"job_positions":["Senior Python Developer","Junior Developer","Purchase Manager","Internship"],"is_rehired":[true,false],"salary_change":[12.68,3.54,0.75,-2.92]} +{"birth_date":"1952-04-19T00:00:00Z","emp_no":10009,"first_name":"Sumant","gender":"F","hire_date":"1985-02-18T00:00:00Z","languages":1,"last_name":"Peac","salary":66174,"height":1.85,"still_hired":false,"avg_worked_seconds":236805489,"job_positions":["Senior Python Developer","Internship"]} +{"birth_date":"1963-06-01T00:00:00Z","emp_no":10010,"first_name":"Duangkaew","hire_date":"1989-08-24T00:00:00Z","languages":4,"last_name":"Piveteau","salary":45797,"height":1.70,"still_hired":false,"avg_worked_seconds":315236372,"job_positions":["Architect","Reporting Analyst","Tech Lead","Purchase Manager"],"is_rehired":[true,true,false,false],"salary_change":[5.05,-6.77,4.69,12.15]} +{"birth_date":"1953-11-07T00:00:00Z","emp_no":10011,"first_name":"Mary","hire_date":"1990-01-22T00:00:00Z","languages":5,"last_name":"Sluis","salary":31120,"height":1.50,"still_hired":true,"avg_worked_seconds":239615525,"job_positions":["Architect","Reporting Analyst","Tech Lead","Senior Team Lead"],"is_rehired":[true,true],"salary_change":[10.35,-7.82,8.73,3.48]} +{"birth_date":"1960-10-04T00:00:00Z","emp_no":10012,"first_name":"Patricio","hire_date":"1992-12-18T00:00:00Z","languages":5,"last_name":"Bridgland","salary":48942,"height":1.97,"still_hired":false,"avg_worked_seconds":365510850,"job_positions":["Head Human Resources","Accountant"],"is_rehired":[false,true,true,false],"salary_change":0.04} +{"birth_date":"1963-06-07T00:00:00Z","emp_no":10013,"first_name":"Eberhardt","hire_date":"1985-10-20T00:00:00Z","languages":1,"last_name":"Terkki","salary":48735,"height":1.94,"still_hired":true,"avg_worked_seconds":253864340,"job_positions":"Reporting Analyst","is_rehired":[true,true]} +{"birth_date":"1956-02-12T00:00:00Z","emp_no":10014,"first_name":"Berni","hire_date":"1987-03-11T00:00:00Z","languages":5,"last_name":"Genin","salary":37137,"height":1.99,"still_hired":false,"avg_worked_seconds":225049139,"job_positions":["Reporting Analyst","Data Scientist","Head Human Resources"],"salary_change":[-1.89,9.07]} +{"birth_date":"1959-08-19T00:00:00Z","emp_no":10015,"first_name":"Guoxiang","hire_date":"1987-07-02T00:00:00Z","languages":5,"last_name":"Nooteboom","salary":25324,"height":1.66,"still_hired":true,"avg_worked_seconds":390266432,"job_positions":["Principal Support Engineer","Junior Developer","Head Human Resources","Support Engineer"],"is_rehired":[true,false,false,false],"salary_change":[14.25,12.40]} +{"birth_date":"1961-05-02T00:00:00Z","emp_no":10016,"first_name":"Kazuhito","hire_date":"1995-01-27T00:00:00Z","languages":2,"last_name":"Cappelletti","salary":61358,"height":1.54,"still_hired":false,"avg_worked_seconds":253029411,"job_positions":["Reporting Analyst","Python Developer","Accountant","Purchase Manager"],"is_rehired":[false,false],"salary_change":[-5.18,7.69]} +{"birth_date":"1958-07-06T00:00:00Z","emp_no":10017,"first_name":"Cristinel","hire_date":"1993-08-03T00:00:00Z","languages":2,"last_name":"Bouloucos","salary":58715,"height":1.74,"still_hired":false,"avg_worked_seconds":236703986,"job_positions":["Data Scientist","Head Human Resources","Purchase Manager"],"is_rehired":[true,false,true,true],"salary_change":-6.33} +{"birth_date":"1954-06-19T00:00:00Z","emp_no":10018,"first_name":"Kazuhide","hire_date":"1987-04-03T00:00:00Z","languages":2,"last_name":"Peha","salary":56760,"height":1.97,"still_hired":false,"avg_worked_seconds":309604079,"job_positions":"Junior Developer","is_rehired":[false,false,true,true],"salary_change":[-1.64,11.51,-5.32]} +{"birth_date":"1953-01-23T00:00:00Z","emp_no":10019,"first_name":"Lillian","hire_date":"1999-04-30T00:00:00Z","languages":1,"last_name":"Haddadi","salary":73717,"height":2.06,"still_hired":false,"avg_worked_seconds":342855721,"job_positions":"Purchase Manager","is_rehired":[false,false],"salary_change":[-6.84,8.42,-7.26]} +{"birth_date":"1952-12-24T00:00:00Z","emp_no":10020,"first_name":"Mayuko","gender":"M","hire_date":"1991-01-26T00:00:00Z","last_name":"Warwick","salary":40031,"height":1.41,"still_hired":false,"avg_worked_seconds":373309605,"job_positions":"Tech Lead","is_rehired":[true,true,false],"salary_change":-5.81} +{"birth_date":"1960-02-20T00:00:00Z","emp_no":10021,"first_name":"Ramzi","gender":"M","hire_date":"1988-02-10T00:00:00Z","last_name":"Erde","salary":60408,"height":1.47,"still_hired":false,"avg_worked_seconds":287654610,"job_positions":"Support Engineer","is_rehired":true} +{"birth_date":"1952-07-08T00:00:00Z","emp_no":10022,"first_name":"Shahaf","gender":"M","hire_date":"1995-08-22T00:00:00Z","last_name":"Famili","salary":48233,"height":1.82,"still_hired":false,"avg_worked_seconds":233521306,"job_positions":["Reporting Analyst","Data Scientist","Python Developer","Internship"],"is_rehired":[true,false],"salary_change":[12.09,2.85]} +{"birth_date":"1953-09-29T00:00:00Z","emp_no":10023,"first_name":"Bojan","gender":"F","hire_date":"1989-12-17T00:00:00Z","last_name":"Montemayor","salary":47896,"height":1.75,"still_hired":true,"avg_worked_seconds":330870342,"job_positions":["Accountant","Support Engineer","Purchase Manager"],"is_rehired":[true,true,false],"salary_change":[14.63,0.80]} +{"birth_date":"1958-09-05T00:00:00Z","emp_no":10024,"first_name":"Suzette","gender":"F","hire_date":"1997-05-19T00:00:00Z","last_name":"Pettey","salary":64675,"height":2.08,"still_hired":true,"avg_worked_seconds":367717671,"job_positions":"Junior Developer","is_rehired":[true,true,true,true]} +{"birth_date":"1958-10-31T00:00:00Z","emp_no":10025,"first_name":"Prasadram","gender":"M","hire_date":"1987-08-17T00:00:00Z","last_name":"Heyers","salary":47411,"height":1.87,"still_hired":false,"avg_worked_seconds":371270797,"job_positions":"Accountant","is_rehired":[true,false],"salary_change":[-4.33,-2.90,12.06,-3.46]} +{"birth_date":"1953-04-03T00:00:00Z","emp_no":10026,"first_name":"Yongqiao","gender":"M","hire_date":"1995-03-20T00:00:00Z","last_name":"Berztiss","salary":28336,"height":2.10,"still_hired":true,"avg_worked_seconds":359208133,"job_positions":"Reporting Analyst","is_rehired":[false,true],"salary_change":[-7.37,10.62,11.20]} +{"birth_date":"1962-07-10T00:00:00Z","emp_no":10027,"first_name":"Divier","gender":"F","hire_date":"1989-07-07T00:00:00Z","last_name":"Reistad","salary":73851,"height":1.53,"still_hired":false,"avg_worked_seconds":374037782,"job_positions":"Senior Python Developer","is_rehired":false} +{"birth_date":"1963-11-26T00:00:00Z","emp_no":10028,"first_name":"Domenick","gender":"M","hire_date":"1991-10-22T00:00:00Z","last_name":"Tempesti","salary":39356,"height":2.07,"still_hired":true,"avg_worked_seconds":226435054,"job_positions":["Tech Lead","Python Developer","Accountant","Internship"],"is_rehired":[true,false,false,true]} +{"birth_date":"1956-12-13T00:00:00Z","emp_no":10029,"first_name":"Otmar","gender":"M","hire_date":"1985-11-20T00:00:00Z","last_name":"Herbst","salary":74999,"height":1.99,"still_hired":false,"avg_worked_seconds":257694181,"job_positions":["Senior Python Developer","Data Scientist","Principal Support Engineer"],"is_rehired":true,"salary_change":[-0.32,-1.90,-8.19]} +{"birth_date":"1958-07-14T00:00:00Z","emp_no":10030,"gender":"M","hire_date":"1994-02-17T00:00:00Z","languages":3,"last_name":"Demeyer","salary":67492,"height":1.92,"still_hired":false,"avg_worked_seconds":394597613,"job_positions":["Tech Lead","Data Scientist","Senior Team Lead"],"is_rehired":[true,false,false],"salary_change":-0.40} +{"birth_date":"1959-01-27T00:00:00Z","emp_no":10031,"gender":"M","hire_date":"1991-09-01T00:00:00Z","languages":4,"last_name":"Joslin","salary":37716,"height":1.68,"still_hired":false,"avg_worked_seconds":348545109,"job_positions":["Architect","Senior Python Developer","Purchase Manager","Senior Team Lead"],"is_rehired":false} +{"birth_date":"1960-08-09T00:00:00Z","emp_no":10032,"gender":"F","hire_date":"1990-06-20T00:00:00Z","languages":3,"last_name":"Reistad","salary":62233,"height":2.10,"still_hired":false,"avg_worked_seconds":277622619,"job_positions":["Architect","Senior Python Developer","Junior Developer","Purchase Manager"],"is_rehired":[false,false],"salary_change":[9.32,-4.92]} +{"birth_date":"1956-11-14T00:00:00Z","emp_no":10033,"gender":"M","hire_date":"1987-03-18T00:00:00Z","languages":1,"last_name":"Merlo","salary":70011,"height":1.63,"still_hired":false,"avg_worked_seconds":208374744,"is_rehired":true} +{"birth_date":"1962-12-29T00:00:00Z","emp_no":10034,"gender":"M","hire_date":"1988-09-21T00:00:00Z","languages":1,"last_name":"Swan","salary":39878,"height":1.46,"still_hired":false,"avg_worked_seconds":214393176,"job_positions":["Business Analyst","Data Scientist","Python Developer","Accountant"],"is_rehired":false,"salary_change":-8.46} +{"birth_date":"1953-02-08T00:00:00Z","emp_no":10035,"gender":"M","hire_date":"1988-09-05T00:00:00Z","languages":5,"last_name":"Chappelet","salary":25945,"height":1.81,"still_hired":false,"avg_worked_seconds":203838153,"job_positions":["Senior Python Developer","Data Scientist"],"is_rehired":false,"salary_change":[-2.54,-6.58]} +{"birth_date":"1959-08-10T00:00:00Z","emp_no":10036,"gender":"M","hire_date":"1992-01-03T00:00:00Z","languages":4,"last_name":"Portugali","salary":60781,"height":1.61,"still_hired":false,"avg_worked_seconds":305493131,"job_positions":"Senior Python Developer","is_rehired":[true,false,false]} +{"birth_date":"1963-07-22T00:00:00Z","emp_no":10037,"gender":"M","hire_date":"1990-12-05T00:00:00Z","languages":2,"last_name":"Makrucki","salary":37691,"height":2.00,"still_hired":true,"avg_worked_seconds":359217000,"job_positions":["Senior Python Developer","Tech Lead","Accountant"],"is_rehired":false,"salary_change":-7.08} +{"birth_date":"1960-07-20T00:00:00Z","emp_no":10038,"gender":"M","hire_date":"1989-09-20T00:00:00Z","languages":4,"last_name":"Lortz","salary":35222,"height":1.53,"still_hired":true,"avg_worked_seconds":314036411,"job_positions":["Senior Python Developer","Python Developer","Support Engineer"]} +{"birth_date":"1959-10-01T00:00:00Z","emp_no":10039,"gender":"M","hire_date":"1988-01-19T00:00:00Z","languages":2,"last_name":"Brender","salary":36051,"height":1.55,"still_hired":false,"avg_worked_seconds":243221262,"job_positions":["Business Analyst","Python Developer","Principal Support Engineer"],"is_rehired":[true,true],"salary_change":-6.90} +{"emp_no":10040,"first_name":"Weiyi","gender":"F","hire_date":"1993-02-14T00:00:00Z","languages":4,"last_name":"Meriste","salary":37112,"height":1.90,"still_hired":false,"avg_worked_seconds":244478622,"job_positions":"Principal Support Engineer","is_rehired":[true,false,true,true],"salary_change":[6.97,14.74,-8.94,1.92]} +{"emp_no":10041,"first_name":"Uri","gender":"F","hire_date":"1989-11-12T00:00:00Z","languages":1,"last_name":"Lenart","salary":56415,"height":1.75,"still_hired":false,"avg_worked_seconds":287789442,"job_positions":["Data Scientist","Head Human Resources","Internship","Senior Team Lead"],"salary_change":[9.21,0.05,7.29,-2.94]} +{"emp_no":10042,"first_name":"Magy","gender":"F","hire_date":"1993-03-21T00:00:00Z","languages":3,"last_name":"Stamatiou","salary":30404,"height":1.44,"still_hired":true,"avg_worked_seconds":246355863,"job_positions":["Architect","Business Analyst","Junior Developer","Internship"],"salary_change":[-9.28,9.42]} +{"emp_no":10043,"first_name":"Yishay","gender":"M","hire_date":"1990-10-20T00:00:00Z","languages":1,"last_name":"Tzvieli","salary":34341,"height":1.52,"still_hired":true,"avg_worked_seconds":287222180,"job_positions":["Data Scientist","Python Developer","Support Engineer"],"is_rehired":[false,true,true],"salary_change":[-5.17,4.62,7.42]} +{"emp_no":10044,"first_name":"Mingsen","gender":"F","hire_date":"1994-05-21T00:00:00Z","languages":1,"last_name":"Casley","salary":39728,"height":2.06,"still_hired":false,"avg_worked_seconds":387408356,"job_positions":["Tech Lead","Principal Support Engineer","Accountant","Support Engineer"],"is_rehired":[true,true],"salary_change":8.09} +{"emp_no":10045,"first_name":"Moss","gender":"M","hire_date":"1989-09-02T00:00:00Z","languages":3,"last_name":"Shanbhogue","salary":74970,"height":1.70,"still_hired":false,"avg_worked_seconds":371418933,"job_positions":["Principal Support Engineer","Junior Developer","Accountant","Purchase Manager"],"is_rehired":[true,false]} +{"emp_no":10046,"first_name":"Lucien","gender":"M","hire_date":"1992-06-20T00:00:00Z","languages":4,"last_name":"Rosenbaum","salary":50064,"height":1.52,"still_hired":true,"avg_worked_seconds":302353405,"job_positions":["Principal Support Engineer","Junior Developer","Head Human Resources","Internship"],"is_rehired":[true,true,false,true],"salary_change":2.39} +{"emp_no":10047,"first_name":"Zvonko","gender":"M","hire_date":"1989-03-31T00:00:00Z","languages":4,"last_name":"Nyanchama","salary":42716,"height":1.52,"still_hired":true,"avg_worked_seconds":306369346,"job_positions":["Architect","Data Scientist","Principal Support Engineer","Senior Team Lead"],"is_rehired":true,"salary_change":[-6.36,12.12]} +{"emp_no":10048,"first_name":"Florian","gender":"M","hire_date":"1985-02-24T00:00:00Z","languages":3,"last_name":"Syrotiuk","salary":26436,"height":2.00,"still_hired":false,"avg_worked_seconds":248451647,"job_positions":"Internship","is_rehired":[true,true]} +{"emp_no":10049,"first_name":"Basil","gender":"F","hire_date":"1992-05-04T00:00:00Z","languages":5,"last_name":"Tramer","salary":37853,"height":1.52,"still_hired":true,"avg_worked_seconds":320725709,"job_positions":["Senior Python Developer","Business Analyst"],"salary_change":-1.05} +{"birth_date":"1958-05-21T00:00:00Z","emp_no":10050,"first_name":"Yinghua","gender":"M","hire_date":"1990-12-25T00:00:00Z","languages":2,"last_name":"Dredge","salary":43026,"height":1.96,"still_hired":true,"avg_worked_seconds":242731798,"job_positions":["Reporting Analyst","Junior Developer","Accountant","Support Engineer"],"is_rehired":true,"salary_change":[8.70,10.94]} +{"birth_date":"1953-07-28T00:00:00Z","emp_no":10051,"first_name":"Hidefumi","gender":"M","hire_date":"1992-10-15T00:00:00Z","languages":3,"last_name":"Caine","salary":58121,"height":1.89,"still_hired":true,"avg_worked_seconds":374753122,"job_positions":["Business Analyst","Accountant","Purchase Manager"]} +{"birth_date":"1961-02-26T00:00:00Z","emp_no":10052,"first_name":"Heping","gender":"M","hire_date":"1988-05-21T00:00:00Z","languages":1,"last_name":"Nitsch","salary":55360,"height":1.79,"still_hired":true,"avg_worked_seconds":299654717,"is_rehired":[true,true,false],"salary_change":[-0.55,-1.89,-4.22,-6.03]} +{"birth_date":"1954-09-13T00:00:00Z","emp_no":10053,"first_name":"Sanjiv","gender":"F","hire_date":"1986-02-04T00:00:00Z","languages":3,"last_name":"Zschoche","salary":54462,"height":1.58,"still_hired":false,"avg_worked_seconds":368103911,"job_positions":"Support Engineer","is_rehired":[true,false,true,false],"salary_change":[-7.67,-3.25]} +{"birth_date":"1957-04-04T00:00:00Z","emp_no":10054,"first_name":"Mayumi","gender":"M","hire_date":"1995-03-13T00:00:00Z","languages":4,"last_name":"Schueller","salary":65367,"height":1.82,"still_hired":false,"avg_worked_seconds":297441693,"job_positions":"Principal Support Engineer","is_rehired":[false,false]} +{"birth_date":"1956-06-06T00:00:00Z","emp_no":10055,"first_name":"Georgy","gender":"M","hire_date":"1992-04-27T00:00:00Z","languages":5,"last_name":"Dredge","salary":49281,"height":2.04,"still_hired":false,"avg_worked_seconds":283157844,"job_positions":["Senior Python Developer","Head Human Resources","Internship","Support Engineer"],"is_rehired":[false,false,true],"salary_change":[7.34,12.99,3.17]} +{"birth_date":"1961-09-01T00:00:00Z","emp_no":10056,"first_name":"Brendon","gender":"F","hire_date":"1990-02-01T00:00:00Z","languages":2,"last_name":"Bernini","salary":33370,"height":1.57,"still_hired":true,"avg_worked_seconds":349086555,"job_positions":"Senior Team Lead","is_rehired":[true,false,false],"salary_change":[10.99,-5.17]} +{"birth_date":"1954-05-30T00:00:00Z","emp_no":10057,"first_name":"Ebbe","gender":"F","hire_date":"1992-01-15T00:00:00Z","languages":4,"last_name":"Callaway","salary":27215,"height":1.59,"still_hired":true,"avg_worked_seconds":324356269,"job_positions":["Python Developer","Head Human Resources"],"salary_change":[-6.73,-2.43,-5.27,1.03]} +{"birth_date":"1954-10-01T00:00:00Z","emp_no":10058,"first_name":"Berhard","gender":"M","hire_date":"1987-04-13T00:00:00Z","languages":3,"last_name":"McFarlin","salary":38376,"height":1.83,"still_hired":false,"avg_worked_seconds":268378108,"job_positions":"Principal Support Engineer","salary_change":-4.89} +{"birth_date":"1953-09-19T00:00:00Z","emp_no":10059,"first_name":"Alejandro","gender":"F","hire_date":"1991-06-26T00:00:00Z","languages":2,"last_name":"McAlpine","salary":44307,"height":1.48,"still_hired":false,"avg_worked_seconds":237368465,"job_positions":["Architect","Principal Support Engineer","Purchase Manager","Senior Team Lead"],"is_rehired":false,"salary_change":[5.53,13.38,-4.69,6.27]} +{"birth_date":"1961-10-15T00:00:00Z","emp_no":10060,"first_name":"Breannda","gender":"M","hire_date":"1987-11-02T00:00:00Z","languages":2,"last_name":"Billingsley","salary":29175,"height":1.42,"still_hired":true,"avg_worked_seconds":341158890,"job_positions":["Business Analyst","Data Scientist","Senior Team Lead"],"is_rehired":[false,false,true,false],"salary_change":[-1.76,-0.85]} +{"birth_date":"1962-10-19T00:00:00Z","emp_no":10061,"first_name":"Tse","gender":"M","hire_date":"1985-09-17T00:00:00Z","languages":1,"last_name":"Herber","salary":49095,"height":1.45,"still_hired":false,"avg_worked_seconds":327550310,"job_positions":["Purchase Manager","Senior Team Lead"],"is_rehired":[false,true],"salary_change":[14.39,-2.58,-0.95]} +{"birth_date":"1961-11-02T00:00:00Z","emp_no":10062,"first_name":"Anoosh","gender":"M","hire_date":"1991-08-30T00:00:00Z","languages":3,"last_name":"Peyn","salary":65030,"height":1.70,"still_hired":false,"avg_worked_seconds":203989706,"job_positions":["Python Developer","Senior Team Lead"],"is_rehired":[false,true,true],"salary_change":-1.17} +{"birth_date":"1952-08-06T00:00:00Z","emp_no":10063,"first_name":"Gino","gender":"F","hire_date":"1989-04-08T00:00:00Z","languages":3,"last_name":"Leonhardt","salary":52121,"height":1.78,"still_hired":true,"avg_worked_seconds":214068302,"is_rehired":true} +{"birth_date":"1959-04-07T00:00:00Z","emp_no":10064,"first_name":"Udi","gender":"M","hire_date":"1985-11-20T00:00:00Z","languages":5,"last_name":"Jansch","salary":33956,"height":1.93,"still_hired":false,"avg_worked_seconds":307364077,"job_positions":"Purchase Manager","is_rehired":[false,false,true,false],"salary_change":[-8.66,-2.52]} +{"birth_date":"1963-04-14T00:00:00Z","emp_no":10065,"first_name":"Satosi","gender":"M","hire_date":"1988-05-18T00:00:00Z","languages":2,"last_name":"Awdeh","salary":50249,"height":1.59,"still_hired":false,"avg_worked_seconds":372660279,"job_positions":["Business Analyst","Data Scientist","Principal Support Engineer"],"is_rehired":[false,true],"salary_change":[-1.47,14.44,-9.81]} +{"birth_date":"1952-11-13T00:00:00Z","emp_no":10066,"first_name":"Kwee","gender":"M","hire_date":"1986-02-26T00:00:00Z","languages":5,"last_name":"Schusler","salary":31897,"height":2.10,"still_hired":true,"avg_worked_seconds":360906451,"job_positions":["Senior Python Developer","Data Scientist","Accountant","Internship"],"is_rehired":[true,true,true],"salary_change":5.94} +{"birth_date":"1953-01-07T00:00:00Z","emp_no":10067,"first_name":"Claudi","gender":"M","hire_date":"1987-03-04T00:00:00Z","languages":2,"last_name":"Stavenow","salary":52044,"height":1.77,"still_hired":true,"avg_worked_seconds":347664141,"job_positions":["Tech Lead","Principal Support Engineer"],"is_rehired":[false,false],"salary_change":[8.72,4.44]} +{"birth_date":"1962-11-26T00:00:00Z","emp_no":10068,"first_name":"Charlene","gender":"M","hire_date":"1987-08-07T00:00:00Z","languages":3,"last_name":"Brattka","salary":28941,"height":1.58,"still_hired":true,"avg_worked_seconds":233999584,"job_positions":"Architect","is_rehired":true,"salary_change":[3.43,-5.61,-5.29]} +{"birth_date":"1960-09-06T00:00:00Z","emp_no":10069,"first_name":"Margareta","gender":"F","hire_date":"1989-11-05T00:00:00Z","languages":5,"last_name":"Bierman","salary":41933,"height":1.77,"still_hired":true,"avg_worked_seconds":366512352,"job_positions":["Business Analyst","Junior Developer","Purchase Manager","Support Engineer"],"is_rehired":false,"salary_change":[-3.34,-6.33,6.23,-0.31]} +{"birth_date":"1955-08-20T00:00:00Z","emp_no":10070,"first_name":"Reuven","gender":"M","hire_date":"1985-10-14T00:00:00Z","languages":3,"last_name":"Garigliano","salary":54329,"height":1.77,"still_hired":true,"avg_worked_seconds":347188604,"is_rehired":[true,true,true],"salary_change":-5.90} +{"birth_date":"1958-01-21T00:00:00Z","emp_no":10071,"first_name":"Hisao","gender":"M","hire_date":"1987-10-01T00:00:00Z","languages":2,"last_name":"Lipner","salary":40612,"height":2.07,"still_hired":false,"avg_worked_seconds":306671693,"job_positions":["Business Analyst","Reporting Analyst","Senior Team Lead"],"is_rehired":[false,false,false],"salary_change":-2.69} +{"birth_date":"1952-05-15T00:00:00Z","emp_no":10072,"first_name":"Hironoby","gender":"F","hire_date":"1988-07-21T00:00:00Z","languages":5,"last_name":"Sidou","salary":54518,"height":1.82,"still_hired":true,"avg_worked_seconds":209506065,"job_positions":["Architect","Tech Lead","Python Developer","Senior Team Lead"],"is_rehired":[false,false,true,false],"salary_change":[11.21,-2.30,2.22,-5.44]} +{"birth_date":"1954-02-23T00:00:00Z","emp_no":10073,"first_name":"Shir","gender":"M","hire_date":"1991-12-01T00:00:00Z","languages":4,"last_name":"McClurg","salary":32568,"height":1.66,"still_hired":false,"avg_worked_seconds":314930367,"job_positions":["Principal Support Engineer","Python Developer","Junior Developer","Purchase Manager"],"is_rehired":[true,false],"salary_change":-5.67} +{"birth_date":"1955-08-28T00:00:00Z","emp_no":10074,"first_name":"Mokhtar","gender":"F","hire_date":"1990-08-13T00:00:00Z","languages":5,"last_name":"Bernatsky","salary":38992,"height":1.64,"still_hired":true,"avg_worked_seconds":382397583,"job_positions":["Senior Python Developer","Python Developer"],"is_rehired":[true,false,false,true],"salary_change":[6.70,1.98,-5.64,2.96]} +{"birth_date":"1960-03-09T00:00:00Z","emp_no":10075,"first_name":"Gao","gender":"F","hire_date":"1987-03-19T00:00:00Z","languages":5,"last_name":"Dolinsky","salary":51956,"height":1.94,"still_hired":false,"avg_worked_seconds":370238919,"job_positions":"Purchase Manager","is_rehired":true,"salary_change":[9.63,-3.29,8.42]} +{"birth_date":"1952-06-13T00:00:00Z","emp_no":10076,"first_name":"Erez","gender":"F","hire_date":"1985-07-09T00:00:00Z","languages":3,"last_name":"Ritzmann","salary":62405,"height":1.83,"still_hired":false,"avg_worked_seconds":376240317,"job_positions":["Architect","Senior Python Developer"],"is_rehired":false,"salary_change":[-6.90,-1.30,8.75]} +{"birth_date":"1964-04-18T00:00:00Z","emp_no":10077,"first_name":"Mona","gender":"M","hire_date":"1990-03-02T00:00:00Z","languages":5,"last_name":"Azuma","salary":46595,"height":1.68,"still_hired":false,"avg_worked_seconds":351960222,"job_positions":"Internship","salary_change":-0.01} +{"birth_date":"1959-12-25T00:00:00Z","emp_no":10078,"first_name":"Danel","gender":"F","hire_date":"1987-05-26T00:00:00Z","languages":2,"last_name":"Mondadori","salary":69904,"height":1.81,"still_hired":true,"avg_worked_seconds":377116038,"job_positions":["Architect","Principal Support Engineer","Internship"],"is_rehired":true,"salary_change":[-7.88,9.98,12.52]} +{"birth_date":"1961-10-05T00:00:00Z","emp_no":10079,"first_name":"Kshitij","gender":"F","hire_date":"1986-03-27T00:00:00Z","languages":2,"last_name":"Gils","salary":32263,"height":1.59,"still_hired":false,"avg_worked_seconds":320953330,"is_rehired":false,"salary_change":7.58} +{"birth_date":"1957-12-03T00:00:00Z","emp_no":10080,"first_name":"Premal","gender":"M","hire_date":"1985-11-19T00:00:00Z","languages":5,"last_name":"Baek","salary":52833,"height":1.80,"still_hired":false,"avg_worked_seconds":239266137,"job_positions":"Senior Python Developer","salary_change":[-4.35,7.36,5.56]} +{"birth_date":"1960-12-17T00:00:00Z","emp_no":10081,"first_name":"Zhongwei","gender":"M","hire_date":"1986-10-30T00:00:00Z","languages":2,"last_name":"Rosen","salary":50128,"height":1.44,"still_hired":true,"avg_worked_seconds":321375511,"job_positions":["Accountant","Internship"],"is_rehired":[false,false,false]} +{"birth_date":"1963-09-09T00:00:00Z","emp_no":10082,"first_name":"Parviz","gender":"M","hire_date":"1990-01-03T00:00:00Z","languages":4,"last_name":"Lortz","salary":49818,"height":1.61,"still_hired":false,"avg_worked_seconds":232522994,"job_positions":"Principal Support Engineer","is_rehired":false,"salary_change":[1.19,-3.39]} +{"birth_date":"1959-07-23T00:00:00Z","emp_no":10083,"first_name":"Vishv","gender":"M","hire_date":"1987-03-31T00:00:00Z","languages":1,"last_name":"Zockler","salary":39110,"height":1.42,"still_hired":false,"avg_worked_seconds":331236443,"job_positions":"Head Human Resources"} +{"birth_date":"1960-05-25T00:00:00Z","emp_no":10084,"first_name":"Tuval","gender":"M","hire_date":"1995-12-15T00:00:00Z","languages":1,"last_name":"Kalloufi","salary":28035,"height":1.51,"still_hired":true,"avg_worked_seconds":359067056,"job_positions":"Principal Support Engineer","is_rehired":false} +{"birth_date":"1962-11-07T00:00:00Z","emp_no":10085,"first_name":"Kenroku","gender":"M","hire_date":"1994-04-09T00:00:00Z","languages":5,"last_name":"Malabarba","salary":35742,"height":2.01,"still_hired":true,"avg_worked_seconds":353404008,"job_positions":["Senior Python Developer","Business Analyst","Tech Lead","Accountant"],"salary_change":[11.67,6.75,8.40]} +{"birth_date":"1962-11-19T00:00:00Z","emp_no":10086,"first_name":"Somnath","gender":"M","hire_date":"1990-02-16T00:00:00Z","languages":1,"last_name":"Foote","salary":68547,"height":1.74,"still_hired":true,"avg_worked_seconds":328580163,"job_positions":"Senior Python Developer","is_rehired":[false,true],"salary_change":13.61} +{"birth_date":"1959-07-23T00:00:00Z","emp_no":10087,"first_name":"Xinglin","gender":"F","hire_date":"1986-09-08T00:00:00Z","languages":5,"last_name":"Eugenio","salary":32272,"height":1.74,"still_hired":true,"avg_worked_seconds":305782871,"job_positions":["Junior Developer","Internship"],"is_rehired":[false,false],"salary_change":-2.05} +{"birth_date":"1954-02-25T00:00:00Z","emp_no":10088,"first_name":"Jungsoon","gender":"F","hire_date":"1988-09-02T00:00:00Z","languages":5,"last_name":"Syrzycki","salary":39638,"height":1.91,"still_hired":false,"avg_worked_seconds":330714423,"job_positions":["Reporting Analyst","Business Analyst","Tech Lead"],"is_rehired":true} +{"birth_date":"1963-03-21T00:00:00Z","emp_no":10089,"first_name":"Sudharsan","gender":"F","hire_date":"1986-08-12T00:00:00Z","languages":4,"last_name":"Flasterstein","salary":43602,"height":1.57,"still_hired":true,"avg_worked_seconds":232951673,"job_positions":["Junior Developer","Accountant"],"is_rehired":[true,false,false,false]} +{"birth_date":"1961-05-30T00:00:00Z","emp_no":10090,"first_name":"Kendra","gender":"M","hire_date":"1986-03-14T00:00:00Z","languages":2,"last_name":"Hofting","salary":44956,"height":2.03,"still_hired":true,"avg_worked_seconds":212460105,"is_rehired":[false,false,false,true],"salary_change":[7.15,-1.85,3.60]} +{"birth_date":"1955-10-04T00:00:00Z","emp_no":10091,"first_name":"Amabile","gender":"M","hire_date":"1992-11-18T00:00:00Z","languages":3,"last_name":"Gomatam","salary":38645,"height":2.09,"still_hired":true,"avg_worked_seconds":242582807,"job_positions":["Reporting Analyst","Python Developer"],"is_rehired":[true,true,false,false],"salary_change":[-9.23,7.50,5.85,5.19]} +{"birth_date":"1964-10-18T00:00:00Z","emp_no":10092,"first_name":"Valdiodio","gender":"F","hire_date":"1989-09-22T00:00:00Z","languages":1,"last_name":"Niizuma","salary":25976,"height":1.75,"still_hired":false,"avg_worked_seconds":313407352,"job_positions":["Junior Developer","Accountant"],"is_rehired":[false,false,true,true],"salary_change":[8.78,0.39,-6.77,8.30]} +{"birth_date":"1964-06-11T00:00:00Z","emp_no":10093,"first_name":"Sailaja","gender":"M","hire_date":"1996-11-05T00:00:00Z","languages":3,"last_name":"Desikan","salary":45656,"height":1.69,"still_hired":false,"avg_worked_seconds":315904921,"job_positions":["Reporting Analyst","Tech Lead","Principal Support Engineer","Purchase Manager"],"salary_change":-0.88} +{"birth_date":"1957-05-25T00:00:00Z","emp_no":10094,"first_name":"Arumugam","gender":"F","hire_date":"1987-04-18T00:00:00Z","languages":5,"last_name":"Ossenbruggen","salary":66817,"height":2.10,"still_hired":false,"avg_worked_seconds":332920135,"job_positions":["Senior Python Developer","Principal Support Engineer","Accountant"],"is_rehired":[true,false,true],"salary_change":[2.22,7.92]} +{"birth_date":"1965-01-03T00:00:00Z","emp_no":10095,"first_name":"Hilari","gender":"M","hire_date":"1986-07-15T00:00:00Z","languages":4,"last_name":"Morton","salary":37702,"height":1.55,"still_hired":false,"avg_worked_seconds":321850475,"is_rehired":[true,true,false,false],"salary_change":[-3.93,-6.66]} +{"birth_date":"1954-09-16T00:00:00Z","emp_no":10096,"first_name":"Jayson","gender":"M","hire_date":"1990-01-14T00:00:00Z","languages":4,"last_name":"Mandell","salary":43889,"height":1.94,"still_hired":false,"avg_worked_seconds":204381503,"job_positions":["Architect","Reporting Analyst"],"is_rehired":[false,false,false]} +{"birth_date":"1952-02-27T00:00:00Z","emp_no":10097,"first_name":"Remzi","gender":"M","hire_date":"1990-09-15T00:00:00Z","languages":3,"last_name":"Waschkowski","salary":71165,"height":1.53,"still_hired":false,"avg_worked_seconds":206258084,"job_positions":["Reporting Analyst","Tech Lead"],"is_rehired":[true,false],"salary_change":-1.12} +{"birth_date":"1961-09-23T00:00:00Z","emp_no":10098,"first_name":"Sreekrishna","gender":"F","hire_date":"1985-05-13T00:00:00Z","languages":4,"last_name":"Servieres","salary":44817,"height":2.00,"still_hired":false,"avg_worked_seconds":272392146,"job_positions":["Architect","Internship","Senior Team Lead"],"is_rehired":false,"salary_change":[-2.83,8.31,4.38]} +{"birth_date":"1956-05-25T00:00:00Z","emp_no":10099,"first_name":"Valter","gender":"F","hire_date":"1988-10-18T00:00:00Z","languages":2,"last_name":"Sullins","salary":73578,"height":1.81,"still_hired":true,"avg_worked_seconds":377713748,"is_rehired":[true,true],"salary_change":[10.71,14.26,-8.78,-3.98]} +{"birth_date":"1953-04-21T00:00:00Z","emp_no":10100,"first_name":"Hironobu","gender":"F","hire_date":"1987-09-21T00:00:00Z","languages":4,"last_name":"Haraldson","salary":68431,"height":1.77,"still_hired":true,"avg_worked_seconds":223910853,"job_positions":"Purchase Manager","is_rehired":[false,true,true,false],"salary_change":[13.97,-7.49]} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java index 6a8dbfec79433..f9219a6a7b8f6 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ConstantNullBlock.java @@ -198,11 +198,11 @@ public void closeInternal() { blockFactory().adjustBreaker(-ramBytesUsed()); } - static class Builder implements Block.Builder { + public static class Builder implements Block.Builder { final BlockFactory blockFactory; - Builder(BlockFactory blockFactory) { + public Builder(BlockFactory blockFactory) { this.blockFactory = blockFactory; } diff --git a/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle b/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle index 4c9094d509df5..1de92ab529986 100644 --- a/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle +++ b/x-pack/plugin/esql/qa/server/mixed-cluster/build.gradle @@ -37,6 +37,7 @@ dependencies { javaRestTestImplementation project(xpackModule('esql')) clusterPlugins project(xpackModule('esql-datasource-csv')) + clusterPlugins project(xpackModule('esql-datasource-ndjson')) clusterPlugins project(xpackModule('esql-datasource-http')) } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle index a82642e9e1c99..6f01f42bc2d33 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle +++ b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle @@ -24,6 +24,7 @@ dependencies { clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') clusterPlugins project(xpackModule('esql-datasource-csv')) + clusterPlugins project(xpackModule('esql-datasource-ndjson')) clusterPlugins project(xpackModule('esql-datasource-http')) } diff --git a/x-pack/plugin/esql/qa/server/multi-node/build.gradle b/x-pack/plugin/esql/qa/server/multi-node/build.gradle index 712697e49b436..50c3daed76725 100644 --- a/x-pack/plugin/esql/qa/server/multi-node/build.gradle +++ b/x-pack/plugin/esql/qa/server/multi-node/build.gradle @@ -19,6 +19,7 @@ dependencies { clusterPlugins project(':plugins:mapper-murmur3') clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') clusterPlugins project(xpackModule('esql-datasource-csv')) + clusterPlugins project(xpackModule('esql-datasource-ndjson')) clusterPlugins project(xpackModule('esql-datasource-http')) } diff --git a/x-pack/plugin/esql/qa/server/single-node/build.gradle b/x-pack/plugin/esql/qa/server/single-node/build.gradle index be16a0a44d6c3..13b3fc34d6097 100644 --- a/x-pack/plugin/esql/qa/server/single-node/build.gradle +++ b/x-pack/plugin/esql/qa/server/single-node/build.gradle @@ -33,6 +33,7 @@ dependencies { clusterPlugins project(':plugins:mapper-murmur3') clusterPlugins project(':x-pack:plugin:inference:qa:test-service-plugin') clusterPlugins project(xpackModule('esql-datasource-csv')) + clusterPlugins project(xpackModule('esql-datasource-ndjson')) clusterPlugins project(xpackModule('esql-datasource-http')) }