From 56f8aea66ae2b1a8315125f9f756cee10576fbad Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Mon, 23 Jul 2018 16:18:59 +0200 Subject: [PATCH] [ML] Job validation uses fieldCaps to check aggregatable fields to avoid triggering Elasticsearch errors. --- .../__tests__/mock_field_caps.json | 18 +++++ .../__tests__/validate_cardinality.js | 76 +++++++++++++------ .../job_validation/validate_cardinality.js | 15 +++- 3 files changed, 84 insertions(+), 25 deletions(-) create mode 100644 x-pack/plugins/ml/server/models/job_validation/__tests__/mock_field_caps.json diff --git a/x-pack/plugins/ml/server/models/job_validation/__tests__/mock_field_caps.json b/x-pack/plugins/ml/server/models/job_validation/__tests__/mock_field_caps.json new file mode 100644 index 0000000000000..a283642341c16 --- /dev/null +++ b/x-pack/plugins/ml/server/models/job_validation/__tests__/mock_field_caps.json @@ -0,0 +1,18 @@ +{ + "fields": { + "_source": { + "_source": { + "type": "_source", + "searchable": false, + "aggregatable": false + } + }, + "airline": { + "keyword": { + "type": "keyword", + "searchable": false, + "aggregatable": true + } + } + } +} diff --git a/x-pack/plugins/ml/server/models/job_validation/__tests__/validate_cardinality.js b/x-pack/plugins/ml/server/models/job_validation/__tests__/validate_cardinality.js index beffa764b7e99..aa48d9f8df4ec 100644 --- a/x-pack/plugins/ml/server/models/job_validation/__tests__/validate_cardinality.js +++ b/x-pack/plugins/ml/server/models/job_validation/__tests__/validate_cardinality.js @@ -11,15 +11,22 @@ import expect from 'expect.js'; import { validateCardinality } from '../validate_cardinality'; import mockFareQuoteCardinality from './mock_farequote_cardinality'; +import mockFieldCaps from './mock_field_caps'; + +const mockResponses = { + search: mockFareQuoteCardinality, + fieldCaps: mockFieldCaps +}; // mock callWithRequestFactory -const callWithRequestFactory = (mockSearchResponse, fail = false) => { - return () => { +const callWithRequestFactory = (responses, fail = false) => { + return (requestName) => { return new Promise((resolve, reject) => { + const response = responses[requestName]; if (fail) { - reject(mockSearchResponse); + reject(response); } else { - resolve(mockSearchResponse); + resolve(response); } }); }; @@ -28,21 +35,21 @@ const callWithRequestFactory = (mockSearchResponse, fail = false) => { describe('ML - validateCardinality', () => { it('called without arguments', (done) => { - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality)).then( + validateCardinality(callWithRequestFactory(mockResponses)).then( () => done(new Error('Promise should not resolve for this test without job argument.')), () => done() ); }); it('called with non-valid job argument #1, missing analysis_config', (done) => { - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), {}).then( + validateCardinality(callWithRequestFactory(mockResponses), {}).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); }); it('called with non-valid job argument #2, missing datafeed_config', (done) => { - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), { analysis_config: {} }).then( + validateCardinality(callWithRequestFactory(mockResponses), { analysis_config: {} }).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); @@ -50,7 +57,7 @@ describe('ML - validateCardinality', () => { it('called with non-valid job argument #3, missing datafeed_config.indices', (done) => { const job = { analysis_config: {}, datafeed_config: {} }; - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then( + validateCardinality(callWithRequestFactory(mockResponses), job).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); @@ -58,7 +65,7 @@ describe('ML - validateCardinality', () => { it('called with non-valid job argument #4, missing data_description', (done) => { const job = { analysis_config: {}, datafeed_config: { indices: [] } }; - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then( + validateCardinality(callWithRequestFactory(mockResponses), job).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); @@ -66,7 +73,7 @@ describe('ML - validateCardinality', () => { it('called with non-valid job argument #5, missing data_description.time_field', (done) => { const job = { analysis_config: {}, data_description: {}, datafeed_config: { indices: [] } }; - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then( + validateCardinality(callWithRequestFactory(mockResponses), job).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); @@ -76,7 +83,7 @@ describe('ML - validateCardinality', () => { const job = { analysis_config: {}, datafeed_config: { indices: [] }, data_description: { time_field: '@timestamp' } }; - validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then( + validateCardinality(callWithRequestFactory(mockResponses), job).then( () => done(new Error('Promise should not resolve for this test without valid job argument.')), () => done() ); @@ -91,7 +98,7 @@ describe('ML - validateCardinality', () => { } }; - return validateCardinality(callWithRequestFactory(mockFareQuoteCardinality), job).then( + return validateCardinality(callWithRequestFactory(mockResponses), job).then( (messages) => { const ids = messages.map(m => m.id); expect(ids).to.eql([]); @@ -117,8 +124,8 @@ describe('ML - validateCardinality', () => { const testCardinality = (fieldName, cardinality, test) => { const job = getJobConfig(fieldName); - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job, {}).then( (messages) => { const ids = messages.map(m => m.id); @@ -127,6 +134,27 @@ describe('ML - validateCardinality', () => { ); }; + it(`field '_source' not aggregatable`, () => { + const job = getJobConfig('partition_field_name'); + job.analysis_config.detectors[0].partition_field_name = '_source'; + return validateCardinality(callWithRequestFactory(mockResponses), job).then( + (messages) => { + const ids = messages.map(m => m.id); + expect(ids).to.eql(['field_not_aggregatable']); + } + ); + }); + + it(`field 'airline' aggregatable`, () => { + const job = getJobConfig('partition_field_name'); + return validateCardinality(callWithRequestFactory(mockResponses), job).then( + (messages) => { + const ids = messages.map(m => m.id); + expect(ids).to.eql(['success_cardinality']); + } + ); + }); + it('field not aggregatable', () => { const job = getJobConfig('partition_field_name'); return validateCardinality(callWithRequestFactory({}), job).then( @@ -198,8 +226,8 @@ describe('ML - validateCardinality', () => { it(`disabled model_plot, over field cardinality of ${cardinality} doesn't trigger a warning`, () => { const job = getJobConfig('over_field_name'); job.model_plot_config = { enabled: false }; - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job).then( (messages) => { const ids = messages.map(m => m.id); @@ -211,8 +239,8 @@ describe('ML - validateCardinality', () => { it(`enabled model_plot, over field cardinality of ${cardinality} triggers a model plot warning`, () => { const job = getJobConfig('over_field_name'); job.model_plot_config = { enabled: true }; - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job).then( (messages) => { const ids = messages.map(m => m.id); @@ -224,8 +252,8 @@ describe('ML - validateCardinality', () => { it(`disabled model_plot, by field cardinality of ${cardinality} triggers a field cardinality warning`, () => { const job = getJobConfig('by_field_name'); job.model_plot_config = { enabled: false }; - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job).then( (messages) => { const ids = messages.map(m => m.id); @@ -237,8 +265,8 @@ describe('ML - validateCardinality', () => { it(`enabled model_plot, by field cardinality of ${cardinality} triggers a model plot warning and field cardinality warning`, () => { const job = getJobConfig('by_field_name'); job.model_plot_config = { enabled: true }; - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job).then( (messages) => { const ids = messages.map(m => m.id); @@ -250,8 +278,8 @@ describe('ML - validateCardinality', () => { it(`enabled model_plot with terms, by field cardinality of ${cardinality} triggers just field cardinality warning`, () => { const job = getJobConfig('by_field_name'); job.model_plot_config = { enabled: true, terms: 'AAL,AAB' }; - const mockCardinality = _.cloneDeep(mockFareQuoteCardinality); - mockCardinality.aggregations.airline_cardinality.value = cardinality; + const mockCardinality = _.cloneDeep(mockResponses); + mockCardinality.search.aggregations.airline_cardinality.value = cardinality; return validateCardinality(callWithRequestFactory(mockCardinality), job).then( (messages) => { const ids = messages.map(m => m.id); diff --git a/x-pack/plugins/ml/server/models/job_validation/validate_cardinality.js b/x-pack/plugins/ml/server/models/job_validation/validate_cardinality.js index dc6e4166b0c78..8290213eaede9 100644 --- a/x-pack/plugins/ml/server/models/job_validation/validate_cardinality.js +++ b/x-pack/plugins/ml/server/models/job_validation/validate_cardinality.js @@ -42,10 +42,23 @@ const validateFactory = (callWithRequest, job) => { if (relevantDetectors.length > 0) { try { const uniqueFieldNames = _.uniq(relevantDetectors.map(f => f[fieldName])); + + // use fieldCaps endpoint to get data about whether fields are aggregatable + const fieldCaps = await callWithRequest('fieldCaps', { + index: job.datafeed_config.indices.join(','), + fields: uniqueFieldNames + }); + + // parse fieldCaps to return an array of just the fields which are aggregatable + const aggregatableFieldNames = Object.keys(fieldCaps.fields).filter((field) => { + const fieldType = Object.keys(fieldCaps.fields[field])[0]; + return fieldCaps.fields[field][fieldType].aggregatable; + }); + const stats = await dv.checkAggregatableFieldsExist( job.datafeed_config.indices.join(','), job.datafeed_config.query, - uniqueFieldNames, + aggregatableFieldNames, 0, job.data_description.time_field );