elastic · alvarezmelissa87 · Apr 20, 2021 · Apr 15, 2021 · Apr 15, 2021 · Apr 19, 2021
diff --git a/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/page.tsx b/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_creation/page.tsx
@@ -61,7 +61,7 @@ export const Page: FC<Props> = ({ jobId }) => {
 
   const createAnalyticsForm = useCreateAnalyticsForm();
   const { state } = createAnalyticsForm;
-  const { isAdvancedEditorEnabled, disableSwitchToForm } = state;
+  const { isAdvancedEditorEnabled, disableSwitchToForm, isJobCreated } = state;
   const { jobType } = state.form;
   const {
     initiateWizard,
@@ -217,7 +217,7 @@ export const Page: FC<Props> = ({ jobId }) => {
                 }
               >
                 <EuiSwitch
-                  disabled={jobType === undefined || disableSwitchToForm}
+                  disabled={jobType === undefined || disableSwitchToForm || isJobCreated}
                   label={i18n.translate(
                     'xpack.ml.dataframe.analytics.create.switchToJsonEditorSwitch',
                     {

diff --git a/x-pack/plugins/ml/server/models/data_frame_analytics/validation.ts b/x-pack/plugins/ml/server/models/data_frame_analytics/validation.ts
@@ -9,6 +9,7 @@ import { i18n } from '@kbn/i18n';
 import type { estypes } from '@elastic/elasticsearch';
 import { IScopedClusterClient } from 'kibana/server';
 import { getAnalysisType } from '../../../common/util/analytics_utils';
+import { ANALYSIS_CONFIG_TYPE } from '../../../common/constants/data_frame_analytics';
 import {
   ALL_CATEGORIES,
   FRACTION_EMPTY_LIMIT,
@@ -59,6 +60,12 @@ const analysisFieldsHeading = i18n.translate(
     defaultMessage: 'Analysis fields',
   }
 );
+const lowFieldCountHeading = i18n.translate(
+  'xpack.ml.models.dfaValidation.messages.lowFieldCountHeading',
+  {
+    defaultMessage: 'Insufficient fields',
+  }
+);
 const dependentVarHeading = i18n.translate(
   'xpack.ml.models.dfaValidation.messages.dependentVarHeading',
   {
@@ -77,6 +84,12 @@ const analysisFieldsWarningMessage = {
   status: VALIDATION_STATUS.WARNING,
   heading: analysisFieldsHeading,
 };
+const lowFieldCountWarningMessage = {
+  id: 'analysis_fields_count',
+  text: '',
+  status: VALIDATION_STATUS.WARNING,
+  heading: lowFieldCountHeading,
+};
 
 function getRegressionAndClassificationMessage(
   analysisConfig: AnalysisConfig,
@@ -94,7 +107,7 @@ function getRegressionAndClassificationMessage(
 
     if (trainingPercent) {
       const trainingDocs = totalDocs * (trainingPercent / 100);
-      const trainingPercentMessage = getTrainingPercentMessage(trainingDocs);
+      const trainingPercentMessage = getTrainingPercentMessage(trainingPercent, trainingDocs);
       if (trainingPercentMessage) {
         messages.push(trainingPercentMessage);
       }
@@ -128,7 +141,7 @@ function getRegressionAndClassificationMessage(
           id: 'num_top_classes',
           text: i18n.translate('xpack.ml.models.dfaValidation.messages.topClassesWarningMessage', {
             defaultMessage:
-              'Probabilities will be reported for {numCategories, plural, one {# category} other {# categories}}. There could be a significant effect on the size of your destination index.',
+              'Predicted probabilities will be reported for {numCategories, plural, one {# category} other {# categories}}. If you have a large number of categories, there could be a significant effect on the size of your destination index.',
             values: {
               numCategories: topClasses === ALL_CATEGORIES ? depVarCardinality : topClasses,
             },
@@ -143,7 +156,7 @@ function getRegressionAndClassificationMessage(
           id: 'num_top_classes',
           text: i18n.translate('xpack.ml.models.dfaValidation.messages.topClassesSuccessMessage', {
             defaultMessage:
-              'Probabilities will be reported for {numCategories, plural, one {# category} other {# categories}}.',
+              'Predicted probabilities will be reported for {numCategories, plural, one {# category} other {# categories}}.',
             values: {
               numCategories: topClasses === ALL_CATEGORIES ? depVarCardinality : topClasses,
             },
@@ -159,7 +172,21 @@ function getRegressionAndClassificationMessage(
   return messages;
 }
 
-function getTrainingPercentMessage(trainingDocs: number) {
+function getTrainingPercentMessage(trainingPercent: number, trainingDocs: number) {
+  if (trainingPercent === 100) {
+    return {
+      id: 'training_percent_hundred',
+      text: i18n.translate(
+        'xpack.ml.models.dfaValidation.messages.noTestingDataTrainingPercentWarning',
+        {
+          defaultMessage:
+            'All eligible documents will be used for training the model. In order to evaluate the model, provide testing data by reducing the training percent.',
+        }
+      ),
+      status: VALIDATION_STATUS.WARNING,
+      heading: trainingPercentHeading,
+    };
+  }
   if (trainingDocs >= TRAINING_DOCS_UPPER) {
     return {
       id: 'training_percent_high',
@@ -281,14 +308,27 @@ async function getValidationCheckMessages(
             );
           }
           if (depVarValid === true) {
-            messages.push({
-              id: 'dep_var_check',
-              text: i18n.translate('xpack.ml.models.dfaValidation.messages.depVarSuccess', {
-                defaultMessage: 'The dependent variable field contains useful values for analysis.',
-              }),
-              status: VALIDATION_STATUS.SUCCESS,
-              heading: dependentVarHeading,
-            });
+            if (analysisType === ANALYSIS_CONFIG_TYPE.REGRESSION) {
+              messages.push({
+                id: 'dep_var_check',
+                text: i18n.translate('xpack.ml.models.dfaValidation.messages.depVarRegSuccess', {
+                  defaultMessage:
+                    'The dependent variable field contains continuous values suitable for regression analysis.',
+                }),
+                status: VALIDATION_STATUS.SUCCESS,
+                heading: dependentVarHeading,
+              });
+            } else {
+              messages.push({
+                id: 'dep_var_check',
+                text: i18n.translate('xpack.ml.models.dfaValidation.messages.depVarClassSuccess', {
+                  defaultMessage:
+                    'The dependent variable field contains discrete values suitable for classification.',
+                }),
+                status: VALIDATION_STATUS.SUCCESS,
+                heading: dependentVarHeading,
+              });
+            }
           } else {
             messages.push(dependentVarWarningMessage);
           }
@@ -306,6 +346,33 @@ async function getValidationCheckMessages(
 
     if (analyzedFields.length && analyzedFields.length > INCLUDED_FIELDS_THRESHOLD) {
       analysisFieldsNumHigh = true;
+    } else {
+      if (analysisType === ANALYSIS_CONFIG_TYPE.OUTLIER_DETECTION && analyzedFields.length < 1) {
+        lowFieldCountWarningMessage.text = i18n.translate(
+          'xpack.ml.models.dfaValidation.messages.lowFieldCountOutlierWarningText',
+          {
+            defaultMessage:
+              'Outlier detection requires that at least one field is included in the analysis.',
+          }
+        );
+        messages.push(lowFieldCountWarningMessage);
+      } else if (
+        analysisType !== ANALYSIS_CONFIG_TYPE.OUTLIER_DETECTION &&
+        analyzedFields.length < 2
+      ) {
+        lowFieldCountWarningMessage.text = i18n.translate(
+          'xpack.ml.models.dfaValidation.messages.lowFieldCountWarningText',
+          {
+            defaultMessage:
+              '{analysisType} requires that at least two fields are included in the analysis.',
+            values: {
+              analysisType:
+                analysisType === ANALYSIS_CONFIG_TYPE.REGRESSION ? 'Regression' : 'Classification',
+            },
+          }
+        );
+        messages.push(lowFieldCountWarningMessage);
+      }
     }
 
     if (emptyFields.length) {
@@ -318,8 +385,11 @@ async function getValidationCheckMessages(
           'xpack.ml.models.dfaValidation.messages.analysisFieldsWarningText',
           {
             defaultMessage:
-              'Some fields included for analysis have at least {percentEmpty}% empty values. The number of selected fields is high and may result in increased resource usage and long-running jobs.',
-            values: { percentEmpty: percentEmptyLimit },
+              'Some fields included for analysis have at least {percentEmpty}% empty values. There are more than {includedFieldsThreshold} fields selected for analysis. This may result in increased resource usage and long-running jobs.',
+            values: {
+              percentEmpty: percentEmptyLimit,
+              includedFieldsThreshold: INCLUDED_FIELDS_THRESHOLD,
+            },
           }
         );
       } else if (analysisFieldsEmpty && !analysisFieldsNumHigh) {
@@ -336,7 +406,8 @@ async function getValidationCheckMessages(
           'xpack.ml.models.dfaValidation.messages.analysisFieldsHighWarningText',
           {
             defaultMessage:
-              'The number of selected fields is high and may result in increased resource usage and long-running jobs.',
+              'There are more than {includedFieldsThreshold} fields selected for analysis. This may result in increased resource usage and long-running jobs.',
+            values: { includedFieldsThreshold: INCLUDED_FIELDS_THRESHOLD },
           }
         );
       }
@@ -346,7 +417,8 @@ async function getValidationCheckMessages(
         id: 'analysis_fields',
         text: i18n.translate('xpack.ml.models.dfaValidation.messages.analysisFieldsSuccessText', {
           defaultMessage:
-            'The selected analysis fields are sufficiently populated and contain useful data for analysis.',
+            'The selected analysis fields are at least {percentPopulated}% populated.',
+          values: { percentPopulated: (1 - FRACTION_EMPTY_LIMIT) * 100 },
         }),
         status: VALIDATION_STATUS.SUCCESS,
         heading: analysisFieldsHeading,