[DOCS] Add custom feature processor example (#64681) (#64736)

lcawl · web-flow · commit 751a200e7e74 · 2020-11-06T11:07:32.000-08:00
diff --git a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc
@@ -627,6 +627,95 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3
 <1> The percentage of the data set that is used for training the model.
 <2> The seed that is used to randomly pick which data is used for training.
 
+The following example uses custom feature processors to transform the
+categorical values for `DestWeather` into numerical values using frequency,
+target-mean, and one-hot encoding techniques:
+
+[source,console]
+--------------------------------------------------
+PUT _ml/data_frame/analytics/flight_prices
+{
+  "source": {
+    "index": [
+      "kibana_sample_data_flights"
+    ]
+  },
+  "dest": {
+    "index": "kibana_sample_flight_prices"
+  },
+  "analysis": {
+    "regression": {
+      "dependent_variable": "AvgTicketPrice",
+      "num_top_feature_importance_values": 2,
+      "feature_processors": [
+        {
+          "frequency_encoding": {
+            "field": "DestWeather",
+            "feature_name": "DestWeather_frequency",
+            "frequency_map": {
+              "Rain": 0.14604811155570188,
+              "Heavy Fog": 0.14604811155570188,
+              "Thunder & Lightning": 0.14604811155570188,
+              "Cloudy": 0.14604811155570188,
+              "Damaging Wind": 0.14604811155570188,
+              "Hail": 0.14604811155570188,
+              "Sunny": 0.14604811155570188,
+              "Clear": 0.14604811155570188
+            }
+          }
+        },
+        {
+          "target_mean_encoding": {
+            "field": "DestWeather",
+            "feature_name": "DestWeather_targetmean",
+            "target_map": {
+              "Rain": 626.5588814585794,
+              "Heavy Fog": 626.5588814585794,
+              "Thunder & Lightning": 626.5588814585794,
+              "Hail": 626.5588814585794,
+              "Damaging Wind": 626.5588814585794,
+              "Cloudy": 626.5588814585794,
+              "Clear": 626.5588814585794,
+              "Sunny": 626.5588814585794
+            },
+            "default_value": 624.0249512020454
+          }
+        },
+        {
+          "one_hot_encoding": {
+            "field": "DestWeather",
+            "hot_map": {
+              "Rain": "DestWeather_Rain",
+              "Heavy Fog": "DestWeather_Heavy Fog",
+              "Thunder & Lightning": "DestWeather_Thunder & Lightning",
+              "Cloudy": "DestWeather_Cloudy",
+              "Damaging Wind": "DestWeather_Damaging Wind",
+              "Hail": "DestWeather_Hail",
+              "Clear": "DestWeather_Clear",
+              "Sunny": "DestWeather_Sunny"
+            }
+          }
+        }
+      ]
+    }
+  },
+  "analyzed_fields": {
+    "includes": [
+      "AvgTicketPrice",
+      "Cancelled",
+      "DestWeather",
+      "FlightDelayMin",
+      "DistanceMiles"
+    ]
+  },
+  "model_memory_limit": "30mb"
+}
+--------------------------------------------------
+// TEST[skip:TBD]
+
+NOTE: These custom feature processors are optional; automatic
+{ml-docs}/ml-feature-encoding.html[feature encoding] still occurs for all
+categorical features.
 
 [[ml-put-dfanalytics-example-c]]
 === {classification-cap} example