@@ -627,6 +627,95 @@ PUT _ml/data_frame/analytics/student_performance_mathematics_0.3
627627<1> The percentage of the data set that is used for training the model.
628628<2> The seed that is used to randomly pick which data is used for training.
629629
630+ The following example uses custom feature processors to transform the
631+ categorical values for `DestWeather` into numerical values by using
632+ frequency, target-mean, and one-hot encoding techniques:
633+
634+ [source,console]
635+ --------------------------------------------------
636+ PUT _ml/data_frame/analytics/flight_prices
637+ {
638+ "source": {
639+ "index": [
640+ "kibana_sample_data_flights"
641+ ]
642+ },
643+ "dest": {
644+ "index": "kibana_sample_flight_prices"
645+ },
646+ "analysis": {
647+ "regression": {
648+ "dependent_variable": "AvgTicketPrice",
649+ "num_top_feature_importance_values": 2,
650+ "feature_processors": [
651+ {
652+ "frequency_encoding": {
653+ "field": "DestWeather",
654+ "feature_name": "DestWeather_frequency",
655+ "frequency_map": {
656+ "Rain": 0.14604811155570188,
657+ "Heavy Fog": 0.14604811155570188,
658+ "Thunder & Lightning": 0.14604811155570188,
659+ "Cloudy": 0.14604811155570188,
660+ "Damaging Wind": 0.14604811155570188,
661+ "Hail": 0.14604811155570188,
662+ "Sunny": 0.14604811155570188,
663+ "Clear": 0.14604811155570188
664+ }
665+ }
666+ },
667+ {
668+ "target_mean_encoding": {
669+ "field": "DestWeather",
670+ "feature_name": "DestWeather_targetmean",
671+ "target_map": {
672+ "Rain": 626.5588814585794,
673+ "Heavy Fog": 626.5588814585794,
674+ "Thunder & Lightning": 626.5588814585794,
675+ "Hail": 626.5588814585794,
676+ "Damaging Wind": 626.5588814585794,
677+ "Cloudy": 626.5588814585794,
678+ "Clear": 626.5588814585794,
679+ "Sunny": 626.5588814585794
680+ },
681+ "default_value": 624.0249512020454
682+ }
683+ },
684+ {
685+ "one_hot_encoding": {
686+ "field": "DestWeather",
687+ "hot_map": {
688+ "Rain": "DestWeather_Rain",
689+ "Heavy Fog": "DestWeather_Heavy Fog",
690+ "Thunder & Lightning": "DestWeather_Thunder & Lightning",
691+ "Cloudy": "DestWeather_Cloudy",
692+ "Damaging Wind": "DestWeather_Damaging Wind",
693+ "Hail": "DestWeather_Hail",
694+ "Clear": "DestWeather_Clear",
695+ "Sunny": "DestWeather_Sunny"
696+ }
697+ }
698+ }
699+ ]
700+ }
701+ },
702+ "analyzed_fields": {
703+ "includes": [
704+ "AvgTicketPrice",
705+ "Cancelled",
706+ "DestWeather",
707+ "FlightDelayMin",
708+ "DistanceMiles"
709+ ]
710+ },
711+ "model_memory_limit": "30mb"
712+ }
713+ --------------------------------------------------
714+ // TEST[skip:TBD]
715+
716+ NOTE: These custom feature processors are optional. Whether or not you define
717+ them, automatic {ml-docs}/ml-feature-encoding.html[feature encoding] still
718+ occurs for all categorical features.
630719
631720[[ml-put-dfanalytics-example-c]]
632721=== {classification-cap} example
0 commit comments