Skip to content

Commit 5dfe193

Browse files
authored
Add ECS schema for user-agent ingest processor (#37727)
This switches the format of the user agent processor to use the schema from [ECS](https://github.com/elastic/ecs). So rather than something like this: ``` { "patch" : "3538", "major" : "70", "minor" : "0", "os" : "Mac OS X 10.14.1", "os_minor" : "14", "os_major" : "10", "name" : "Chrome", "os_name" : "Mac OS X", "device" : "Other" } ``` The structure is now like this: ``` { "name" : "Chrome", "original" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36", "os" : { "name" : "Mac OS X", "version" : "10.14.1", "full" : "Mac OS X 10.14.1" }, "device" : "Other", "version" : "70.0.3538.102" } ``` This new format can be enabled by setting `"ecs": true` in the processor configuration, and will be the default for 7.0. Leaving `ecs` unset or setting it to `false` is deprecated. Resolves #37329. This PR is against the 6.x branch and will be forward-ported with the deprecated parts removed (I will open a subsequent PR for that).
1 parent 1586cac commit 5dfe193

File tree

6 files changed

+294
-128
lines changed

6 files changed

+294
-128
lines changed

docs/reference/ingest/processors/user-agent.asciidoc

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ The ingest-user-agent module ships by default with the regexes.yaml made availab
1919
| `regex_file` | no | - | The name of the file in the `config/ingest-user-agent` directory containing the regular expressions for parsing the user agent string. Both the directory and the file have to be created before starting Elasticsearch. If not specified, ingest-user-agent will use the regexes.yaml from uap-core it ships with (see below).
2020
| `properties` | no | [`name`, `major`, `minor`, `patch`, `build`, `os`, `os_name`, `os_major`, `os_minor`, `device`] | Controls what properties are added to `target_field`.
2121
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
22+
| `ecs` | no | `false` | Whether to return the output in Elastic Common Schema format. NOTE: ECS format will be the default in Elasticsearch 7.0 and non-ECS format is deprecated.
2223
|======
2324

2425
Here is an example that adds the user agent details to the `user_agent` field based on the `agent` field:
@@ -31,7 +32,8 @@ PUT _ingest/pipeline/user_agent
3132
"processors" : [
3233
{
3334
"user_agent" : {
34-
"field" : "agent"
35+
"field" : "agent",
36+
"ecs" : true
3537
}
3638
}
3739
]
@@ -60,13 +62,13 @@ Which returns
6062
"agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
6163
"user_agent": {
6264
"name": "Chrome",
63-
"major": "51",
64-
"minor": "0",
65-
"patch": "2704",
66-
"os_name": "Mac OS X",
67-
"os": "Mac OS X 10.10.5",
68-
"os_major": "10",
69-
"os_minor": "10",
65+
"original": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
66+
"version": "51.0.2704",
67+
"os": {
68+
"name": "Mac OS X",
69+
"version": "10.10.5",
70+
"full": "Mac OS X 10.10.5"
71+
},
7072
"device": "Other"
7173
}
7274
}

modules/ingest-user-agent/src/main/java/org/elasticsearch/ingest/useragent/UserAgentProcessor.java

Lines changed: 179 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,19 @@
1919

2020
package org.elasticsearch.ingest.useragent;
2121

22+
import org.apache.logging.log4j.LogManager;
23+
import org.elasticsearch.common.logging.DeprecationLogger;
2224
import org.elasticsearch.ingest.AbstractProcessor;
2325
import org.elasticsearch.ingest.IngestDocument;
2426
import org.elasticsearch.ingest.Processor;
2527
import org.elasticsearch.ingest.useragent.UserAgentParser.Details;
2628
import org.elasticsearch.ingest.useragent.UserAgentParser.VersionedName;
2729

30+
import java.lang.reflect.Field;
2831
import java.util.Arrays;
2932
import java.util.EnumSet;
3033
import java.util.HashMap;
34+
import java.util.HashSet;
3135
import java.util.List;
3236
import java.util.Locale;
3337
import java.util.Map;
@@ -40,30 +44,34 @@
4044

4145
public class UserAgentProcessor extends AbstractProcessor {
4246

47+
private static final DeprecationLogger deprecationLogger = new DeprecationLogger(LogManager.getLogger(UserAgentProcessor.class));
48+
4349
public static final String TYPE = "user_agent";
4450

4551
private final String field;
4652
private final String targetField;
4753
private final Set<Property> properties;
4854
private final UserAgentParser parser;
4955
private final boolean ignoreMissing;
56+
private final boolean useECS;
5057

5158
public UserAgentProcessor(String tag, String field, String targetField, UserAgentParser parser, Set<Property> properties,
52-
boolean ignoreMissing) {
59+
boolean ignoreMissing, boolean useECS) {
5360
super(tag);
5461
this.field = field;
5562
this.targetField = targetField;
5663
this.parser = parser;
5764
this.properties = properties;
5865
this.ignoreMissing = ignoreMissing;
66+
this.useECS = useECS;
5967
}
6068

6169
boolean isIgnoreMissing() {
6270
return ignoreMissing;
6371
}
6472

6573
@Override
66-
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
74+
public IngestDocument execute(IngestDocument ingestDocument) {
6775
String userAgent = ingestDocument.getFieldValue(field, String.class, ignoreMissing);
6876

6977
if (userAgent == null && ignoreMissing) {
@@ -75,71 +83,134 @@ public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
7583
Details uaClient = parser.parse(userAgent);
7684

7785
Map<String, Object> uaDetails = new HashMap<>();
78-
for (Property property : this.properties) {
79-
switch (property) {
80-
case NAME:
81-
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
82-
uaDetails.put("name", uaClient.userAgent.name);
83-
}
84-
else {
85-
uaDetails.put("name", "Other");
86-
}
87-
break;
88-
case MAJOR:
89-
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
90-
uaDetails.put("major", uaClient.userAgent.major);
91-
}
92-
break;
93-
case MINOR:
94-
if (uaClient.userAgent != null && uaClient.userAgent.minor != null) {
95-
uaDetails.put("minor", uaClient.userAgent.minor);
96-
}
97-
break;
98-
case PATCH:
99-
if (uaClient.userAgent != null && uaClient.userAgent.patch != null) {
100-
uaDetails.put("patch", uaClient.userAgent.patch);
101-
}
102-
break;
103-
case BUILD:
104-
if (uaClient.userAgent != null && uaClient.userAgent.build != null) {
105-
uaDetails.put("build", uaClient.userAgent.build);
106-
}
107-
break;
108-
case OS:
109-
if (uaClient.operatingSystem != null) {
110-
uaDetails.put("os", buildFullOSName(uaClient.operatingSystem));
111-
}
112-
else {
113-
uaDetails.put("os", "Other");
114-
}
11586

116-
break;
117-
case OS_NAME:
118-
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
119-
uaDetails.put("os_name", uaClient.operatingSystem.name);
120-
}
121-
else {
122-
uaDetails.put("os_name", "Other");
123-
}
124-
break;
125-
case OS_MAJOR:
126-
if (uaClient.operatingSystem != null && uaClient.operatingSystem.major != null) {
127-
uaDetails.put("os_major", uaClient.operatingSystem.major);
128-
}
129-
break;
130-
case OS_MINOR:
131-
if (uaClient.operatingSystem != null && uaClient.operatingSystem.minor != null) {
132-
uaDetails.put("os_minor", uaClient.operatingSystem.minor);
133-
}
134-
break;
135-
case DEVICE:
136-
if (uaClient.device != null && uaClient.device.name != null) {
137-
uaDetails.put("device", uaClient.device.name);
138-
}
139-
else {
140-
uaDetails.put("device", "Other");
141-
}
142-
break;
87+
if (useECS) {
88+
// Parse the user agent in the ECS (Elastic Common Schema) format
89+
for (Property property : this.properties) {
90+
switch (property) {
91+
case ORIGINAL:
92+
uaDetails.put("original", userAgent);
93+
break;
94+
case NAME:
95+
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
96+
uaDetails.put("name", uaClient.userAgent.name);
97+
} else {
98+
uaDetails.put("name", "Other");
99+
}
100+
break;
101+
case VERSION:
102+
StringBuilder version = new StringBuilder();
103+
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
104+
version.append(uaClient.userAgent.major);
105+
if (uaClient.userAgent.minor != null) {
106+
version.append(".").append(uaClient.userAgent.minor);
107+
if (uaClient.userAgent.patch != null) {
108+
version.append(".").append(uaClient.userAgent.patch);
109+
if (uaClient.userAgent.build != null) {
110+
version.append(".").append(uaClient.userAgent.build);
111+
}
112+
}
113+
}
114+
uaDetails.put("version", version.toString());
115+
}
116+
break;
117+
case OS:
118+
if (uaClient.operatingSystem != null) {
119+
Map<String, String> osDetails = new HashMap<>(3);
120+
if (uaClient.operatingSystem.name != null) {
121+
osDetails.put("name", uaClient.operatingSystem.name);
122+
StringBuilder sb = new StringBuilder();
123+
if (uaClient.operatingSystem.major != null) {
124+
sb.append(uaClient.operatingSystem.major);
125+
if (uaClient.operatingSystem.minor != null) {
126+
sb.append(".").append(uaClient.operatingSystem.minor);
127+
if (uaClient.operatingSystem.patch != null) {
128+
sb.append(".").append(uaClient.operatingSystem.patch);
129+
if (uaClient.operatingSystem.build != null) {
130+
sb.append(".").append(uaClient.operatingSystem.build);
131+
}
132+
}
133+
}
134+
osDetails.put("version", sb.toString());
135+
osDetails.put("full", uaClient.operatingSystem.name + " " + sb.toString());
136+
}
137+
uaDetails.put("os", osDetails);
138+
}
139+
}
140+
break;
141+
case DEVICE:
142+
if (uaClient.device != null && uaClient.device.name != null) {
143+
uaDetails.put("device", uaClient.device.name);
144+
} else {
145+
uaDetails.put("device", "Other");
146+
}
147+
break;
148+
}
149+
}
150+
} else {
151+
// Deprecated format, removed in 7.0
152+
for (Property property : this.properties) {
153+
switch (property) {
154+
case NAME:
155+
if (uaClient.userAgent != null && uaClient.userAgent.name != null) {
156+
uaDetails.put("name", uaClient.userAgent.name);
157+
} else {
158+
uaDetails.put("name", "Other");
159+
}
160+
break;
161+
case MAJOR:
162+
if (uaClient.userAgent != null && uaClient.userAgent.major != null) {
163+
uaDetails.put("major", uaClient.userAgent.major);
164+
}
165+
break;
166+
case MINOR:
167+
if (uaClient.userAgent != null && uaClient.userAgent.minor != null) {
168+
uaDetails.put("minor", uaClient.userAgent.minor);
169+
}
170+
break;
171+
case PATCH:
172+
if (uaClient.userAgent != null && uaClient.userAgent.patch != null) {
173+
uaDetails.put("patch", uaClient.userAgent.patch);
174+
}
175+
break;
176+
case BUILD:
177+
if (uaClient.userAgent != null && uaClient.userAgent.build != null) {
178+
uaDetails.put("build", uaClient.userAgent.build);
179+
}
180+
break;
181+
case OS:
182+
if (uaClient.operatingSystem != null) {
183+
uaDetails.put("os", buildFullOSName(uaClient.operatingSystem));
184+
} else {
185+
uaDetails.put("os", "Other");
186+
}
187+
188+
break;
189+
case OS_NAME:
190+
if (uaClient.operatingSystem != null && uaClient.operatingSystem.name != null) {
191+
uaDetails.put("os_name", uaClient.operatingSystem.name);
192+
} else {
193+
uaDetails.put("os_name", "Other");
194+
}
195+
break;
196+
case OS_MAJOR:
197+
if (uaClient.operatingSystem != null && uaClient.operatingSystem.major != null) {
198+
uaDetails.put("os_major", uaClient.operatingSystem.major);
199+
}
200+
break;
201+
case OS_MINOR:
202+
if (uaClient.operatingSystem != null && uaClient.operatingSystem.minor != null) {
203+
uaDetails.put("os_minor", uaClient.operatingSystem.minor);
204+
}
205+
break;
206+
case DEVICE:
207+
if (uaClient.device != null && uaClient.device.name != null) {
208+
uaDetails.put("device", uaClient.device.name);
209+
} else {
210+
uaDetails.put("device", "Other");
211+
}
212+
break;
213+
}
143214
}
144215
}
145216

@@ -199,6 +270,10 @@ UserAgentParser getUaParser() {
199270
return parser;
200271
}
201272

273+
public boolean isUseECS() {
274+
return useECS;
275+
}
276+
202277
public static final class Factory implements Processor.Factory {
203278

204279
private final Map<String, UserAgentParser> userAgentParsers;
@@ -215,6 +290,7 @@ public UserAgentProcessor create(Map<String, Processor.Factory> factories, Strin
215290
String regexFilename = readStringProperty(TYPE, processorTag, config, "regex_file", IngestUserAgentPlugin.DEFAULT_PARSER_NAME);
216291
List<String> propertyNames = readOptionalList(TYPE, processorTag, config, "properties");
217292
boolean ignoreMissing = readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
293+
boolean useECS = readBooleanProperty(TYPE, processorTag, config, "ecs", false);
218294

219295
UserAgentParser parser = userAgentParsers.get(regexFilename);
220296
if (parser == null) {
@@ -236,17 +312,51 @@ public UserAgentProcessor create(Map<String, Processor.Factory> factories, Strin
236312
properties = EnumSet.allOf(Property.class);
237313
}
238314

239-
return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing);
315+
if (useECS == false) {
316+
deprecationLogger.deprecated("setting [ecs] to false for non-common schema " +
317+
"format is deprecated and will be removed in 7.0, set to true to use the non-deprecated format");
318+
}
319+
320+
return new UserAgentProcessor(processorTag, field, targetField, parser, properties, ignoreMissing, useECS);
240321
}
241322
}
242323

243324
enum Property {
244325

245-
NAME, MAJOR, MINOR, PATCH, OS, OS_NAME, OS_MAJOR, OS_MINOR, DEVICE, BUILD;
326+
NAME,
327+
// Deprecated in 6.7 (superseded by VERSION), to be removed in 7.0
328+
@Deprecated MAJOR,
329+
@Deprecated MINOR,
330+
@Deprecated PATCH,
331+
OS,
332+
// Deprecated in 6.7 (superseded by just using OS), to be removed in 7.0
333+
@Deprecated OS_NAME,
334+
@Deprecated OS_MAJOR,
335+
@Deprecated OS_MINOR,
336+
DEVICE,
337+
@Deprecated BUILD, // Same deprecated as OS_* above
338+
ORIGINAL,
339+
VERSION;
340+
341+
private static Set<Property> DEPRECATED_PROPERTIES;
342+
343+
static {
344+
Set<Property> deprecated = new HashSet<>();
345+
for (Field field : Property.class.getFields()) {
346+
if (field.isEnumConstant() && field.isAnnotationPresent(Deprecated.class)) {
347+
deprecated.add(valueOf(field.getName()));
348+
}
349+
}
350+
DEPRECATED_PROPERTIES = deprecated;
351+
}
246352

247353
public static Property parseProperty(String propertyName) {
248354
try {
249-
return valueOf(propertyName.toUpperCase(Locale.ROOT));
355+
Property value = valueOf(propertyName.toUpperCase(Locale.ROOT));
356+
if (DEPRECATED_PROPERTIES.contains(value)) {
357+
deprecationLogger.deprecated("the [{}] property is deprecated for the user-agent processor", propertyName);
358+
}
359+
return value;
250360
}
251361
catch (IllegalArgumentException e) {
252362
throw new IllegalArgumentException("illegal property value [" + propertyName + "]. valid values are " +

0 commit comments

Comments
 (0)