Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/93179.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 93179
summary: Making `JsonProcessor` stricter so that it does not silently drop data
area: Ingest Node
type: bug
issues:
- 92898
1 change: 1 addition & 0 deletions docs/reference/ingest/processors/json.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Converts a JSON string into a structured JSON object.
| `add_to_root` | no | false | Flag that forces the parsed JSON to be added at the top level of the document. `target_field` must not be set when this option is chosen.
| `add_to_root_conflict_strategy` | no | `replace` | When set to `replace`, root fields that conflict with fields from the parsed JSON will be overridden. When set to `merge`, conflicting fields will be merged. Only applicable if `add_to_root` is set to `true`.
| `allow_duplicate_keys` | no | false | When set to `true`, the JSON parser will not fail if the JSON contains duplicate keys. Instead, the last encountered value for any duplicate key wins.
| `strict_json_parsing` | no | true | When set to `true`, the JSON parser will strictly parse the field value. When set to `false`, the JSON parser will be more lenient but also more likely to drop parts of the field value. For example, if `strict_json_parsing` is set to `true` and the field value is `123 "foo"`, then the processor will throw an IllegalArgumentException. But if `strict_json_parsing` is set to `false`, then the field value will be parsed as `123`.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the default being strict, does this count as a breaking change now or are we classifying this as a bug?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we had agreed it was a bug.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't remember. If so, all good!

include::common-options.asciidoc[]
|======

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.core.Strings;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.ConfigurationUtils;
import org.elasticsearch.ingest.IngestDocument;
Expand All @@ -32,12 +33,14 @@
public final class JsonProcessor extends AbstractProcessor {

public static final String TYPE = "json";
private static final String STRICT_JSON_PARSING_PARAMETER = "strict_json_parsing";

private final String field;
private final String targetField;
private final boolean addToRoot;
private final ConflictStrategy addToRootConflictStrategy;
private final boolean allowDuplicateKeys;
private final boolean strictJsonParsing;

JsonProcessor(
String tag,
Expand All @@ -47,13 +50,27 @@ public final class JsonProcessor extends AbstractProcessor {
boolean addToRoot,
ConflictStrategy addToRootConflictStrategy,
boolean allowDuplicateKeys
) {
this(tag, description, field, targetField, addToRoot, addToRootConflictStrategy, allowDuplicateKeys, true);
}

/**
 * Creates a JSON processor with every option supplied explicitly.
 *
 * @param tag tag handed to the {@link AbstractProcessor} constructor
 * @param description description handed to the {@link AbstractProcessor} constructor
 * @param field name of the field whose value is parsed as JSON
 * @param targetField name of the field that receives the parsed value when {@code addToRoot} is false
 * @param addToRoot whether the parsed JSON is applied to the document's source-and-metadata map instead of {@code targetField}
 * @param addToRootConflictStrategy conflict strategy forwarded to the root-level apply when {@code addToRoot} is true
 * @param allowDuplicateKeys flag forwarded to the JSON parsing step
 * @param strictJsonParsing flag forwarded to the JSON parsing step; when true, input with anything after the first JSON value is rejected
 */
JsonProcessor(
    String tag,
    String description,
    String field,
    String targetField,
    boolean addToRoot,
    ConflictStrategy addToRootConflictStrategy,
    boolean allowDuplicateKeys,
    boolean strictJsonParsing
) {
    super(tag, description);
    // Parsing behaviour.
    this.strictJsonParsing = strictJsonParsing;
    this.allowDuplicateKeys = allowDuplicateKeys;
    // Where the value is read from and where the result goes.
    this.field = field;
    this.targetField = targetField;
    this.addToRootConflictStrategy = addToRootConflictStrategy;
    this.addToRoot = addToRoot;
}

public String getField() {
Expand All @@ -72,7 +89,7 @@ public ConflictStrategy getAddToRootConflictStrategy() {
return addToRootConflictStrategy;
}

public static Object apply(Object fieldValue, boolean allowDuplicateKeys) {
public static Object apply(Object fieldValue, boolean allowDuplicateKeys, boolean strictJsonParsing) {
BytesReference bytesRef = fieldValue == null ? new BytesArray("null") : new BytesArray(fieldValue.toString());
try (
InputStream stream = bytesRef.streamInput();
Expand All @@ -96,14 +113,42 @@ public static Object apply(Object fieldValue, boolean allowDuplicateKeys) {
} else if (token == XContentParser.Token.VALUE_EMBEDDED_OBJECT) {
throw new IllegalArgumentException("cannot read binary value");
}
if (strictJsonParsing) {
String errorMessage = Strings.format(
"The input %s is not valid JSON and the %s parameter is true",
fieldValue,
STRICT_JSON_PARSING_PARAMETER
);
/*
* If strict JSON parsing is disabled, then once we've found the first token then we move on. For example for the string
* "123 \"foo\"" we would just return the first token, 123. However, if strict parsing is enabled (which it is by default),
* then we check to see whether there are any more tokens at this point. We expect the next token to be null. If there is
* another token or if the parser blows up, then we know we had invalid JSON and we alert the user with an
* IllegalArgumentException.
*/
try {
token = parser.nextToken();
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(errorMessage, e);
}
if (token != null) {
throw new IllegalArgumentException(errorMessage);
}
}
return value;
} catch (IOException e) {
throw new IllegalArgumentException(e);
}
}

public static void apply(Map<String, Object> ctx, String fieldName, boolean allowDuplicateKeys, ConflictStrategy conflictStrategy) {
Object value = apply(ctx.get(fieldName), allowDuplicateKeys);
public static void apply(
Map<String, Object> ctx,
String fieldName,
boolean allowDuplicateKeys,
ConflictStrategy conflictStrategy,
boolean strictJsonParsing
) {
Object value = apply(ctx.get(fieldName), allowDuplicateKeys, strictJsonParsing);
if (value instanceof Map) {
@SuppressWarnings("unchecked")
Map<String, Object> map = (Map<String, Object>) value;
Expand Down Expand Up @@ -140,9 +185,9 @@ public static void recursiveMerge(Map<String, Object> target, Map<String, Object
@Override
public IngestDocument execute(IngestDocument document) throws Exception {
if (addToRoot) {
apply(document.getSourceAndMetadata(), field, allowDuplicateKeys, addToRootConflictStrategy);
apply(document.getSourceAndMetadata(), field, allowDuplicateKeys, addToRootConflictStrategy, strictJsonParsing);
} else {
document.setFieldValue(targetField, apply(document.getFieldValue(field, Object.class), allowDuplicateKeys));
document.setFieldValue(targetField, apply(document.getFieldValue(field, Object.class), allowDuplicateKeys, strictJsonParsing));
}
return document;
}
Expand Down Expand Up @@ -217,6 +262,7 @@ public JsonProcessor create(
"Cannot set `add_to_root_conflict_strategy` if `add_to_root` is false"
);
}
boolean strictParsing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, STRICT_JSON_PARSING_PARAMETER, true);

if (targetField == null) {
targetField = field;
Expand All @@ -229,7 +275,8 @@ public JsonProcessor create(
targetField,
addToRoot,
addToRootConflictStrategy,
allowDuplicateKeys
allowDuplicateKeys,
strictParsing
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,19 @@ public static String uppercase(String value) {
* @return structured JSON object
*/
public static Object json(Object fieldValue) {
return JsonProcessor.apply(fieldValue, false);
return JsonProcessor.apply(fieldValue, false, true);
}

/**
 * Lenient counterpart of {@link #json(Object)}: delegates to {@link JsonProcessor} with strict JSON parsing
 * switched off. For example, given the fieldValue "123 foo", this method returns 123 instead of throwing an
 * IllegalArgumentException.
 *
 * @param fieldValue JSON string
 * @return structured JSON object
 */
public static Object jsonLenient(Object fieldValue) {
    boolean allowDuplicateKeys = false;
    boolean strictJsonParsing = false;
    return JsonProcessor.apply(fieldValue, allowDuplicateKeys, strictJsonParsing);
}

/**
Expand All @@ -72,7 +84,22 @@ public static Object json(Object fieldValue) {
* contains the JSON string
*/
public static void json(Map<String, Object> map, String field) {
JsonProcessor.apply(map, field, false, JsonProcessor.ConflictStrategy.REPLACE);
JsonProcessor.apply(map, field, false, JsonProcessor.ConflictStrategy.REPLACE, true);
}

/**
 * Lenient counterpart of {@link #json(Map, String)}: delegates to {@link JsonProcessor} with strict JSON
 * parsing switched off. For example, given the field value
 * "{"foo":"bar"} 123",
 * the parsed result is a map with the key-value pair "foo" and "bar" rather than an IllegalArgumentException.
 *
 * @param map map that contains the JSON string and will receive the
 *            structured JSON content
 * @param field key that identifies the entry in <code>map</code> that
 *              contains the JSON string
 */
public static void jsonLenient(Map<String, Object> map, String field) {
    boolean allowDuplicateKeys = false;
    boolean strictJsonParsing = false;
    JsonProcessor.ConflictStrategy conflictStrategy = JsonProcessor.ConflictStrategy.REPLACE;
    JsonProcessor.apply(map, field, allowDuplicateKeys, conflictStrategy, strictJsonParsing);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ class org.elasticsearch.ingest.common.Processors {
String lowercase(String)
String uppercase(String)
Object json(Object)
Object jsonLenient(Object)
void json(Map, String)
void jsonLenient(Map, String)
String urlDecode(String)
String communityId(String, String, Object, Object, Object, Object, Object, Object, int)
String communityId(String, String, Object, Object, Object, Object, Object, Object)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.test.ESTestCase;

import java.util.HashMap;
import java.util.Map;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;

public class JsonProcessorFactoryTests extends ESTestCase {
Expand Down Expand Up @@ -68,6 +70,29 @@ public void testCreateWithMissingField() throws Exception {
assertThat(exception.getMessage(), equalTo("[field] required property is missing"));
}

/**
 * Checks that the factory wires the {@code strict_json_parsing} setting into the processor: the same
 * document fails with the default configuration and is parsed as 123 once the setting is false.
 */
public void testCreateWithStrictParsingParameter() throws Exception {
    String fieldName = randomAlphaOfLength(10);
    String processorTag = randomAlphaOfLength(10);
    // Valid JSON number followed by trailing content.
    IngestDocument document = new IngestDocument("_index", "_id", 1, null, null, Map.of(fieldName, "123 \"foo\""));

    // No strict_json_parsing entry in the config: the processor rejects the trailing content.
    Map<String, Object> defaultConfig = new HashMap<>();
    defaultConfig.put("field", fieldName);
    JsonProcessor defaultProcessor = FACTORY.create(null, processorTag, null, defaultConfig);
    IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, () -> defaultProcessor.execute(document));
    assertThat(failure.getMessage(), containsString("is not valid JSON and the strict_json_parsing parameter is true"));

    // strict_json_parsing explicitly false: only the leading 123 is kept.
    Map<String, Object> relaxedConfig = new HashMap<>();
    relaxedConfig.put("field", fieldName);
    relaxedConfig.put("strict_json_parsing", false);
    JsonProcessor relaxedProcessor = FACTORY.create(null, processorTag, null, relaxedConfig);
    IngestDocument parsed = relaxedProcessor.execute(document);
    assertThat(parsed.getSource().get(fieldName), equalTo(123));
}

public void testCreateWithBothTargetFieldAndAddToRoot() throws Exception {
String randomField = randomAlphaOfLength(10);
String randomTargetField = randomAlphaOfLength(5);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.elasticsearch.ingest.common.JsonProcessor.ConflictStrategy.MERGE;
import static org.elasticsearch.ingest.common.JsonProcessor.ConflictStrategy.REPLACE;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;

public class JsonProcessorTests extends ESTestCase {

Expand Down Expand Up @@ -234,4 +237,86 @@ public void testAddBoolToRoot() {
Exception exception = expectThrows(IllegalArgumentException.class, () -> jsonProcessor.execute(ingestDocument));
assertThat(exception.getMessage(), containsString("cannot add non-map fields to root of document"));
}

/**
 * Exercises {@code JsonProcessor.apply} on well-formed input of each supported shape (object, string,
 * boolean, number, array) with strict parsing enabled.
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public void testApply() {
    // JSON object -> Map
    Object parsedObject = JsonProcessor.apply("{\"foo\":\"bar\"}", true, true);
    assertThat(parsedObject, instanceOf(Map.class));
    Map parsedMap = (Map) parsedObject;
    assertThat(parsedMap.size(), equalTo(1));
    assertThat(parsedMap.get("foo"), equalTo("bar"));

    // JSON string -> String
    Object parsedString = JsonProcessor.apply("\"foo\"", true, true);
    assertThat(parsedString, instanceOf(String.class));
    assertThat(parsedString, equalTo("foo"));

    // JSON boolean -> Boolean
    boolean expectedBoolean = randomBoolean();
    Object parsedBoolean = JsonProcessor.apply(Boolean.toString(expectedBoolean), true, true);
    assertThat(parsedBoolean, instanceOf(Boolean.class));
    assertThat(parsedBoolean, equalTo(expectedBoolean));

    // JSON number -> Double
    double expectedDouble = randomDouble();
    Object parsedDouble = JsonProcessor.apply(Double.toString(expectedDouble), true, true);
    assertThat(parsedDouble, instanceOf(Double.class));
    assertThat((double) parsedDouble, closeTo(expectedDouble, .001));

    // JSON array -> List, compared element by element
    List<Double> expectedList = randomList(10, ESTestCase::randomDouble);
    String arrayJson = expectedList.stream().map(String::valueOf).collect(Collectors.joining(",", "[", "]"));
    Object parsedArray = JsonProcessor.apply(arrayJson, true, true);
    assertThat(parsedArray, instanceOf(List.class));
    List<Double> parsedList = (List<Double>) parsedArray;
    assertThat(parsedList.size(), equalTo(expectedList.size()));
    for (int index = 0; index < expectedList.size(); index++) {
        assertThat(parsedList.get(index), closeTo(expectedList.get(index), .001));
    }
}

@SuppressWarnings({ "unchecked", "rawtypes" })
public void testApplyWithInvalidJson() {
/*
* The following fail whether strictJsonParsing is set to true or false. The reason is that even the first token cannot be parsed
* as JSON (since the first token is not a primitive or an object -- just characters not in quotes).
*/
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("foo", true, true));
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these expected to fail? A comment would help.

expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("foo", true, false));
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("foo [360113.865822] wbrdg-0afe001ce", true, true));
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("foo [360113.865822] wbrdg-0afe001ce", true, false));

/*
* The following are examples of malformed json but the first part of each is valid json. Previously apply parsed just the first
* token and ignored the rest, but it now throws an IllegalArgumentException unless strictJsonParsing is set to false. See
* https://github.com/elastic/elasticsearch/issues/92898.
*/
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("123 foo", true, true));
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("45 this is {\"a\": \"json\"}", true, true));

{
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are all good. Maybe split into a separate test just to make it clear that we're testing for something different than the code above? Basically, maybe the "strict validation" stuff could be its own test.

expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("[360113.865822] wbrdg-0afe001ce", true, true));
Object result = JsonProcessor.apply("[360113.865822] wbrdg-0afe001ce", true, false);
assertThat(result, instanceOf(List.class));
List<Double> resultList = (List<Double>) result;
assertThat(resultList.size(), equalTo(1));
assertThat(resultList.get(0), closeTo(360113.865822, .001));
}
{
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply("{\"foo\":\"bar\"} wbrdg-0afe00e", true, true));
Object result = JsonProcessor.apply("{\"foo\":\"bar\"} wbrdg-0afe00e", true, false);
assertThat(result, instanceOf(Map.class));
Map resultMap = (Map) result;
assertThat(resultMap.size(), equalTo(1));
assertThat(resultMap.get("foo"), equalTo("bar"));
}
{
expectThrows(IllegalArgumentException.class, () -> JsonProcessor.apply(" 1268 : TimeOut = 123 : a", true, true));
Object result = JsonProcessor.apply(" 1268 : TimeOut = 123 : a", true, false);
assertThat(result, instanceOf(Integer.class));
assertThat(result, equalTo(1268));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ teardown:
ingest.delete_pipeline:
id: "3"
ignore: 404
- do:
ingest.delete_pipeline:
id: "4"
ignore: 404

---
"Test JSON Processor":
Expand Down Expand Up @@ -150,3 +154,35 @@ teardown:
id: "3"
- match: { _source.foo.bar: "baz" }
- match: { _source.foo.qux: "quux" }

---
"Test JSON Processor lenient parsing":
- do:
ingest.put_pipeline:
id: "4"
body: {
"processors": [
{
"json" : {
"field" : "message",
"strict_json_parsing": false
}
}
]
}
- match: { acknowledged: true }

- do:
index:
index: test
id: "4"
pipeline: "4"
body: {
message: "123 foo"
}

- do:
get:
index: test
id: "4"
- match: { _source.message: 123 }