From c7da98f36bfd4804ecfefac33064124767c2b137 Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Fri, 20 Sep 2024 15:21:39 -0700 Subject: [PATCH] Avoid infinite loop in flat_object parsing (#15985) * Avoid infinite loop in flat_object parsing We had logic in flat_object parsing that would: 1. Try parsing a flat object field that is not an object or null. 2. Would see an END_ARRAY token, ignore it, and not advance the parser. Combined, this would create a scenario where passing an array of strings for a flat_object would parse the string values, then loop infinitely on the END_ARRAY token. Signed-off-by: Michael Froh * Remove some unused code and add more tests The removed code does not actually seem to affect the logic. Also, I want to be 100% sure that every call to parseToken is guaranteed to call parser.nextToken() at some point. Signed-off-by: Michael Froh * Remove unused parameter from parseToken Thanks for the reminder, @kkewwei! Signed-off-by: Michael Froh * Add skip for newly-added test The test fails on MixedClusterClientYamlTestSuiteIT because 2.x still has the infinite loop until backport. Signed-off-by: Michael Froh --------- Signed-off-by: Michael Froh (cherry picked from commit 05dab3b7eb54a361af3583a322f0a748d6412836) --- CHANGELOG.md | 1 + .../test/index/90_flat_object.yml | 48 +++++++++++++++- .../xcontent/JsonToStringXContentParser.java | 37 +++--------- .../index/mapper/FlatObjectFieldMapper.java | 57 +++++++++++-------- 4 files changed, 90 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8494f37a5cfd..b6ae0c3b83f30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix wildcard query containing escaped character ([#15737](https://github.com/opensearch-project/OpenSearch/pull/15737)) - Add validation for the search backpressure cancellation settings ([#15501](https://github.com/opensearch-project/OpenSearch/pull/15501)) +- Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml index 83d3d273ebd93..c0fc0090abedf 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/90_flat_object.yml @@ -62,7 +62,6 @@ setup: }, "required_matches": 1 } - # Do index refresh - do: indices.refresh: @@ -74,7 +73,52 @@ teardown: - do: indices.delete: index: test - +--- +"Invalid docs": + - skip: + version: "- 2.99.99" + reason: "parsing of these objects would infinite loop prior to 2.18" + # The following documents are invalid. + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": [ "Arrays in Action" ], + "required_matches": 1 + } + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": "Strings in Action", + "required_matches": 1 + } + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": 12345, + "required_matches": 1 + } + - do: + catch: /parsing_exception/ + index: + index: test + id: 3 + body: { + "ISBN13": "V12154942123242", + "catalog": [ 12345 ], + "required_matches": 1 + } --- # Verify that mappings under the catalog field did not expand # and no dynamic fields were created. diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 2f60fc8f69f87..95a8d9c9495f2 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -9,6 +9,7 @@ package org.opensearch.common.xcontent; import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.AbstractXContentParser; @@ -73,7 +74,7 @@ public XContentParser parseObject() throws IOException { builder.startObject(); LinkedList path = new LinkedList<>(Collections.singleton(fieldTypeName)); while (currentToken() != Token.END_OBJECT) { - parseToken(path, null); + parseToken(path); } // deduplication the fieldName,valueList,valueAndPathList builder.field(this.fieldTypeName, new HashSet<>(keyList)); @@ -87,14 +88,11 @@ public XContentParser parseObject() throws IOException { /** * @return true if the child object contains no_null value, false otherwise */ - private boolean parseToken(Deque path, String currentFieldName) throws IOException { - if (path.size() == 1 && processNoNestedValue()) { - return true; - } + private boolean parseToken(Deque path) throws IOException { boolean isChildrenValueValid = false; boolean visitFieldName = false; if (this.parser.currentToken() == Token.FIELD_NAME) { - currentFieldName = this.parser.currentName(); + final String currentFieldName = this.parser.currentName(); path.addLast(currentFieldName); // Pushing onto the stack *must* be matched by pop visitFieldName = true; String parts = currentFieldName; @@ -106,23 +104,21 @@ private boolean parseToken(Deque path, String currentFieldName) throws I } this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part this.parser.nextToken(); // advance to the value of fieldName - isChildrenValueValid = parseToken(path, currentFieldName); // parse the value for fieldName (which will be an array, an object, - // or a primitive value) + isChildrenValueValid = parseToken(path); // parse the value for fieldName (which will be an array, an object, + // or a primitive value) path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName) // Note that whichever other branch we just passed through has already ended with nextToken(), so we // don't need to call it. } else if (this.parser.currentToken() == Token.START_ARRAY) { parser.nextToken(); while (this.parser.currentToken() != Token.END_ARRAY) { - isChildrenValueValid |= parseToken(path, currentFieldName); + isChildrenValueValid |= parseToken(path); } this.parser.nextToken(); - } else if (this.parser.currentToken() == Token.END_ARRAY) { - // skip } else if (this.parser.currentToken() == Token.START_OBJECT) { parser.nextToken(); while (this.parser.currentToken() != Token.END_OBJECT) { - isChildrenValueValid |= parseToken(path, currentFieldName); + isChildrenValueValid |= parseToken(path); } this.parser.nextToken(); } else { @@ -148,21 +144,6 @@ public void removeKeyOfNullValue() { this.keyList.remove(keyList.size() - 1); } - private boolean processNoNestedValue() throws IOException { - if (parser.currentToken() == Token.VALUE_NULL) { - return true; - } else if (this.parser.currentToken() == Token.VALUE_STRING - || this.parser.currentToken() == Token.VALUE_NUMBER - || this.parser.currentToken() == Token.VALUE_BOOLEAN) { - String value = this.parser.textOrNull(); - if (value != null) { - this.valueList.add(value); - } - return true; - } - return false; - } - private String parseValue() throws IOException { switch (this.parser.currentToken()) { case VALUE_BOOLEAN: @@ -172,7 +153,7 @@ private String parseValue() throws IOException { return this.parser.textOrNull(); // Handle other token types as needed default: - throw new IOException("Unsupported value token type [" + parser.currentToken() + "]"); + throw new ParsingException(parser.getTokenLocation(), "Unexpected value token type [" + parser.currentToken() + "]"); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index bf8f83e1b95df..738efcfafdca1 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -30,6 +30,7 @@ import org.opensearch.common.lucene.Lucene; import org.opensearch.common.lucene.search.AutomatonQueries; import org.opensearch.common.xcontent.JsonToStringXContentParser; +import org.opensearch.core.common.ParsingException; import org.opensearch.core.xcontent.DeprecationHandler; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.core.xcontent.XContentParser; @@ -568,31 +569,41 @@ protected void parseCreateField(ParseContext context) throws IOException { if (context.externalValueSet()) { String value = context.externalValue().toString(); parseValueAddFields(context, value, fieldType().name()); - } else if (context.parser().currentToken() != XContentParser.Token.VALUE_NULL) { - JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.IGNORE_DEPRECATIONS, - context.parser(), - fieldType().name() - ); - /* - JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser - It reads the JSON object and parsed to a list of string - */ - XContentParser parser = jsonToStringParser.parseObject(); - - XContentParser.Token currentToken; - while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { - switch (currentToken) { - case FIELD_NAME: - fieldName = parser.currentName(); - break; - case VALUE_STRING: - String value = parser.textOrNull(); - parseValueAddFields(context, value, fieldName); - break; + } else { + XContentParser ctxParser = context.parser(); + if (ctxParser.currentToken() != XContentParser.Token.VALUE_NULL) { + if (ctxParser.currentToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException( + ctxParser.getTokenLocation(), + "[" + this.name() + "] unexpected token [" + ctxParser.currentToken() + "] in flat_object field value" + ); } + JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.IGNORE_DEPRECATIONS, + ctxParser, + fieldType().name() + ); + /* + JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser + It reads the JSON object and parsed to a list of string + */ + XContentParser parser = jsonToStringParser.parseObject(); + + XContentParser.Token currentToken; + while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + switch (currentToken) { + case FIELD_NAME: + fieldName = parser.currentName(); + break; + case VALUE_STRING: + String value = parser.textOrNull(); + parseValueAddFields(context, value, fieldName); + break; + } + + } } }