diff --git a/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java b/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java index 0645910c7494..9df746537e00 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java +++ b/presto-common/src/main/java/com/facebook/presto/common/function/SqlFunctionProperties.java @@ -37,6 +37,7 @@ public class SqlFunctionProperties private final boolean legacyJsonCast; private final Map extraCredentials; private final boolean warnOnCommonNanPatterns; + private final boolean canonicalizedJsonExtract; private SqlFunctionProperties( boolean parseDecimalLiteralAsDouble, @@ -50,7 +51,8 @@ private SqlFunctionProperties( boolean fieldNamesInJsonCastEnabled, boolean legacyJsonCast, Map extraCredentials, - boolean warnOnCommonNanPatterns) + boolean warnOnCommonNanPatterns, + boolean canonicalizedJsonExtract) { this.parseDecimalLiteralAsDouble = parseDecimalLiteralAsDouble; this.legacyRowFieldOrdinalAccessEnabled = legacyRowFieldOrdinalAccessEnabled; @@ -64,6 +66,7 @@ private SqlFunctionProperties( this.legacyJsonCast = legacyJsonCast; this.extraCredentials = requireNonNull(extraCredentials, "extraCredentials is null"); this.warnOnCommonNanPatterns = warnOnCommonNanPatterns; + this.canonicalizedJsonExtract = canonicalizedJsonExtract; } public boolean isParseDecimalLiteralAsDouble() @@ -127,6 +130,9 @@ public boolean shouldWarnOnCommonNanPatterns() return warnOnCommonNanPatterns; } + public boolean isCanonicalizedJsonExtract() + { return canonicalizedJsonExtract; } + @Override public boolean equals(Object o) { @@ -146,7 +152,8 @@ public boolean equals(Object o) Objects.equals(sessionLocale, that.sessionLocale) && Objects.equals(sessionUser, that.sessionUser) && Objects.equals(extraCredentials, that.extraCredentials) && - Objects.equals(legacyJsonCast, that.legacyJsonCast); + Objects.equals(legacyJsonCast, that.legacyJsonCast) && 
+ Objects.equals(canonicalizedJsonExtract, that.canonicalizedJsonExtract); } @Override @@ -154,7 +161,7 @@ public int hashCode() { return Objects.hash(parseDecimalLiteralAsDouble, legacyRowFieldOrdinalAccessEnabled, timeZoneKey, legacyTimestamp, legacyMapSubscript, sessionStartTime, sessionLocale, sessionUser, - extraCredentials, legacyJsonCast); + extraCredentials, legacyJsonCast, canonicalizedJsonExtract); } public static Builder builder() @@ -176,6 +183,7 @@ public static class Builder private boolean legacyJsonCast; private Map extraCredentials = emptyMap(); private boolean warnOnCommonNanPatterns; + private boolean canonicalizedJsonExtract; private Builder() {} @@ -251,6 +259,12 @@ public Builder setWarnOnCommonNanPatterns(boolean warnOnCommonNanPatterns) return this; } + public Builder setCanonicalizedJsonExtract(boolean canonicalizedJsonExtract) + { + this.canonicalizedJsonExtract = canonicalizedJsonExtract; + return this; + } + public SqlFunctionProperties build() { return new SqlFunctionProperties( @@ -265,7 +279,8 @@ public SqlFunctionProperties build() fieldNamesInJsonCastEnabled, legacyJsonCast, extraCredentials, - warnOnCommonNanPatterns); + warnOnCommonNanPatterns, + canonicalizedJsonExtract); } } } diff --git a/presto-main-base/src/main/java/com/facebook/presto/Session.java b/presto-main-base/src/main/java/com/facebook/presto/Session.java index cc302e7253be..fd2d88c2707b 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/Session.java +++ b/presto-main-base/src/main/java/com/facebook/presto/Session.java @@ -57,6 +57,7 @@ import java.util.stream.Collectors; import static com.facebook.presto.SystemSessionProperties.LEGACY_JSON_CAST; +import static com.facebook.presto.SystemSessionProperties.isCanonicalizedJsonExtract; import static com.facebook.presto.SystemSessionProperties.isFieldNameInJsonCastEnabled; import static com.facebook.presto.SystemSessionProperties.isLegacyMapSubscript; import static 
com.facebook.presto.SystemSessionProperties.isLegacyRowFieldOrdinalAccessEnabled; @@ -481,6 +482,7 @@ public SqlFunctionProperties getSqlFunctionProperties() .setLegacyJsonCast(legacyJsonCast) .setExtraCredentials(identity.getExtraCredentials()) .setWarnOnCommonNanPatterns(warnOnCommonNanPatterns(this)) + .setCanonicalizedJsonExtract(isCanonicalizedJsonExtract(this)) .build(); } diff --git a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java index 2b2e935c044e..fe1b35cc3c36 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java +++ b/presto-main-base/src/main/java/com/facebook/presto/SystemSessionProperties.java @@ -305,6 +305,7 @@ public final class SystemSessionProperties public static final String REWRITE_CASE_TO_MAP_ENABLED = "rewrite_case_to_map_enabled"; public static final String FIELD_NAMES_IN_JSON_CAST_ENABLED = "field_names_in_json_cast_enabled"; public static final String LEGACY_JSON_CAST = "legacy_json_cast"; + public static final String CANONICALIZED_JSON_EXTRACT = "canonicalized_json_extract"; public static final String PULL_EXPRESSION_FROM_LAMBDA_ENABLED = "pull_expression_from_lambda_enabled"; public static final String REWRITE_CONSTANT_ARRAY_CONTAINS_TO_IN_EXPRESSION = "rewrite_constant_array_contains_to_in_expression"; public static final String INFER_INEQUALITY_PREDICATES = "infer_inequality_predicates"; @@ -1640,6 +1641,11 @@ public SystemSessionProperties( "Keep the legacy json cast behavior, do not reserve the case for field names when casting to row type", functionsConfig.isLegacyJsonCast(), true), + booleanProperty( + CANONICALIZED_JSON_EXTRACT, + "Extracts json data in a canonicalized manner, and raises a PrestoException when encountering invalid json structures within the input json path", + functionsConfig.isCanonicalizedJsonExtract(), + true), booleanProperty( 
OPTIMIZE_JOIN_PROBE_FOR_EMPTY_BUILD_RUNTIME, "Optimize join probe at runtime if build side is empty", @@ -3178,4 +3184,9 @@ public static boolean isEnabledAddExchangeBelowGroupId(Session session) { return session.getSystemProperty(ADD_EXCHANGE_BELOW_PARTIAL_AGGREGATION_OVER_GROUP_ID, Boolean.class); } + + public static boolean isCanonicalizedJsonExtract(Session session) + { + return session.getSystemProperty(CANONICALIZED_JSON_EXTRACT, Boolean.class); + } } diff --git a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonExtract.java b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonExtract.java index dcd04cee7b88..d2ffb8b65df5 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonExtract.java +++ b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonExtract.java @@ -13,6 +13,8 @@ */ package com.facebook.presto.operator.scalar; +import com.facebook.airlift.json.JsonObjectMapperProvider; +import com.facebook.presto.common.function.SqlFunctionProperties; import com.facebook.presto.spi.PrestoException; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; @@ -20,12 +22,14 @@ import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.core.io.SerializedString; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.io.UncheckedIOException; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; @@ -38,6 +42,7 @@ import static com.fasterxml.jackson.core.JsonToken.START_ARRAY; import static com.fasterxml.jackson.core.JsonToken.START_OBJECT; import static com.fasterxml.jackson.core.JsonToken.VALUE_NULL; +import static 
com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS; import static io.airlift.slice.Slices.utf8Slice; import static java.util.Objects.requireNonNull; @@ -121,13 +126,15 @@ public final class JsonExtract private static final JsonFactory JSON_FACTORY = new JsonFactory() .disable(CANONICALIZE_FIELD_NAMES); + private static final ObjectMapper SORTED_MAPPER = new JsonObjectMapperProvider().get().configure(ORDER_MAP_ENTRIES_BY_KEYS, true); + private JsonExtract() {} - public static T extract(Slice jsonInput, JsonExtractor jsonExtractor) + public static T extract(Slice jsonInput, JsonExtractor jsonExtractor, SqlFunctionProperties properties) { requireNonNull(jsonInput, "jsonInput is null"); try { - return jsonExtractor.extract(jsonInput.getInput()); + return jsonExtractor.extract(jsonInput.getInput(), properties); } catch (JsonParseException e) { // Return null if we failed to parse something @@ -156,7 +163,7 @@ public static PrestoJsonExtractor generateExtractor(String path, PrestoJs public interface JsonExtractor { - T extract(InputStream inputStream) + T extract(InputStream inputStream, SqlFunctionProperties properties) throws IOException; } @@ -174,11 +181,11 @@ public abstract static class PrestoJsonExtractor * * @return the value, or null if not applicable */ - abstract T extract(JsonParser jsonParser) + abstract T extract(JsonParser jsonParser, SqlFunctionProperties properties) throws IOException; @Override - public T extract(InputStream inputStream) + public T extract(InputStream inputStream, SqlFunctionProperties properties) throws IOException { try (JsonParser jsonParser = createJsonParser(JSON_FACTORY, inputStream)) { @@ -187,7 +194,7 @@ public T extract(InputStream inputStream) return null; } - return extract(jsonParser); + return extract(jsonParser, properties); } } } @@ -214,21 +221,21 @@ public ObjectFieldJsonExtractor(String fieldName, PrestoJsonExtractor { @Override - public Slice extract(JsonParser jsonParser) + public Slice 
extract(JsonParser jsonParser, SqlFunctionProperties properties) throws IOException { JsonToken token = jsonParser.getCurrentToken(); @@ -296,13 +303,31 @@ public static class JsonValueJsonExtractor extends PrestoJsonExtractor { @Override - public Slice extract(JsonParser jsonParser) + public Slice extract(JsonParser jsonParser, SqlFunctionProperties properties) throws IOException { if (!jsonParser.hasCurrentToken()) { throw new JsonParseException(jsonParser, "Unexpected end of value"); } + if (!properties.isCanonicalizedJsonExtract()) { + return legacyExtract(jsonParser); + } + DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE); + // Write the JSON to output stream with sorted keys + SORTED_MAPPER.writeValue((OutputStream) dynamicSliceOutput, SORTED_MAPPER.readValue(jsonParser, Object.class)); + // nextToken will throw an exception if there are trailing characters. + try { + jsonParser.nextToken(); + } + catch (JsonParseException e) { + throw new PrestoException(INVALID_FUNCTION_ARGUMENT, e.getMessage()); + } + return dynamicSliceOutput.slice(); + } + public Slice legacyExtract(JsonParser jsonParser) + throws IOException + { DynamicSliceOutput dynamicSliceOutput = new DynamicSliceOutput(ESTIMATED_JSON_OUTPUT_SIZE); try (JsonGenerator jsonGenerator = createJsonGenerator(JSON_FACTORY, dynamicSliceOutput)) { jsonGenerator.copyCurrentStructure(jsonParser); @@ -315,7 +340,7 @@ public static class JsonSizeExtractor extends PrestoJsonExtractor { @Override - public Long extract(JsonParser jsonParser) + public Long extract(JsonParser jsonParser, SqlFunctionProperties properties) throws IOException { if (!jsonParser.hasCurrentToken()) { diff --git a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonFunctions.java b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonFunctions.java index 146ac74b908f..339ff5c10dba 100644 --- 
a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonFunctions.java +++ b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonFunctions.java @@ -435,51 +435,51 @@ public static Slice jsonArrayGet(@SqlType(StandardTypes.JSON) Slice json, @SqlTy @SqlNullable @LiteralParameters("x") @SqlType("varchar(x)") - public static Slice varcharJsonExtractScalar(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + public static Slice varcharJsonExtractScalar(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getScalarExtractor()); + return JsonExtract.extract(json, jsonPath.getScalarExtractor(), properties); } @ScalarFunction @SqlNullable @SqlType(StandardTypes.VARCHAR) - public static Slice jsonExtractScalar(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + public static Slice jsonExtractScalar(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getScalarExtractor()); + return JsonExtract.extract(json, jsonPath.getScalarExtractor(), properties); } @ScalarFunction("json_extract") @LiteralParameters("x") @SqlNullable @SqlType(StandardTypes.JSON) - public static Slice varcharJsonExtract(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + public static Slice varcharJsonExtract(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getObjectExtractor()); + return JsonExtract.extract(json, jsonPath.getObjectExtractor(), properties); } @ScalarFunction @SqlNullable @SqlType(StandardTypes.JSON) - public static Slice jsonExtract(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + 
public static Slice jsonExtract(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getObjectExtractor()); + return JsonExtract.extract(json, jsonPath.getObjectExtractor(), properties); } @ScalarFunction("json_size") @LiteralParameters("x") @SqlNullable @SqlType(StandardTypes.BIGINT) - public static Long varcharJsonSize(@SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + public static Long varcharJsonSize(SqlFunctionProperties properties, @SqlType("varchar(x)") Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getSizeExtractor()); + return JsonExtract.extract(json, jsonPath.getSizeExtractor(), properties); } @ScalarFunction @SqlNullable @SqlType(StandardTypes.BIGINT) - public static Long jsonSize(@SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) + public static Long jsonSize(SqlFunctionProperties properties, @SqlType(StandardTypes.JSON) Slice json, @SqlType(JsonPathType.NAME) JsonPath jsonPath) { - return JsonExtract.extract(json, jsonPath.getSizeExtractor()); + return JsonExtract.extract(json, jsonPath.getSizeExtractor(), properties); } public static Object getJsonObjectValue(Type valueType, SqlFunctionProperties properties, Block block, int position) diff --git a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonPath.java b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonPath.java index 2fc980e67b9d..fedafda1eb46 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonPath.java +++ b/presto-main-base/src/main/java/com/facebook/presto/operator/scalar/JsonPath.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.operator.scalar; +import com.facebook.presto.common.function.SqlFunctionProperties; import com.facebook.presto.spi.PrestoException; import 
com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -42,7 +43,6 @@ private static JsonExtract.JsonExtractor getScalarExtractorForJayway(com. { return new JsonExtract.JsonExtractor() { - @Override public Slice extract(InputStream inputStream) throws IOException { @@ -52,6 +52,13 @@ public Slice extract(InputStream inputStream) } return utf8Slice(node.asText()); } + + @Override + public Slice extract(InputStream inputStream, SqlFunctionProperties properties) + throws IOException + { + return extract(inputStream); + } }; } @@ -59,7 +66,6 @@ private static JsonExtract.JsonExtractor getObjectExtractorForJayway(com. { return new JsonExtract.JsonExtractor() { - @Override public Slice extract(InputStream inputStream) throws IOException { @@ -69,6 +75,13 @@ public Slice extract(InputStream inputStream) } return utf8Slice(node.toString()); } + + @Override + public Slice extract(InputStream inputStream, SqlFunctionProperties properties) + throws IOException + { + return extract(inputStream); + } }; } @@ -76,7 +89,6 @@ private static JsonExtract.JsonExtractor getSizeExtractorForJayway(com.jay { return new JsonExtract.JsonExtractor() { - @Override public Long extract(InputStream inputStream) throws IOException { @@ -86,6 +98,13 @@ public Long extract(InputStream inputStream) } return (long) node.size(); // Jackson correctly returns 0 for scalar nodes } + + @Override + public Long extract(InputStream inputStream, SqlFunctionProperties properties) + throws IOException + { + return extract(inputStream); + } }; } diff --git a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FunctionsConfig.java b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FunctionsConfig.java index 542d09ba0047..4e64a20d467d 100644 --- a/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FunctionsConfig.java +++ b/presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/FunctionsConfig.java @@ -18,6 +18,7 @@ import 
com.facebook.presto.operator.aggregation.arrayagg.ArrayAggGroupImplementation; import com.facebook.presto.operator.aggregation.histogram.HistogramGroupImplementation; import com.facebook.presto.operator.aggregation.multimapagg.MultimapAggGroupImplementation; +import com.facebook.airlift.configuration.ConfigDescription; import javax.validation.constraints.Min; @@ -47,6 +48,7 @@ public class FunctionsConfig private boolean warnOnPossibleNans; private boolean legacyCharToVarcharCoercion; private boolean legacyJsonCast = true; + private boolean canonicalizedJsonExtract; private String defaultNamespacePrefix = JAVA_BUILTIN_NAMESPACE.toString(); @Config("deprecated.legacy-array-agg") @@ -308,6 +310,19 @@ public boolean isLegacyJsonCast() { return legacyJsonCast; } + @Config("canonicalized-json-extract") + @ConfigDescription("Extracts json data in a canonicalized manner, and raises a PrestoException when encountering invalid json structures within the input json path") + public FunctionsConfig setCanonicalizedJsonExtract(boolean canonicalizedJsonExtract) + { + this.canonicalizedJsonExtract = canonicalizedJsonExtract; + return this; + } + + public boolean isCanonicalizedJsonExtract() + { + return canonicalizedJsonExtract; + } + @Config("presto.default-namespace") + @ConfigDescription("Specifies the default function namespace prefix") public FunctionsConfig setDefaultNamespacePrefix(String defaultNamespacePrefix) diff --git a/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/BenchmarkJsonExtract.java b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/BenchmarkJsonExtract.java new file mode 100644 index 000000000000..2c934134d517 --- /dev/null +++ b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/BenchmarkJsonExtract.java @@ -0,0 +1,258 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.facebook.presto.operator.scalar; + +import com.facebook.presto.Session; +import com.facebook.presto.common.Page; +import com.facebook.presto.common.block.Block; +import com.facebook.presto.common.block.BlockBuilder; +import com.facebook.presto.common.function.SqlFunctionProperties; +import com.facebook.presto.common.type.Type; +import com.facebook.presto.metadata.Metadata; +import com.facebook.presto.metadata.MetadataManager; +import com.facebook.presto.operator.DriverYieldSignal; +import com.facebook.presto.operator.project.PageProcessor; +import com.facebook.presto.spi.ConnectorSession; +import com.facebook.presto.spi.WarningCollector; +import com.facebook.presto.spi.relation.RowExpression; +import com.facebook.presto.spi.relation.VariableReferenceExpression; +import com.facebook.presto.sql.gen.ExpressionCompiler; +import com.facebook.presto.sql.gen.PageFunctionCompiler; +import com.facebook.presto.sql.parser.SqlParser; +import com.facebook.presto.sql.planner.TypeProvider; +import com.facebook.presto.sql.relational.RowExpressionOptimizer; +import com.facebook.presto.sql.relational.SqlToRowExpressionTranslator; +import com.facebook.presto.sql.tree.Expression; +import com.facebook.presto.sql.tree.NodeRef; +import com.facebook.presto.testing.TestingConnectorSession; +import com.facebook.presto.testing.TestingSession; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.airlift.slice.DynamicSliceOutput; +import io.airlift.slice.SliceOutput; +import 
org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; +import org.openjdk.jmh.runner.options.VerboseMode; +import org.openjdk.jmh.runner.options.WarmupMode; +import org.testng.annotations.Test; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.TimeUnit; + +import static com.facebook.presto.common.type.JsonType.JSON; +import static com.facebook.presto.common.type.TimeZoneKey.UTC_KEY; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext; +import static com.facebook.presto.metadata.MetadataManager.createTestMetadataManager; +import static com.facebook.presto.operator.scalar.FunctionAssertions.createExpression; +import static com.facebook.presto.spi.relation.ExpressionOptimizer.Level.OPTIMIZED; +import static com.facebook.presto.sql.analyzer.ExpressionAnalyzer.getExpressionTypes; +import static java.util.Collections.emptyMap; +import static java.util.Locale.ENGLISH; + +@SuppressWarnings("MethodMayBeStatic") +@State(Scope.Thread) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(10) +@BenchmarkMode(Mode.AverageTime) +public class BenchmarkJsonExtract +{ + private static final SqlParser SQL_PARSER = new SqlParser(); + private static final Metadata METADATA = createTestMetadataManager(); + private static final Session TEST_SESSION = TestingSession.testSessionBuilder().build(); 
+ public static final ConnectorSession SESSION = new TestingConnectorSession(ImmutableList.of()); + + private static final int POSITION_COUNT = 100_000; + private static final int ARRAY_SIZE = 20; + private static final String CHARACTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + + private PageProcessor pageProcessor; + private Page inputPage; + private Map symbolTypes; + private Map sourceLayout; + + @Param({"true", "false"}) + boolean isCanonicalizedJsonExtract; + + @Setup + public void setup() + { + VariableReferenceExpression variable = new VariableReferenceExpression(Optional.empty(), VARCHAR.getDisplayName().toLowerCase(ENGLISH) + "0", VARCHAR); + symbolTypes = ImmutableMap.of(variable.getName(), VARCHAR); + sourceLayout = ImmutableMap.of(variable, 0); + inputPage = new Page(createChannel()); + List projections = ImmutableList.of(rowExpression("json_extract(varchar0, '$.key1')"), rowExpression("json_extract(varchar0, '$.key2')")); + MetadataManager metadata = createTestMetadataManager(); + PageFunctionCompiler pageFunctionCompiler = new PageFunctionCompiler(metadata, 0); + ExpressionCompiler expressionCompiler = new ExpressionCompiler(metadata, pageFunctionCompiler); + pageProcessor = expressionCompiler.compilePageProcessor(TEST_SESSION.getSqlFunctionProperties(), Optional.empty(), projections).get(); + } + + @Benchmark + public List> computePage() + { + SqlFunctionProperties sqlFunctionProperties = SqlFunctionProperties.builder() + .setTimeZoneKey(UTC_KEY) + .setLegacyTimestamp(true) + .setSessionStartTime(0) + .setSessionLocale(ENGLISH).setSessionUser("user") + .setCanonicalizedJsonExtract(isCanonicalizedJsonExtract) + .build(); + + return ImmutableList.copyOf( + pageProcessor.process( + sqlFunctionProperties, + new DriverYieldSignal(), + newSimpleAggregatedMemoryContext().newLocalMemoryContext(PageProcessor.class.getSimpleName()), + inputPage)); + } + + private RowExpression rowExpression(String value) + { + Expression expression 
= createExpression(TEST_SESSION, value, METADATA, TypeProvider.copyOf(symbolTypes)); + Map, Type> expressionTypes = getExpressionTypes(TEST_SESSION, METADATA, SQL_PARSER, TypeProvider.copyOf(symbolTypes), expression, emptyMap(), WarningCollector.NOOP); + RowExpression rowExpression = SqlToRowExpressionTranslator.translate(expression, expressionTypes, sourceLayout, METADATA.getFunctionAndTypeManager(), TEST_SESSION); + RowExpressionOptimizer optimizer = new RowExpressionOptimizer(METADATA); + return optimizer.optimize(rowExpression, OPTIMIZED, TEST_SESSION.toConnectorSession()); + } + + private static Block createChannel() + { + BlockBuilder blockBuilder = JSON.createBlockBuilder(null, BenchmarkJsonExtract.POSITION_COUNT); + for (int position = 0; position < BenchmarkJsonExtract.POSITION_COUNT; position++) { + try (SliceOutput jsonSlice = new DynamicSliceOutput(20 * BenchmarkJsonExtract.ARRAY_SIZE)) { + jsonSlice.appendByte('{'); + int k1Index = ThreadLocalRandom.current().nextInt(ARRAY_SIZE); + int k2Index = ThreadLocalRandom.current().nextInt(ARRAY_SIZE); + while (k2Index == k1Index) { + k2Index = ThreadLocalRandom.current().nextInt(ARRAY_SIZE); + } + + for (int i = 0; i < ARRAY_SIZE; i++) { + String key; + if (i == k1Index) { + key = "key1"; + } + else if (i == k2Index) { + key = "key2"; + } + else { + key = generateRandomKey(ThreadLocalRandom.current().nextInt(5) + 1); + } + jsonSlice.appendBytes("\"".getBytes()); + jsonSlice.appendBytes(key.getBytes()); + jsonSlice.appendBytes("\"".getBytes()); + jsonSlice.appendByte(':'); + String value; + if (key.equals("key1") || key.equals("key2") || (ThreadLocalRandom.current().nextInt(10) & 1) == 0) { + value = generateNestedJsonValue(); + } + else { + value = generateRandomJsonValue(); + } + jsonSlice.appendBytes(value.getBytes()); + if (i < ARRAY_SIZE - 1) { + jsonSlice.appendByte(','); // Add a comma between JSON objects + } + } + + jsonSlice.appendByte('}'); + JSON.writeSlice(blockBuilder, jsonSlice.slice()); + } + 
catch (Exception ignore) { + // Ignore... + } + } + return blockBuilder.build(); + } + + private static String generateRandomJsonValue() + { + int length = ThreadLocalRandom.current().nextInt(10) + 1; + StringBuilder builder = new StringBuilder(length + 2); + builder.append('"'); + for (int i = 0; i < length; i++) { + char c = CHARACTERS.charAt(ThreadLocalRandom.current().nextInt(CHARACTERS.length())); + if (c == '"') { + builder.append('\\'); // escape double quote + } + builder.append(c); + } + builder.append('"'); + return builder.toString(); + } + + private static String generateNestedJsonValue() + { + int size = ThreadLocalRandom.current().nextInt(5) + 1; + StringBuilder builder = new StringBuilder(size * 10); + builder.append('{'); + for (int i = 0; i < size; i++) { + String key = generateRandomKey(ThreadLocalRandom.current().nextInt(5) + 2); + builder.append("\"").append(key).append("\":"); + builder.append(generateRandomJsonValue()); + if (i < size - 1) { + builder.append(","); + } + } + builder.append('}'); + return builder.toString(); + } + + private static String generateRandomKey(int len) + { + StringBuilder builder = new StringBuilder(len); + for (int i = 0; i < len; i++) { + builder.append(CHARACTERS.charAt(ThreadLocalRandom.current().nextInt(CHARACTERS.length()))); + } + return builder.toString(); + } + + @Test + public void verify() + { + BenchmarkJsonExtract benchmark = new BenchmarkJsonExtract(); + benchmark.setup(); + benchmark.computePage(); + } + + public static void main(String[] args) + throws Throwable + { + // assure the benchmarks are valid before running + BenchmarkJsonExtract benchmark = new BenchmarkJsonExtract(); + benchmark.setup(); + benchmark.computePage(); + + Options options = new OptionsBuilder() + .verbosity(VerboseMode.NORMAL) + .include(".*" + BenchmarkJsonExtract.class.getSimpleName() + ".*") + .warmupMode(WarmupMode.BULK_INDI) + 
.build(); + new Runner(options).run(); + } +} diff --git a/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtract.java b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtract.java index 07ea50939d41..5090b7cd7a64 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtract.java +++ b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtract.java @@ -13,6 +13,7 @@ */ package com.facebook.presto.operator.scalar; +import com.facebook.presto.common.function.SqlFunctionProperties; import com.facebook.presto.spi.PrestoException; import com.google.common.collect.ImmutableList; import io.airlift.slice.Slice; @@ -23,6 +24,7 @@ import java.io.IOException; import java.util.List; +import static com.facebook.presto.common.type.TimeZoneKey.UTC_KEY; import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.facebook.presto.operator.scalar.JsonExtract.JsonExtractor; import static com.facebook.presto.operator.scalar.JsonExtract.JsonValueJsonExtractor; @@ -30,6 +32,7 @@ import static com.facebook.presto.operator.scalar.JsonExtract.ScalarValueJsonExtractor; import static com.facebook.presto.operator.scalar.JsonExtract.generateExtractor; import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; +import static java.util.Locale.ENGLISH; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; @@ -38,6 +41,11 @@ public class TestJsonExtract extends AbstractTestFunctions { + public static final SqlFunctionProperties PROPERTIES_CANONICALIZED_EXTRACT_ENABLED = + SqlFunctionProperties.builder().setTimeZoneKey(UTC_KEY).setLegacyTimestamp(true).setSessionStartTime(0).setSessionLocale(ENGLISH).setSessionUser("user").setCanonicalizedJsonExtract(true).build(); + + public static final SqlFunctionProperties PROPERTIES_CANONICALIZED_EXTRACT_DISABLED = + 
SqlFunctionProperties.builder().setTimeZoneKey(UTC_KEY).setLegacyTimestamp(true).setSessionStartTime(0).setSessionLocale(ENGLISH).setSessionUser("user").setCanonicalizedJsonExtract(false).build(); @BeforeClass public void setUp() { @@ -148,20 +156,30 @@ public void testScalarValueJsonExtractor() ScalarValueJsonExtractor extractor = new ScalarValueJsonExtractor(); // Check scalar values - assertEquals(doExtract(extractor, "123"), "123"); - assertEquals(doExtract(extractor, "-1"), "-1"); - assertEquals(doExtract(extractor, "0.01"), "0.01"); - assertEquals(doExtract(extractor, "\"abc\""), "abc"); - assertEquals(doExtract(extractor, "\"\""), ""); - assertEquals(doExtract(extractor, "null"), null); + assertEquals(doExtractLegacy(extractor, "123"), "123"); + assertEquals(doExtractLegacy(extractor, "-1"), "-1"); + assertEquals(doExtractLegacy(extractor, "0.01"), "0.01"); + assertEquals(doExtractLegacy(extractor, "\"abc\""), "abc"); + assertEquals(doExtractLegacy(extractor, "\"\""), ""); + assertNull(doExtractLegacy(extractor, "null")); + assertEquals(doCanonicalizedExtract(extractor, "123"), "123"); + assertEquals(doCanonicalizedExtract(extractor, "-1"), "-1"); + assertEquals(doCanonicalizedExtract(extractor, "0.01"), "0.01"); + assertEquals(doCanonicalizedExtract(extractor, "\"abc\""), "abc"); + assertEquals(doCanonicalizedExtract(extractor, "\"\""), ""); + assertNull(doCanonicalizedExtract(extractor, "null")); // Test character escaped values - assertEquals(doExtract(extractor, "\"ab\\u0001c\""), "ab\001c"); - assertEquals(doExtract(extractor, "\"ab\\u0002c\""), "ab\002c"); + assertEquals(doExtractLegacy(extractor, "\"ab\\u0001c\""), "ab\001c"); + assertEquals(doExtractLegacy(extractor, "\"ab\\u0002c\""), "ab\002c"); + assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0001c\""), "ab\001c"); + assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0002c\""), "ab\002c"); // Complex types should return null - assertNull(doExtract(extractor, "[1, 2, 3]")); - 
assertNull(doExtract(extractor, "{\"a\": 1}")); + assertNull(doExtractLegacy(extractor, "[1, 2, 3]")); + assertNull(doExtractLegacy(extractor, "{\"a\": 1}")); + assertNull(doCanonicalizedExtract(extractor, "[1, 2, 3]")); + assertNull(doCanonicalizedExtract(extractor, "{\"a\": 1}")); } @Test @@ -171,20 +189,30 @@ public void testJsonValueJsonExtractor() JsonValueJsonExtractor extractor = new JsonValueJsonExtractor(); // Check scalar values - assertEquals(doExtract(extractor, "123"), "123"); - assertEquals(doExtract(extractor, "-1"), "-1"); - assertEquals(doExtract(extractor, "0.01"), "0.01"); - assertEquals(doExtract(extractor, "\"abc\""), "\"abc\""); - assertEquals(doExtract(extractor, "\"\""), "\"\""); - assertEquals(doExtract(extractor, "null"), "null"); + assertEquals(doExtractLegacy(extractor, "123"), "123"); + assertEquals(doExtractLegacy(extractor, "-1"), "-1"); + assertEquals(doExtractLegacy(extractor, "0.01"), "0.01"); + assertEquals(doExtractLegacy(extractor, "\"abc\""), "\"abc\""); + assertEquals(doExtractLegacy(extractor, "\"\""), "\"\""); + assertEquals(doExtractLegacy(extractor, "null"), "null"); + assertEquals(doCanonicalizedExtract(extractor, "123"), "123"); + assertEquals(doCanonicalizedExtract(extractor, "-1"), "-1"); + assertEquals(doCanonicalizedExtract(extractor, "0.01"), "0.01"); + assertEquals(doCanonicalizedExtract(extractor, "\"abc\""), "\"abc\""); + assertEquals(doCanonicalizedExtract(extractor, "\"\""), "\"\""); + assertEquals(doCanonicalizedExtract(extractor, "null"), "null"); // Test character escaped values - assertEquals(doExtract(extractor, "\"ab\\u0001c\""), "\"ab\\u0001c\""); - assertEquals(doExtract(extractor, "\"ab\\u0002c\""), "\"ab\\u0002c\""); + assertEquals(doExtractLegacy(extractor, "\"ab\\u0001c\""), "\"ab\\u0001c\""); + assertEquals(doExtractLegacy(extractor, "\"ab\\u0002c\""), "\"ab\\u0002c\""); + assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0001c\""), "\"ab\\u0001c\""); + 
assertEquals(doCanonicalizedExtract(extractor, "\"ab\\u0002c\""), "\"ab\\u0002c\""); // Complex types should return json values - assertEquals(doExtract(extractor, "[1, 2, 3]"), "[1,2,3]"); - assertEquals(doExtract(extractor, "{\"a\": 1}"), "{\"a\":1}"); + assertEquals(doExtractLegacy(extractor, "[1, 2, 3]"), "[1,2,3]"); + assertEquals(doExtractLegacy(extractor, "{\"a\": 1}"), "{\"a\":1}"); + assertEquals(doCanonicalizedExtract(extractor, "[1, 2, 3]"), "[1,2,3]"); + assertEquals(doCanonicalizedExtract(extractor, "{\"a\": 1}"), "{\"a\":1}"); } @Test @@ -194,14 +222,20 @@ public void testArrayElementJsonExtractor() ObjectFieldJsonExtractor firstExtractor = new ObjectFieldJsonExtractor<>("0", new ScalarValueJsonExtractor()); ObjectFieldJsonExtractor secondExtractor = new ObjectFieldJsonExtractor<>("1", new ScalarValueJsonExtractor()); - assertNull(doExtract(firstExtractor, "[]")); - assertEquals(doExtract(firstExtractor, "[1, 2, 3]"), "1"); - assertEquals(doExtract(secondExtractor, "[1, 2]"), "2"); - assertNull(doExtract(secondExtractor, "[1, null]")); + assertNull(doExtractLegacy(firstExtractor, "[]")); + assertEquals(doExtractLegacy(firstExtractor, "[1, 2, 3]"), "1"); + assertEquals(doExtractLegacy(secondExtractor, "[1, 2]"), "2"); + assertNull(doExtractLegacy(secondExtractor, "[1, null]")); + assertNull(doCanonicalizedExtract(firstExtractor, "[]")); + assertEquals(doCanonicalizedExtract(firstExtractor, "[1, 2, 3]"), "1"); + assertEquals(doCanonicalizedExtract(secondExtractor, "[1, 2]"), "2"); + assertNull(doCanonicalizedExtract(secondExtractor, "[1, null]")); // Out of bounds - assertNull(doExtract(secondExtractor, "[1]")); + assertNull(doExtractLegacy(secondExtractor, "[1]")); + assertNull(doCanonicalizedExtract(secondExtractor, "[1]")); // Check skipping complex structures - assertEquals(doExtract(secondExtractor, "[{\"a\": 1}, 2, 3]"), "2"); + assertEquals(doExtractLegacy(secondExtractor, "[{\"a\": 1}, 2, 3]"), "2"); + 
assertEquals(doCanonicalizedExtract(secondExtractor, "[{\"a\": 1}, 2, 3]"), "2"); } @Test @@ -210,131 +244,233 @@ public void testObjectFieldJsonExtractor() { ObjectFieldJsonExtractor extractor = new ObjectFieldJsonExtractor<>("fuu", new ScalarValueJsonExtractor()); - assertNull(doExtract(extractor, "{}")); - assertNull(doExtract(extractor, "{\"a\": 1}")); - assertEquals(doExtract(extractor, "{\"fuu\": 1}"), "1"); - assertEquals(doExtract(extractor, "{\"a\": 0, \"fuu\": 1}"), "1"); + assertNull(doExtractLegacy(extractor, "{}")); + assertNull(doExtractLegacy(extractor, "{\"a\": 1}")); + assertEquals(doExtractLegacy(extractor, "{\"fuu\": 1}"), "1"); + assertEquals(doExtractLegacy(extractor, "{\"a\": 0, \"fuu\": 1}"), "1"); + assertNull(doCanonicalizedExtract(extractor, "{}")); + assertNull(doCanonicalizedExtract(extractor, "{\"a\": 1}")); + assertEquals(doCanonicalizedExtract(extractor, "{\"fuu\": 1}"), "1"); + assertEquals(doCanonicalizedExtract(extractor, "{\"a\": 0, \"fuu\": 1}"), "1"); // Check skipping complex structures - assertEquals(doExtract(extractor, "{\"a\": [1, 2, 3], \"fuu\": 1}"), "1"); + assertEquals(doExtractLegacy(extractor, "{\"a\": [1, 2, 3], \"fuu\": 1}"), "1"); + assertEquals(doCanonicalizedExtract(extractor, "{\"a\": [1, 2, 3], \"fuu\": 1}"), "1"); } @Test public void testFullScalarExtract() { - assertNull(doScalarExtract("{}", "$")); - assertEquals(doScalarExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu"), null); // Null b/c value is complex type - assertEquals(doScalarExtract("{\"fuu\": 1}", "$.fuu"), "1"); - assertEquals(doScalarExtract("{\"fuu\": 1}", "$[fuu]"), "1"); - assertEquals(doScalarExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); - assertNull(doScalarExtract("{\"fuu\": null}", "$.fuu")); - assertEquals(doScalarExtract("{\"fuu\": 1}", "$.bar"), null); - assertEquals(doScalarExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\001"); // Test escaped characters - assertEquals(doScalarExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "abc"); - assertEquals(doScalarExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); - 
assertEquals(doScalarExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), null); // Null b/c value is complex type - assertEquals(doScalarExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); - assertEquals(doScalarExtract("{\"fuu\": [0, {\"bar\": {\"key\" : [\"value\"]}}, 2]}", "$.fuu[1].bar.key[0]"), "value"); + assertNull(doScalarExtractLegacy("{}", "$")); + assertNull(doScalarExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu")); // Null b/c value is complex type + assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$.fuu"), "1"); + assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$[fuu]"), "1"); + assertEquals(doScalarExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertNull(doScalarExtractLegacy("{\"fuu\": null}", "$.fuu")); + assertNull(doScalarExtractLegacy("{\"fuu\": 1}", "$.bar")); + assertEquals(doScalarExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\001"); // Test escaped characters + assertEquals(doScalarExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "abc"); + assertEquals(doScalarExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); + assertNull(doScalarExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]")); // Null b/c value is complex type + assertEquals(doScalarExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); + assertEquals(doScalarExtractLegacy("{\"fuu\": [0, {\"bar\": {\"key\" : [\"value\"]}}, 2]}", "$.fuu[1].bar.key[0]"), "value"); + + assertNull(doScalarCanonicalizedExtract("{}", "$")); + assertNull(doScalarCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu")); // Null b/c value is complex type + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$.fuu"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$[fuu]"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertNull(doScalarCanonicalizedExtract("{\"fuu\": null}", "$.fuu")); + assertNull(doScalarCanonicalizedExtract("{\"fuu\": 1}", "$.bar")); + 
assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\001"); // Test escaped characters + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "abc"); + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); + assertNull(doScalarCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]")); // Null b/c value is complex type + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); + assertEquals(doScalarCanonicalizedExtract("{\"fuu\": [0, {\"bar\": {\"key\" : [\"value\"]}}, 2]}", "$.fuu[1].bar.key[0]"), "value"); // Test non-object extraction - assertEquals(doScalarExtract("[0, 1, 2]", "$[0]"), "0"); - assertEquals(doScalarExtract("\"abc\"", "$"), "abc"); - assertEquals(doScalarExtract("123", "$"), "123"); - assertEquals(doScalarExtract("null", "$"), null); + assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[0]"), "0"); + assertEquals(doScalarExtractLegacy("\"abc\"", "$"), "abc"); + assertEquals(doScalarExtractLegacy("123", "$"), "123"); + assertNull(doScalarExtractLegacy("null", "$")); + + assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[0]"), "0"); + assertEquals(doScalarCanonicalizedExtract("\"abc\"", "$"), "abc"); + assertEquals(doScalarCanonicalizedExtract("123", "$"), "123"); + assertNull(doScalarCanonicalizedExtract("null", "$")); // Test numeric path expression matches arrays and objects - assertEquals(doScalarExtract("[0, 1, 2]", "$.1"), "1"); - assertEquals(doScalarExtract("[0, 1, 2]", "$[1]"), "1"); - assertEquals(doScalarExtract("[0, 1, 2]", "$[\"1\"]"), "1"); - assertEquals(doScalarExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1"); - assertEquals(doScalarExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); - assertEquals(doScalarExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); + assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$.1"), "1"); + 
assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[1]"), "1"); + assertEquals(doScalarExtractLegacy("[0, 1, 2]", "$[\"1\"]"), "1"); + assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1"); + assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); + assertEquals(doScalarExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); + + assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$.1"), "1"); + assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[1]"), "1"); + assertEquals(doScalarCanonicalizedExtract("[0, 1, 2]", "$[\"1\"]"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); // Test fields starting with a digit - assertEquals(doScalarExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); - assertEquals(doScalarExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); - assertEquals(doScalarExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); + assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); + assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); + assertEquals(doScalarExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); + + assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); + assertEquals(doScalarCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); } @Test public void testFullJsonExtract() { - 
assertEquals(doJsonExtract("{}", "$"), "{}"); - assertEquals(doJsonExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu"), "{\"bar\":1}"); - assertEquals(doJsonExtract("{\"fuu\": 1}", "$.fuu"), "1"); - assertEquals(doJsonExtract("{\"fuu\": 1}", "$[fuu]"), "1"); - assertEquals(doJsonExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); - assertEquals(doJsonExtract("{\"fuu\": null}", "$.fuu"), "null"); - assertNull(doJsonExtract("{\"fuu\": 1}", "$.bar")); - assertEquals(doJsonExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters - assertEquals(doJsonExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "\"abc\""); - assertEquals(doJsonExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); - assertEquals(doJsonExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), "[100,101]"); - assertEquals(doJsonExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); + assertEquals(doJsonExtractLegacy("{}", "$"), "{}"); + assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu"), "{\"bar\":1}"); + assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$.fuu"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[fuu]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": null}", "$.fuu"), "null"); + assertNull(doJsonExtractLegacy("{\"fuu\": 1}", "$.bar")); + assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "\"abc\""); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), "[100,101]"); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); + + assertEquals(doJsonCanonicalizedExtract("{}", "$"), "{}"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu"), 
"{\"bar\":1}"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$.fuu"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[fuu]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": null}", "$.fuu"), "null"); + assertNull(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$.bar")); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$.bar"), "\"abc\""); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$.fuu[0]"), "0.1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1]"), "[100,101]"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$.fuu[1][1]"), "101"); // Test non-object extraction - assertEquals(doJsonExtract("[0, 1, 2]", "$[0]"), "0"); - assertEquals(doJsonExtract("\"abc\"", "$"), "\"abc\""); - assertEquals(doJsonExtract("123", "$"), "123"); - assertEquals(doJsonExtract("null", "$"), "null"); + assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[0]"), "0"); + assertEquals(doJsonExtractLegacy("\"abc\"", "$"), "\"abc\""); + assertEquals(doJsonExtractLegacy("123", "$"), "123"); + assertEquals(doJsonExtractLegacy("null", "$"), "null"); + + assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[0]"), "0"); + assertEquals(doJsonCanonicalizedExtract("\"abc\"", "$"), "\"abc\""); + assertEquals(doJsonCanonicalizedExtract("123", "$"), "123"); + assertEquals(doJsonCanonicalizedExtract("null", "$"), "null"); // Test extraction using bracket json path - assertEquals(doJsonExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"]"), "{\"bar\":1}"); - assertEquals(doJsonExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"][\"bar\"]"), "1"); - assertEquals(doJsonExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); - assertEquals(doJsonExtract("{\"fuu\": null}", 
"$[\"fuu\"]"), "null"); - assertNull(doJsonExtract("{\"fuu\": 1}", "$[\"bar\"]")); - assertEquals(doJsonExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters - assertEquals(doJsonExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$[\"bar\"]"), "\"abc\""); - assertEquals(doJsonExtract("{\"fuu\": [0.1, 1, 2]}", "$[\"fuu\"][0]"), "0.1"); - assertEquals(doJsonExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1]"), "[100,101]"); - assertEquals(doJsonExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1][1]"), "101"); + assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"]"), "{\"bar\":1}"); + assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"][\"bar\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": null}", "$[\"fuu\"]"), "null"); + assertNull(doJsonExtractLegacy("{\"fuu\": 1}", "$[\"bar\"]")); + assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\"fuu\": 1, \"bar\": \"abc\"}", "$[\"bar\"]"), "\"abc\""); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0.1, 1, 2]}", "$[\"fuu\"][0]"), "0.1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1]"), "[100,101]"); + assertEquals(doJsonExtractLegacy("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1][1]"), "101"); + + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"]"), "{\"bar\":1}"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"][\"bar\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"fuu\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": null}", "$[\"fuu\"]"), "null"); + assertNull(doJsonCanonicalizedExtract("{\"fuu\": 1}", "$[\"bar\"]")); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), 
"\"\\u0001\""); // Test escaped characters + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": 1, \"bar\": \"abc\"}", "$[\"bar\"]"), "\"abc\""); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0.1, 1, 2]}", "$[\"fuu\"][0]"), "0.1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1]"), "[100,101]"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [0, [100, 101], 2]}", "$[\"fuu\"][1][1]"), "101"); // Test extraction using bracket json path with special json characters in path - assertEquals(doJsonExtract("{\"@$fuu\": {\".b.ar\": 1}}", "$[\"@$fuu\"]"), "{\".b.ar\":1}"); - assertEquals(doJsonExtract("{\"fuu..\": 1}", "$[\"fuu..\"]"), "1"); - assertEquals(doJsonExtract("{\"fu*u\": null}", "$[\"fu*u\"]"), "null"); - assertNull(doJsonExtract("{\",fuu\": 1}", "$[\"bar\"]")); - assertEquals(doJsonExtract("{\",fuu\": [\"\\u0001\"]}", "$[\",fuu\"][0]"), "\"\\u0001\""); // Test escaped characters - assertEquals(doJsonExtract("{\":fu:u:\": 1, \":b:ar:\": \"abc\"}", "$[\":b:ar:\"]"), "\"abc\""); - assertEquals(doJsonExtract("{\"?()fuu\": [0.1, 1, 2]}", "$[\"?()fuu\"][0]"), "0.1"); - assertEquals(doJsonExtract("{\"f?uu\": [0, [100, 101], 2]}", "$[\"f?uu\"][1]"), "[100,101]"); - assertEquals(doJsonExtract("{\"fuu()\": [0, [100, 101], 2]}", "$[\"fuu()\"][1][1]"), "101"); + assertEquals(doJsonExtractLegacy("{\"@$fuu\": {\".b.ar\": 1}}", "$[\"@$fuu\"]"), "{\".b.ar\":1}"); + assertEquals(doJsonExtractLegacy("{\"fuu..\": 1}", "$[\"fuu..\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fu*u\": null}", "$[\"fu*u\"]"), "null"); + assertNull(doJsonExtractLegacy("{\",fuu\": 1}", "$[\"bar\"]")); + assertEquals(doJsonExtractLegacy("{\",fuu\": [\"\\u0001\"]}", "$[\",fuu\"][0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\":fu:u:\": 1, \":b:ar:\": \"abc\"}", "$[\":b:ar:\"]"), "\"abc\""); + assertEquals(doJsonExtractLegacy("{\"?()fuu\": [0.1, 1, 2]}", "$[\"?()fuu\"][0]"), "0.1"); + 
assertEquals(doJsonExtractLegacy("{\"f?uu\": [0, [100, 101], 2]}", "$[\"f?uu\"][1]"), "[100,101]"); + assertEquals(doJsonExtractLegacy("{\"fuu()\": [0, [100, 101], 2]}", "$[\"fuu()\"][1][1]"), "101"); + + assertEquals(doJsonCanonicalizedExtract("{\"@$fuu\": {\".b.ar\": 1}}", "$[\"@$fuu\"]"), "{\".b.ar\":1}"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu..\": 1}", "$[\"fuu..\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fu*u\": null}", "$[\"fu*u\"]"), "null"); + assertNull(doJsonCanonicalizedExtract("{\",fuu\": 1}", "$[\"bar\"]")); + assertEquals(doJsonCanonicalizedExtract("{\",fuu\": [\"\\u0001\"]}", "$[\",fuu\"][0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonCanonicalizedExtract("{\":fu:u:\": 1, \":b:ar:\": \"abc\"}", "$[\":b:ar:\"]"), "\"abc\""); + assertEquals(doJsonCanonicalizedExtract("{\"?()fuu\": [0.1, 1, 2]}", "$[\"?()fuu\"][0]"), "0.1"); + assertEquals(doJsonCanonicalizedExtract("{\"f?uu\": [0, [100, 101], 2]}", "$[\"f?uu\"][1]"), "[100,101]"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu()\": [0, [100, 101], 2]}", "$[\"fuu()\"][1][1]"), "101"); // Test extraction using mix of bracket and dot notation json path - assertEquals(doJsonExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"].bar"), "1"); - assertEquals(doJsonExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu[\"bar\"]"), "1"); - assertEquals(doJsonExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters - assertEquals(doJsonExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"].bar"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": {\"bar\": 1}}", "$.fuu[\"bar\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters + + 
assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$[\"fuu\"].bar"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": {\"bar\": 1}}", "$.fuu[\"bar\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$[\"fuu\"][0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonCanonicalizedExtract("{\"fuu\": [\"\\u0001\"]}", "$.fuu[0]"), "\"\\u0001\""); // Test escaped characters // Test extraction using mix of bracket and dot notation json path with special json characters in path - assertEquals(doJsonExtract("{\"@$fuu\": {\"bar\": 1}}", "$[\"@$fuu\"].bar"), "1"); - assertEquals(doJsonExtract("{\",fuu\": {\"bar\": [\"\\u0001\"]}}", "$[\",fuu\"].bar[0]"), "\"\\u0001\""); // Test escaped characters + assertEquals(doJsonExtractLegacy("{\"@$fuu\": {\"bar\": 1}}", "$[\"@$fuu\"].bar"), "1"); + assertEquals(doJsonExtractLegacy("{\",fuu\": {\"bar\": [\"\\u0001\"]}}", "$[\",fuu\"].bar[0]"), "\"\\u0001\""); // Test escaped characters + + assertEquals(doJsonCanonicalizedExtract("{\"@$fuu\": {\"bar\": 1}}", "$[\"@$fuu\"].bar"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\",fuu\": {\"bar\": [\"\\u0001\"]}}", "$[\",fuu\"].bar[0]"), "\"\\u0001\""); // Test escaped characters // Test numeric path expression matches arrays and objects - assertEquals(doJsonExtract("[0, 1, 2]", "$.1"), "1"); - assertEquals(doJsonExtract("[0, 1, 2]", "$[1]"), "1"); - assertEquals(doJsonExtract("[0, 1, 2]", "$[\"1\"]"), "1"); - assertEquals(doJsonExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1"); - assertEquals(doJsonExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); - assertEquals(doJsonExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); + assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$.1"), "1"); + assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[1]"), "1"); + assertEquals(doJsonExtractLegacy("[0, 1, 2]", "$[\"1\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", 
"$.1"), "1"); + assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); + assertEquals(doJsonExtractLegacy("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); + + assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$.1"), "1"); + assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[1]"), "1"); + assertEquals(doJsonCanonicalizedExtract("[0, 1, 2]", "$[\"1\"]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$.1"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[1]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"0\" : 0, \"1\" : 1, \"2\" : 2, }", "$[\"1\"]"), "1"); // Test fields starting with a digit - assertEquals(doJsonExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); - assertEquals(doJsonExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); - assertEquals(doJsonExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); + assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); + assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); + assertEquals(doJsonExtractLegacy("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); + + assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$.30day"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[30day]"), "1"); + assertEquals(doJsonCanonicalizedExtract("{\"15day\" : 0, \"30day\" : 1, \"90day\" : 2, }", "$[\"30day\"]"), "1"); } @Test public void testInvalidExtracts() { - assertInvalidExtract("", "", "Invalid JSON path: ''"); - assertInvalidExtract("{}", "$.bar[2][-1]", "Invalid JSON path: '$.bar[2][-1]'"); - assertInvalidExtract("{}", "$.fuu..bar", "Invalid JSON path: '$.fuu..bar'"); - assertInvalidExtract("{}", "$.", 
"Invalid JSON path: '$.'"); - assertInvalidExtract("", "$$", "Invalid JSON path: '$$'"); - assertInvalidExtract("", " ", "Invalid JSON path: ' '"); - assertInvalidExtract("", ".", "Invalid JSON path: '.'"); - assertInvalidExtract("{ \"store\": { \"book\": [{ \"title\": \"title\" }] } }", "$.store.book[", "Invalid JSON path: '$.store.book['"); + assertInvalidLegacyExtract("", "", "Invalid JSON path: ''"); + assertInvalidLegacyExtract("{}", "$.bar[2][-1]", "Invalid JSON path: '$.bar[2][-1]'"); + assertInvalidLegacyExtract("{}", "$.fuu..bar", "Invalid JSON path: '$.fuu..bar'"); + assertInvalidLegacyExtract("{}", "$.", "Invalid JSON path: '$.'"); + assertInvalidLegacyExtract("", "$$", "Invalid JSON path: '$$'"); + assertInvalidLegacyExtract("", " ", "Invalid JSON path: ' '"); + assertInvalidLegacyExtract("", ".", "Invalid JSON path: '.'"); + assertInvalidLegacyExtract("{ \"store\": { \"book\": [{ \"title\": \"title\" }] } }", "$.store.book[", "Invalid JSON path: '$.store.book['"); + + assertInvalidCanonicalizedExtract("", "", "Invalid JSON path: ''"); + assertInvalidCanonicalizedExtract("{}", "$.bar[2][-1]", "Invalid JSON path: '$.bar[2][-1]'"); + assertInvalidCanonicalizedExtract("{}", "$.fuu..bar", "Invalid JSON path: '$.fuu..bar'"); + assertInvalidCanonicalizedExtract("{}", "$.", "Invalid JSON path: '$.'"); + assertInvalidCanonicalizedExtract("", "$$", "Invalid JSON path: '$$'"); + assertInvalidCanonicalizedExtract("", " ", "Invalid JSON path: ' '"); + assertInvalidCanonicalizedExtract("", ".", "Invalid JSON path: '.'"); + assertInvalidCanonicalizedExtract("{ \"store\": { \"book\": [{ \"title\": \"title\" }] } }", "$.store.book[", "Invalid JSON path: '$.store.book['"); } @Test @@ -344,22 +479,41 @@ public void testNoAutomaticEncodingDetection() assertFunction("JSON_EXTRACT_SCALAR(UTF8(X'00 00 00 00 7b 22 72 22'), '$.x')", VARCHAR, null); } - private static String doExtract(JsonExtractor jsonExtractor, String json) + private static String 
doExtractLegacy(JsonExtractor jsonExtractor, String json) throws IOException { - Slice extract = jsonExtractor.extract(Slices.utf8Slice(json).getInput()); + Slice extract = jsonExtractor.extract(Slices.utf8Slice(json).getInput(), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED); return (extract == null) ? null : extract.toStringUtf8(); } - private static String doScalarExtract(String inputJson, String jsonPath) + private static String doCanonicalizedExtract(JsonExtractor jsonExtractor, String json) + throws IOException { - Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new ScalarValueJsonExtractor())); + Slice extract = jsonExtractor.extract(Slices.utf8Slice(json).getInput(), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED); + return (extract == null) ? null : extract.toStringUtf8(); + } + + private static String doScalarExtractLegacy(String inputJson, String jsonPath) + { + Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new ScalarValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED); return (value == null) ? null : value.toStringUtf8(); } - private static String doJsonExtract(String inputJson, String jsonPath) + private static String doScalarCanonicalizedExtract(String inputJson, String jsonPath) { - Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor())); + Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new ScalarValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED); + return (value == null) ? null : value.toStringUtf8(); + } + + private static String doJsonExtractLegacy(String inputJson, String jsonPath) + { + Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_DISABLED); + return (value == null) ? 
null : value.toStringUtf8(); + } + + private static String doJsonCanonicalizedExtract(String inputJson, String jsonPath) + { + Slice value = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor()), PROPERTIES_CANONICALIZED_EXTRACT_ENABLED); return (value == null) ? null : value.toStringUtf8(); } @@ -368,10 +522,21 @@ private static List tokenizePath(String path) return ImmutableList.copyOf(new JsonPathTokenizer(path)); } - private static void assertInvalidExtract(String inputJson, String jsonPath, String message) + private static void assertInvalidLegacyExtract(String inputJson, String jsonPath, String message) + { + try { + doJsonExtractLegacy(inputJson, jsonPath); + } + catch (PrestoException e) { + assertEquals(e.getErrorCode(), INVALID_FUNCTION_ARGUMENT.toErrorCode()); + assertEquals(e.getMessage(), message); + } + } + + private static void assertInvalidCanonicalizedExtract(String inputJson, String jsonPath, String message) { try { - doJsonExtract(inputJson, jsonPath); + doJsonCanonicalizedExtract(inputJson, jsonPath); } catch (PrestoException e) { assertEquals(e.getErrorCode(), INVALID_FUNCTION_ARGUMENT.toErrorCode()); diff --git a/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtractFunctions.java b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtractFunctions.java index c349e2a47a8c..b51294d9b6a2 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtractFunctions.java +++ b/presto-main-base/src/test/java/com/facebook/presto/operator/scalar/TestJsonExtractFunctions.java @@ -13,11 +13,15 @@ */ package com.facebook.presto.operator.scalar; +import com.facebook.presto.sql.analyzer.FeaturesConfig; +import com.facebook.presto.sql.analyzer.FunctionsConfig; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static com.facebook.presto.common.type.BigintType.BIGINT; import static 
com.facebook.presto.common.type.JsonType.JSON; import static com.facebook.presto.common.type.VarcharType.VARCHAR; +import static com.facebook.presto.spi.StandardErrorCode.INVALID_FUNCTION_ARGUMENT; import static java.lang.String.format; public class TestJsonExtractFunctions @@ -61,6 +65,21 @@ public class TestJsonExtractFunctions " \"expensive\": 10\n" + "}"; + private static FunctionAssertions canonicalizedJsonExtractDisabled; + private static FunctionAssertions canonicalizedJsonExtractEnabled; + + @BeforeClass + public void setUp() + { + registerScalar(getClass()); + FunctionsConfig featuresConfigWithCanonicalizedJsonExtractDisabled = new FunctionsConfig() + .setCanonicalizedJsonExtract(false); + canonicalizedJsonExtractDisabled = new FunctionAssertions(session, new FeaturesConfig(), featuresConfigWithCanonicalizedJsonExtractDisabled, true); + FunctionsConfig featuresConfigWithCanonicalizedJsonExtractEnabled = new FunctionsConfig() + .setCanonicalizedJsonExtract(true); + canonicalizedJsonExtractEnabled = new FunctionAssertions(session, new FeaturesConfig(), featuresConfigWithCanonicalizedJsonExtractEnabled, true); + } + @Test public void testJsonExtract() { @@ -75,6 +94,16 @@ public void testJsonExtract() assertFunction(format("JSON_EXTRACT('%s', '%s')", "INVALID_JSON", "$"), JSON, null); assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$"), JSON, "{\"x\":{\"a\":1,\"b\":2}}"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x"), JSON, "{\"a\":1,\"b\":2}"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x.a"), JSON, "1"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", 
"$.x.c"), JSON, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : [2, 3]} }", "$.x.b[1]"), JSON, "3"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "[1,2,3]", "$[1]"), JSON, "2"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "[1,null,3]", "$[1]"), JSON, "null"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", "INVALID_JSON", "$"), JSON, null); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); + // complex expressions (should run on Jayway) assertFunction(format("JSON_EXTRACT('%s', '%s')", json, "$.store.book[*].isbn"), JSON, "[\"0-553-21311-3\",\"0-395-19395-8\"]"); assertFunction(format("JSON_EXTRACT('%s', '%s')", json, "$..price"), JSON, "[8.95,12.99,8.99,22.99,19.95]"); @@ -83,10 +112,49 @@ public void testJsonExtract() assertFunction(format("JSON_EXTRACT('%s', '%s')", json, "concat($..category)"), JSON, "\"referencefictionfictionfiction\""); assertFunction(format("JSON_EXTRACT('%s', '%s')", json, "$.store.keys()"), JSON, "[\"book\",\"bicycle\"]"); assertFunction(format("JSON_EXTRACT('%s', '%s')", json, "$.store.book[1].author"), JSON, "\"Evelyn Waugh\""); - assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'"); } + @Test + public void testExtractJsonWithCanonicalOutput() + { + // Test with simple JSON object + String json = "{\"key_2\": 2, \"key_3\": 3, \"key_1\": 1}"; + String path = "$"; + String expected = "{\"key_1\":1,\"key_2\":2,\"key_3\":3}"; + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, expected); + + // Test with nested JSON object + json = "{\"key_1\": {\"nested_key_2\": \"value_2\", \"nested_key_1\": \"value_1\"}, \"key_2\": 2}"; + path = "$.key_1"; + expected = 
"{\"nested_key_1\":\"value_1\",\"nested_key_2\":\"value_2\"}"; + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, expected); + + // Test with Array of JSON objects + json = "[{\"key_b\":\"v_b\",\"key_a\":\"v_a\"}, {\"key_2\": \"value_2\"}]"; + path = "$[0]"; + expected = "{\"key_a\":\"v_a\",\"key_b\":\"v_b\"}"; + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, expected); + } + + @Test + public void testInvalidFunctionIfJsonInvalid() + { + // Unbalanced quotes + String json = "{ \"key_2\": 2, \"key_1\": \"z\"a1\" }"; + String path = "$.key_1"; + assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, "\"z\""); + canonicalizedJsonExtractDisabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, "\"z\""); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", json, path), INVALID_FUNCTION_ARGUMENT); + + // Extra comma + json = "{ \"key_2\": 2, \"key_1\": \"value_1\", }"; + path = "$.key_1"; + assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, "\"value_1\""); + canonicalizedJsonExtractDisabled.assertFunction(format("JSON_EXTRACT('%s', '%s')", json, path), JSON, "\"value_1\""); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_EXTRACT('%s', '%s')", json, path), INVALID_FUNCTION_ARGUMENT); + } + @Test public void testJsonSize() { @@ -108,11 +176,33 @@ public void testJsonSize() assertFunction(format("JSON_SIZE(null, '%s')", "$"), BIGINT, null); assertFunction(format("JSON_SIZE(JSON '%s', null)", "[1,2,3]"), BIGINT, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$"), BIGINT, 1L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x"), BIGINT, 2L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', 
'%s')", "{\"x\": {\"a\" : 1, \"b\" : [1,2,3], \"c\" : {\"w\":9}} }", "$.x"), BIGINT, 3L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x.a"), BIGINT, 0L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", "[1,2,3]", "$"), BIGINT, 3L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', CHAR '%s')", "[1,2,3]", "$"), BIGINT, 3L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(null, '%s')", "$"), BIGINT, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", "INVALID_JSON", "$"), BIGINT, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', null)", "[1,2,3]"), BIGINT, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$"), BIGINT, 1L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x"), BIGINT, 2L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : [1,2,3], \"c\" : {\"w\":9}} }", "$.x"), BIGINT, 3L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x.a"), BIGINT, 0L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', '%s')", "[1,2,3]", "$"), BIGINT, 3L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(null, '%s')", "$"), BIGINT, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE(JSON '%s', null)", "[1,2,3]"), BIGINT, null); + assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); assertInvalidFunction(format("JSON_SIZE('%s', CHAR '%s')", "{\"\":\"\"}", " "), "Invalid JSON path: ' '"); assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", "."), 
"Invalid JSON path: '.'"); assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", "..."), "Invalid JSON path: '...'"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_SIZE('%s', CHAR '%s')", "{\"\":\"\"}", " "), "Invalid JSON path: ' '"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", "."), "Invalid JSON path: '.'"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_SIZE('%s', '%s')", "{\"\":\"\"}", "..."), "Invalid JSON path: '...'"); + // complex expressions (should run on Jayway) assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.book[*].isbn"), BIGINT, 2L); assertFunction(format("JSON_SIZE('%s', '%s')", json, "$..price"), BIGINT, 5L); @@ -122,7 +212,16 @@ public void testJsonSize() assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.keys()"), BIGINT, 2L); assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.book[1].author"), BIGINT, 0L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.book[*].isbn"), BIGINT, 2L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "$..price"), BIGINT, 5L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.book[?(@.price < 10)].title"), BIGINT, 2L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "max($..price)"), BIGINT, 0L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "concat($..category)"), BIGINT, 0L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.keys()"), BIGINT, 2L); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_SIZE('%s', '%s')", json, "$.store.book[1].author"), BIGINT, 0L); + 
assertInvalidFunction(format("JSON_SIZE('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_SIZE('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'"); } @Test @@ -136,6 +235,13 @@ public void testJsonExtractScalar() assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "[1,2,3]", "$[1]"), VARCHAR, "2"); assertInvalidFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : 2} }", "$.x.a"), VARCHAR, "1"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"x\": {\"a\" : 1, \"b\" : [2, 3]} }", "$.x.b[1]"), VARCHAR, "3"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "[1,2,3]", "$[1]"), VARCHAR, "2"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", "{\"\":\"\"}", ""), "Invalid JSON path: ''"); + // complex expressions (should run on Jayway) assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.book[*].isbn"), VARCHAR, null); assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$..price"), VARCHAR, null); @@ -145,6 +251,15 @@ public void testJsonExtractScalar() assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.keys()"), VARCHAR, null); assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.book[1].author"), VARCHAR, "Evelyn Waugh"); + 
canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.book[*].isbn"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$..price"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.book[?(@.price < 10)].title"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "max($..price)"), VARCHAR, "22.99"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "concat($..category)"), VARCHAR, "referencefictionfictionfiction"); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.keys()"), VARCHAR, null); + canonicalizedJsonExtractEnabled.assertFunction(format("JSON_EXTRACT_SCALAR(JSON'%s', '%s')", json, "$.store.book[1].author"), VARCHAR, "Evelyn Waugh"); + assertInvalidFunction(format("JSON_EXTRACT_SCALAR('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'"); + canonicalizedJsonExtractEnabled.assertInvalidFunction(format("JSON_EXTRACT_SCALAR('%s', '%s')", json, "$...invalid"), "Invalid JSON path: '$...invalid'"); } } diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFunctionsConfig.java b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFunctionsConfig.java index e5f8cd752e2a..ad7fc277d265 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFunctionsConfig.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/analyzer/TestFunctionsConfig.java @@ -54,6 +54,7 @@ public void testDefaults() .setWarnOnCommonNanPatterns(false) .setLegacyCharToVarcharCoercion(false) .setLegacyJsonCast(true) + .setCanonicalizedJsonExtract(false) .setDefaultNamespacePrefix(JAVA_BUILTIN_NAMESPACE.toString())); } @@ -83,6 +84,7 @@ 
public void testExplicitPropertyMappings() .put("deprecated.legacy-char-to-varchar-coercion", "true") .put("legacy-json-cast", "false") .put("presto.default-namespace", "native.default") + .put("canonicalized-json-extract", "true") .build(); FunctionsConfig expected = new FunctionsConfig() @@ -107,7 +109,8 @@ public void testExplicitPropertyMappings() .setWarnOnCommonNanPatterns(true) .setLegacyCharToVarcharCoercion(true) .setLegacyJsonCast(false) - .setDefaultNamespacePrefix("native.default"); + .setDefaultNamespacePrefix("native.default") + .setCanonicalizedJsonExtract(true); assertFullMapping(properties, expected); } } diff --git a/presto-main-base/src/test/java/com/facebook/presto/sql/gen/TestExpressionCompiler.java b/presto-main-base/src/test/java/com/facebook/presto/sql/gen/TestExpressionCompiler.java index 69b48113380d..219b16b96138 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/sql/gen/TestExpressionCompiler.java +++ b/presto-main-base/src/test/java/com/facebook/presto/sql/gen/TestExpressionCompiler.java @@ -1446,17 +1446,17 @@ public void testFunctionCallJson() for (String pattern : jsonPatterns) { assertExecute(generateExpression("json_extract(%s, %s)", value, pattern), JSON, - value == null || pattern == null ? null : JsonFunctions.jsonExtract(utf8Slice(value), JsonPath.build(pattern))); + value == null || pattern == null ? null : JsonFunctions.jsonExtract(session.getSqlFunctionProperties(), utf8Slice(value), JsonPath.build(pattern))); assertExecute(generateExpression("json_extract_scalar(%s, %s)", value, pattern), value == null ? createUnboundedVarcharType() : createVarcharType(value.length()), - value == null || pattern == null ? null : JsonFunctions.jsonExtractScalar(utf8Slice(value), JsonPath.build(pattern))); + value == null || pattern == null ? 
null : JsonFunctions.jsonExtractScalar(session.getSqlFunctionProperties(), utf8Slice(value), JsonPath.build(pattern))); assertExecute(generateExpression("json_extract(%s, %s || '')", value, pattern), JSON, - value == null || pattern == null ? null : JsonFunctions.jsonExtract(utf8Slice(value), JsonPath.build(pattern))); + value == null || pattern == null ? null : JsonFunctions.jsonExtract(session.getSqlFunctionProperties(), utf8Slice(value), JsonPath.build(pattern))); assertExecute(generateExpression("json_extract_scalar(%s, %s || '')", value, pattern), value == null ? createUnboundedVarcharType() : createVarcharType(value.length()), - value == null || pattern == null ? null : JsonFunctions.jsonExtractScalar(utf8Slice(value), JsonPath.build(pattern))); + value == null || pattern == null ? null : JsonFunctions.jsonExtractScalar(session.getSqlFunctionProperties(), utf8Slice(value), JsonPath.build(pattern))); } } diff --git a/presto-main-base/src/test/java/com/facebook/presto/type/TestRowOperators.java b/presto-main-base/src/test/java/com/facebook/presto/type/TestRowOperators.java index c21b4068f88f..64f41b3a27c3 100644 --- a/presto-main-base/src/test/java/com/facebook/presto/type/TestRowOperators.java +++ b/presto-main-base/src/test/java/com/facebook/presto/type/TestRowOperators.java @@ -78,6 +78,8 @@ public class TestRowOperators private static FunctionAssertions fieldNameInJsonCastEnabled; private static FunctionAssertions legacyJsonCastEnabled; private static FunctionAssertions legacyJsonCastDisabled; + private static FunctionAssertions canonicalizedJsonExtractDisabled; + private static FunctionAssertions canonicalizedJsonExtractEnabled; @BeforeClass public void setUp() @@ -100,6 +102,13 @@ public void setUp() FunctionsConfig featuresConfigWithLegacyJsonCastDisabled = new FunctionsConfig() .setLegacyJsonCast(false); legacyJsonCastDisabled = new FunctionAssertions(session, new FeaturesConfig(), featuresConfigWithLegacyJsonCastDisabled, true); + + FunctionsConfig 
featuresConfigWithCanonicalizedJsonExtractDisabled = new FunctionsConfig() + .setCanonicalizedJsonExtract(false); + canonicalizedJsonExtractDisabled = new FunctionAssertions(session, new FeaturesConfig(), featuresConfigWithCanonicalizedJsonExtractDisabled, true); + FunctionsConfig featuresConfigWithCanonicalizedJsonExtractEnabled = new FunctionsConfig() + .setCanonicalizedJsonExtract(true); + canonicalizedJsonExtractEnabled = new FunctionAssertions(session, new FeaturesConfig(), featuresConfigWithCanonicalizedJsonExtractEnabled, true); } @AfterClass(alwaysRun = true) @@ -113,6 +122,10 @@ public final void tearDown() legacyJsonCastEnabled = null; legacyJsonCastDisabled.close(); legacyJsonCastDisabled = null; + canonicalizedJsonExtractDisabled.close(); + canonicalizedJsonExtractDisabled = null; + canonicalizedJsonExtractEnabled.close(); + canonicalizedJsonExtractEnabled = null; } @ScalarFunction @@ -539,6 +552,13 @@ public void testJsonToRow() assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: KEY1\n" + "{\"1\":[{\"key1\":\"John\",\"KEY1\":\"Johnny\"}]}"); + canonicalizedJsonExtractDisabled.assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", + "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: KEY1\n" + + "{\"1\":[{\"key1\":\"John\",\"KEY1\":\"Johnny\"}]}"); + canonicalizedJsonExtractEnabled.assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", + "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: key1\n" + + "{\"1\":[{\"KEY1\":\"Johnny\",\"key1\":\"John\"}]}"); + assertInvalidCast("CAST(unchecked_to_json('{\"a\":1,\"b\":2,\"a\":3}') AS ROW(a BIGINT, b BIGINT))", "Cannot cast to row(a bigint,b bigint). 
Duplicate field: a\n{\"a\":1,\"b\":2,\"a\":3}"); assertInvalidCast("CAST(unchecked_to_json('[{\"a\":1,\"b\":2,\"a\":3}]') AS ARRAY)", "Cannot cast to array(row(a bigint,b bigint)). Duplicate field: a\n[{\"a\":1,\"b\":2,\"a\":3}]"); } @@ -780,4 +800,18 @@ private void assertComparisonCombination(String base, String greater) assertFunction(greater + operator + base, BOOLEAN, greaterOrInequalityOperators.contains(operator)); } } + + @Test + public void testRowNestedJsonExtractNullVarchar() + { + assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", + "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: KEY1\n" + + "{\"1\":[{\"key1\":\"John\",\"KEY1\":\"Johnny\"}]}"); + canonicalizedJsonExtractDisabled.assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", + "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: KEY1\n" + + "{\"1\":[{\"key1\":\"John\",\"KEY1\":\"Johnny\"}]}"); + canonicalizedJsonExtractEnabled.assertInvalidCast("CAST(json_extract('{\"1\":[{\"key1\": \"John\", \"KEY1\":\"Johnny\"}]}', '$') AS MAP>)", + "Cannot cast to map(bigint,array(row(key1 varchar))). Duplicate field: key1\n" + + "{\"1\":[{\"KEY1\":\"Johnny\",\"key1\":\"John\"}]}"); + } }