Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions core/trino-main/src/main/java/io/trino/util/JsonUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -697,10 +697,7 @@ public static Slice currentTokenAsVarchar(JsonParser parser)
return switch (parser.currentToken()) {
case VALUE_NULL -> null;
case VALUE_STRING, FIELD_NAME -> utf8Slice(parser.getText());
// Avoidance of loss of precision does not seem to be possible here because of Jackson implementation.
case VALUE_NUMBER_FLOAT -> DoubleOperators.castToVarchar(UNBOUNDED_LENGTH, parser.getDoubleValue());
// An alternative is calling getLongValue and then BigintOperators.castToVarchar.
// It doesn't work as well because it can result in overflow and underflow exceptions for large integral numbers.
case VALUE_NUMBER_FLOAT -> utf8Slice(parser.getDecimalValue().toString());
case VALUE_NUMBER_INT -> utf8Slice(parser.getText());
case VALUE_TRUE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, true);
case VALUE_FALSE -> BooleanOperators.castToVarchar(UNBOUNDED_LENGTH, false);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.type;

import io.airlift.slice.Slice;
import io.trino.operator.scalar.JsonOperators;
import io.trino.spi.type.VarcharType;
import org.junit.jupiter.api.Test;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.runner.RunnerException;

import java.util.concurrent.TimeUnit;

import static io.airlift.slice.Slices.utf8Slice;
import static io.trino.jmh.Benchmarks.benchmark;
import static java.lang.Integer.parseInt;
import static org.assertj.core.api.Assertions.assertThat;

/**
 * JMH benchmark for {@link JsonOperators#castToVarchar}, run over several kinds of
 * JSON scalar input and over both a bounded and the unbounded varchar length.
 */
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@BenchmarkMode(Mode.AverageTime)
@Fork(3)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
public class BenchmarkJsonOperators
{
    // Annotation values must be constant expressions of type String, so the unbounded
    // length is spelled out as a literal; verify() checks it against VarcharType.UNBOUNDED_LENGTH.
    private static final String UNBOUNDED_LENGTH = "2147483647";

    /**
     * Casts the prepared JSON value to varchar of the configured length.
     *
     * @param data benchmark state holding the JSON input and the target varchar length
     * @return the result of the cast, returned so the call is not treated as dead code
     */
    @Benchmark
    public Slice benchmarkCastToVarchar(BenchmarkData data)
    {
        return JsonOperators.castToVarchar(data.varcharLength, data.jsonSlice);
    }

    /**
     * Per-thread benchmark input: one JSON document selected by {@link JsonType},
     * together with the varchar length to cast to.
     */
    @State(Scope.Thread)
    public static class BenchmarkData
    {
        // No explicit values are listed for this enum-typed parameter
        @Param
        private JsonType jsonType;

        @Param({"10000", UNBOUNDED_LENGTH})
        private long varcharLength = -1 /* invalid value */;

        private Slice jsonSlice;

        /**
         * Builds the JSON text for the selected {@link JsonType} and stores it as a UTF-8 slice.
         */
        @Setup
        public void setup()
        {
            String jsonString = switch (jsonType) {
                case STRING_SHORT -> "\"hello world\"";
                case STRING_MEDIUM -> "\"" + "The quick brown fox jumps over the lazy dog. ".repeat(5) + "\"";
                case STRING_LONG -> "\"" + "abcdefghijklmnopqrstuvwxyz0123456789".repeat(50) + "\"";
                case STRING_WITH_UNICODE -> "\"Hello \\u4e16\\u754c \\ud83d\\ude00 \\u03b1\\u03b2\\u03b3\"";
                case NUMBER_INTEGER -> "123456789";
                case NUMBER_DECIMAL -> "123456.789012";
                case NUMBER_SCIENTIFIC -> "1.23456789E8";
                case BOOLEAN_TRUE -> "true";
                case BOOLEAN_FALSE -> "false";
                case NULL -> "null";
            };
            jsonSlice = utf8Slice(jsonString);
        }
    }

    /**
     * The kinds of JSON input the benchmark is run against.
     */
    public enum JsonType
    {
        STRING_SHORT,
        STRING_MEDIUM,
        STRING_LONG,
        STRING_WITH_UNICODE,
        NUMBER_INTEGER,
        NUMBER_DECIMAL,
        NUMBER_SCIENTIFIC,
        BOOLEAN_TRUE,
        BOOLEAN_FALSE,
        NULL
    }

    /**
     * Smoke test: checks that the string form of the unbounded length matches
     * {@link VarcharType#UNBOUNDED_LENGTH} and that the benchmark body runs without
     * throwing for every {@link JsonType} at both declared varchar lengths.
     */
    @Test
    public void verify()
    {
        assertThat(parseInt(UNBOUNDED_LENGTH)).isEqualTo(VarcharType.UNBOUNDED_LENGTH);

        BenchmarkJsonOperators benchmark = new BenchmarkJsonOperators();
        // Mirror the @Param values declared on BenchmarkData.varcharLength so that
        // both the bounded and the unbounded configuration are exercised
        for (long varcharLength : new long[] {10000, VarcharType.UNBOUNDED_LENGTH}) {
            for (JsonType type : JsonType.values()) {
                BenchmarkData data = new BenchmarkData();
                data.jsonType = type;
                data.varcharLength = varcharLength;
                data.setup();
                benchmark.benchmarkCastToVarchar(data);
            }
        }
    }

    /**
     * Runs this benchmark class through the shared benchmark runner helper.
     */
    static void main()
            throws RunnerException
    {
        benchmark(BenchmarkJsonOperators.class).run();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ public void testJsonToArraySmoke()
assertThat(assertions.expression("CAST(a AS array(VARCHAR))")
.binding("a", "JSON '[true, false, 12, 12.3, \"puppies\", \"kittens\", \"null\", \"\", null]'"))
.hasType(new ArrayType(VARCHAR))
.isEqualTo(asList("true", "false", "12", "1.23E1", "puppies", "kittens", "null", "", null));
.isEqualTo(asList("true", "false", "12", "12.3", "puppies", "kittens", "null", "", null));

assertThat(assertions.expression("CAST(a AS array(JSON))")
.binding("a", "JSON '[5, 3.14, [1, 2, 3], \"e\", {\"a\": \"b\"}, null, \"null\", [null]]'"))
Expand Down Expand Up @@ -583,7 +583,7 @@ public void testJsonToArraySmoke()

assertTrinoExceptionThrownBy(() -> assertions.expression("CAST(a AS array(INTEGER))")
.binding("a", "JSON '[1234567890123.456]'").evaluate())
.hasMessage("Cannot cast to array(integer). Out of range for integer: 1.234567890123456E12\n[1.234567890123456E12]")
.hasMessage("Cannot cast to array(integer). Out of range for integer: 1.234567890123456E12\n[1234567890123.456]")
.hasErrorCode(INVALID_CAST_ARGUMENT);

assertThat(assertions.expression("CAST(a AS array(DECIMAL(10,5)))")
Expand Down Expand Up @@ -1082,11 +1082,10 @@ public void testCastJsonToArrayDecimal()
.matches("CAST(ARRAY[DECIMAL '12345.88'] AS ARRAY(DECIMAL(7,2)))");

// array with large decimal
// TODO precision loss!
assertThat(assertions.expression("cast(a as ARRAY(DECIMAL(38,8)))")
.binding("a", "JSON '[123456789012345678901234567890.12345678]'"))
.hasType(new ArrayType(createDecimalType(38, 8)))
.matches("CAST(ARRAY[DECIMAL '123456789012345680000000000000.00000000'] AS ARRAY(DECIMAL(38,8)))");
.matches("CAST(ARRAY[DECIMAL '123456789012345678901234567890.12345678'] AS ARRAY(DECIMAL(38,8)))");

// non-array JSON should fail
assertTrinoExceptionThrownBy(() -> assertions.expression("cast(a as ARRAY(DECIMAL(10,3)))")
Expand Down Expand Up @@ -1158,10 +1157,34 @@ public void testCastJsonToArrayVarchar()
.matches("CAST(ARRAY['test', '', 'data'] AS ARRAY(VARCHAR))");

// array with various types including scientific notation and string "null"
String inputJsonArray = "[true, false, 12, 12.3, 1.23E1, 0, 0.000000000000000, 0e1000, 0e-1000, 1, 100000000000000000000000000000000000000000000000000000000000000000000e-68, 0.100000000000000, \"puppies\", \"kittens\", \"null\", null]";
String expectedVarcharArray = "ARRAY[VARCHAR 'true', 'false', '12', '12.3', '12.3', '0', '0E-15', '0E+1000', '0E-1000', '1', '1.00000000000000000000000000000000000000000000000000000000000000000000', '0.100000000000000', 'puppies', 'kittens', 'null', null]";
assertThat(assertions.expression("cast(a as ARRAY(VARCHAR))")
.binding("a", "JSON '[true, false, 12, 12.3, 1.23E1, \"puppies\", \"kittens\", \"null\", null]'"))
.binding("a", "JSON '" + inputJsonArray + "'"))
.hasType(new ArrayType(VARCHAR))
.matches("CAST(ARRAY['true', 'false', '12', '1.23E1', '1.23E1', 'puppies', 'kittens', 'null', null] AS ARRAY(VARCHAR))");
.matches(expectedVarcharArray);
// Same with json_parse, exercising SpecializeCastWithJsonParse
assertThat(assertions.expression("cast(json_parse(a) as ARRAY(VARCHAR))")
.binding("a", "'" + inputJsonArray + "'"))
.hasType(new ArrayType(VARCHAR))
.matches(expectedVarcharArray);

// Number with leading zeros
assertTrinoExceptionThrownBy(assertions.expression("cast(a as ARRAY(VARCHAR))")
.binding("a", "JSON '[000]'")::evaluate)
.hasMessage("line 3:16: '[000]' is not a valid JSON literal");
assertTrinoExceptionThrownBy(assertions.expression("cast(a as ARRAY(VARCHAR))")
.binding("a", "JSON '[000.0]'")::evaluate)
.hasMessage("line 3:16: '[000.0]' is not a valid JSON literal");
// Number with leading zeros with json_parse, exercising SpecializeCastWithJsonParse
assertTrinoExceptionThrownBy(assertions.expression("cast(json_parse(a) as ARRAY(VARCHAR))")
.binding("a", "'[000]'")::evaluate)
// TODO the exception message could be better
.hasMessage("Cannot cast to array(varchar).\n[000]");
assertTrinoExceptionThrownBy(assertions.expression("cast(json_parse(a) as ARRAY(VARCHAR))")
.binding("a", "'[000.0]'")::evaluate)
// TODO the exception message could be better
.hasMessage("Cannot cast to array(varchar).\n[000.0]");

// non-array JSON should fail
assertTrinoExceptionThrownBy(() -> assertions.expression("cast(a as ARRAY(VARCHAR))")
Expand Down
67 changes: 53 additions & 14 deletions core/trino-main/src/test/java/io/trino/type/TestJsonOperators.java
Original file line number Diff line number Diff line change
Expand Up @@ -705,11 +705,10 @@ public void testCastToDecimal()
.hasType(createDecimalType(10, 3))
.isEqualTo(decimal("128.000", createDecimalType(10, 3)));

// TODO precision loss!
assertThat(assertions.expression("cast(a as DECIMAL(38,8))")
.binding("a", "JSON '123456789012345678901234567890.12345678'"))
.hasType(createDecimalType(38, 8))
.isEqualTo(decimal("123456789012345680000000000000.00000000", createDecimalType(38, 8)));
.isEqualTo(decimal("123456789012345678901234567890.12345678", createDecimalType(38, 8)));

assertThat(assertions.expression("cast(a as DECIMAL(38,8))")
.binding("a", "cast(DECIMAL '123456789012345678901234567890.12345678' as JSON)"))
Expand Down Expand Up @@ -796,10 +795,10 @@ public void testCastToBoolean()
.binding("a", "JSON '1e-324'"))
.isEqualTo(false);

// overflow
assertTrinoExceptionThrownBy(() -> assertions.expression("cast(a as BOOLEAN)")
.binding("a", "JSON '1e309'").evaluate())
.hasErrorCode(INVALID_CAST_ARGUMENT);
// overflow if parsed as double
assertThat(assertions.expression("cast(a as BOOLEAN)")
.binding("a", "JSON '1e309'"))
.isEqualTo(true);

assertThat(assertions.expression("cast(a as BOOLEAN)")
.binding("a", "JSON 'true'"))
Expand Down Expand Up @@ -873,7 +872,42 @@ public void testCastToVarchar()
.hasType(VARCHAR)
.isEqualTo("128");

// overflow, no loss of precision
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '0'"))
.hasType(VARCHAR)
.isEqualTo("0");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '0.000000000000000'"))
.hasType(VARCHAR)
.isEqualTo("0E-15");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '0e1000'"))
.hasType(VARCHAR)
.isEqualTo("0E+1000");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '0e-1000'"))
.hasType(VARCHAR)
.isEqualTo("0E-1000");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '1'"))
.hasType(VARCHAR)
.isEqualTo("1");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '100000000000000000000000000000000000000000000000000000000000000000000e-68'"))
.hasType(VARCHAR)
.isEqualTo("1.00000000000000000000000000000000000000000000000000000000000000000000");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '0.100000000000000'"))
.hasType(VARCHAR)
.isEqualTo("0.100000000000000");

// overflow if parsed as long, no loss of precision
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '12345678901234567890'"))
.hasType(VARCHAR)
Expand All @@ -882,25 +916,30 @@ public void testCastToVarchar()
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '128.9'"))
.hasType(VARCHAR)
.isEqualTo("1.289E2");
.isEqualTo("128.9");

// smaller than minimum subnormal positive
// smaller than double's minimum subnormal positive
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '1e-324'"))
.hasType(VARCHAR)
.isEqualTo("0E0");
.isEqualTo("1E-324");

// overflow
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '123456789012345678901234567890.123456789012345678901234567890'"))
.hasType(VARCHAR)
.isEqualTo("123456789012345678901234567890.123456789012345678901234567890");

// overflow if parsed as double
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '1e309'"))
.hasType(VARCHAR)
.isEqualTo("Infinity");
.isEqualTo("1E+309");

// underflow
// underflow if parsed as double
assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON '-1e309'"))
.hasType(VARCHAR)
.isEqualTo("-Infinity");
.isEqualTo("-1E+309");

assertThat(assertions.expression("cast(a as VARCHAR)")
.binding("a", "JSON 'true'"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ public void testJsonToMap()
.hasType(mapType(BIGINT, VARCHAR))
.isEqualTo(asMap(
ImmutableList.of(1L, 2L, 3L, 5L, 8L, 13L, 21L, 34L, 55L),
asList("true", "false", "12", "1.23E1", "puppies", "kittens", "null", "", null)));
asList("true", "false", "12", "12.3", "puppies", "kittens", "null", "", null)));

assertThat(assertions.expression("cast(a as MAP(VARCHAR, JSON))")
.binding("a", "JSON '{\"k1\": 5, \"k2\": 3.14, \"k3\":[1, 2, 3], \"k4\":\"e\", \"k5\":{\"a\": \"b\"}, \"k6\":null, \"k7\":\"null\", \"k8\":[null]}'"))
Expand Down Expand Up @@ -683,7 +683,7 @@ public void testJsonToMap()

assertTrinoExceptionThrownBy(() -> assertions.expression("cast(a as MAP(VARCHAR, INTEGER))")
.binding("a", "JSON '{\"a\": 1234567890123.456}'").evaluate())
.hasMessage("Cannot cast to map(varchar, integer). Out of range for integer: 1.234567890123456E12\n{\"a\":1.234567890123456E12}")
.hasMessage("Cannot cast to map(varchar, integer). Out of range for integer: 1.234567890123456E12\n{\"a\":1234567890123.456}")
.hasErrorCode(INVALID_CAST_ARGUMENT);

assertTrinoExceptionThrownBy(() -> assertions.expression("cast(a as MAP(BIGINT, BIGINT))")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.StringJoiner;

import static com.fasterxml.jackson.core.JsonFactory.Feature.CANONICALIZE_FIELD_NAMES;
import static com.fasterxml.jackson.databind.DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS;
import static com.fasterxml.jackson.databind.SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS;
import static com.google.common.base.Preconditions.checkState;
import static io.trino.plugin.base.util.JsonUtils.jsonFactoryBuilder;
Expand All @@ -52,6 +53,7 @@ public final class JsonTypeUtil
private static final JsonMapper SORTED_MAPPER = new JsonMapperProvider().get()
.rebuild()
.configure(ORDER_MAP_ENTRIES_BY_KEYS, true)
.configure(USE_BIG_DECIMAL_FOR_FLOATS, true)
.build();

private JsonTypeUtil() {}
Expand Down
Loading