@@ -0,0 +1,32 @@
-- SPARK-23179: SQL ANSI 2011 states that in case of overflow during arithmetic operations,
-- an exception should be thrown instead of returning NULL.
-- This is what most of the SQL DBs do (e.g., SQL Server, DB2).

Member Author:
Since inputs/decimalArithmeticOperations.sql produces nondeterministic output with ansi=true, I inlined the ANSI-related queries in this new file.
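
(Editor's note: if I read the test-harness convention correctly — an assumption on my part, not something stated in this thread — input files under an ansi/ subdirectory are run with ANSI mode already enabled, which is why the inlined file needs no SET statements. A hypothetical sketch of such a rule; the real SQLQueryTestSuite logic may differ:

// Hypothetical sketch: enable ANSI mode for test files under inputs/ansi/.
import java.io.File

def extraConfsFor(testFile: File): Seq[(String, String)] = {
  if (testFile.getParentFile.getName == "ansi") {
    Seq("spark.sql.ansi.enabled" -> "true")  // assumed harness behavior
  } else {
    Seq.empty
  }
}
)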

Contributor:
This is surprising. What causes the nondeterminism?

Member Author:

For example, the queries below from inputs/decimalArithmeticOperations.sql throw multiple exceptions, each with a different error message, in the executors when ansi=true:

sql("SET spark.sql.decimalOperations.allowPrecisionLoss=false")
sql("SET spark.sql.ansi.enabled=true")
sql("create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet")
sql("insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123), (3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789)")
sql("select id, a+b, a-b, a*b, a/b from decimals_test order by id").show()

java.lang.ArithmeticException: Decimal(expanded,138698367904130467.51562262075019052100,38,20}) cannot be represented as Decimal(38, 36).
	at org.apache.spark.sql.types.Decimal.toPrecision(Decimal.scala:357)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
...
java.lang.ArithmeticException: Decimal(expanded,99900.000000000000000000000000000000000,38,33}) cannot be represented as Decimal(38, 36).
	at org.apache.spark.sql.types.Decimal.toPrecision(Decimal.scala:357)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
...
java.lang.ArithmeticException: Decimal(expanded,152.35802342966751000000000000000000000,38,35}) cannot be represented as Decimal(38, 36).
	at org.apache.spark.sql.types.Decimal.toPrecision(Decimal.scala:357)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
...
<more exceptions below>

So, the output string printed in decimalArithmeticOperations.sql.out depends on timing.
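
(Editor's note: to make the timing dependence concrete, here is a minimal standalone sketch — my own illustration, not code from this PR. Several tasks fail concurrently, and whichever failure reaches the driver first determines the message that gets printed:

import org.apache.spark.sql.SparkSession

object TaskTimingDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[4]").appName("demo").getOrCreate()
    try {
      // Four partitions, each throwing a differently-worded exception.
      spark.sparkContext.parallelize(1 to 4, numSlices = 4)
        .foreach(i => throw new ArithmeticException(s"overflow in partition $i"))
    } catch {
      // The driver-side SparkException wraps whichever task failure arrived
      // first, so the printed message can change from run to run.
      case e: Exception => println(e.getMessage)
    } finally {
      spark.stop()
    }
  }
}
)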

Contributor:

Ah, this is nasty, but I have no better ideas.

Member Author (@maropu, Nov 14, 2019):

Yeah, I have no idea either.

-- tests for decimals handling in operations
create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet;

insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123),
(3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789);

-- test operations between decimals and constants
select id, a*10, b/10 from decimals_test order by id;

-- test operations on constants
select 10.3 * 3.0;
select 10.3000 * 3.0;
select 10.30000 * 30.0;
select 10.300000000000000000 * 3.000000000000000000;
select 10.300000000000000000 * 3.0000000000000000000;

-- arithmetic operations causing an overflow throw exception
select (5e36 + 0.1) + 5e36;
select (-4e36 - 0.1) - 7e36;
select 12345678901234567890.0 * 12345678901234567890.0;
select 1e35 / 0.1;

-- arithmetic operations causing a precision loss throw exception
select 123456789123456789.1234567890 * 1.123456789123456789;
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;

drop table decimals_test;
@@ -0,0 +1 @@
--import higher-order-functions.sql
@@ -1,6 +1,3 @@
-- Turns on ANSI mode
SET spark.sql.ansi.enabled=true;

select
'1' second,
2 seconds,
@@ -184,5 +181,7 @@ select date '2012-01-01' + interval (a + 1) day from t;

select date '2012-01-01' + (a + 1) day from t;

-- Turns off ANSI mode
SET spark.sql.ansi.enabled=false;
-- malformed interval literal with ansi mode
select 1 year to month;
select '1' year to second;
select 1 year '2-1' year to month;
@@ -0,0 +1,2 @@
-- malformed interval literal with ansi mode
Contributor:
Hmm, do we still have interval-related tests in literals.sql?

Member Author:
Is it OK to move all the interval-related queries here into interval.sql or ansi/interval.sql?

Contributor:
I think so. @yaooqinn, do you have time to take it later? IIRC there are a few interval tests in group-by.sql as well.

Member Author:
OK. As for literals.sql, I'll move all the interval-related queries into the correct files in this PR.

Member:
Some cases use interval but are actually meant to verify literal handling, such as:

-- awareness of the negative sign before type
select -integer '7';
select -date '1999-01-01';
select -timestamp '1999-01-01';
select -x'2379ACFe';
select +integer '7';
select +interval '1 second';

And group-by.sql contains the avg and sum support tests for intervals, which should be moved to interval.sql.
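
(Editor's note: a hedged stand-in for the kind of interval aggregates meant here — I haven't checked group-by.sql's exact wording, so treat this as illustrative only:

// Hypothetical stand-in for the interval avg/sum tests in group-by.sql;
// the actual queries there may be worded differently.
spark.sql(
  "select avg(v), sum(v) from values (interval '1 day'), (interval '3 days') as t(v)"
).show()
)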

--import literals.sql
Member:
This one still fails, too:

[info] - literals.sql *** FAILED *** (1 second, 989 milliseconds)

@@ -83,28 +83,4 @@ select 12345678912345678912345678912.1234567 + 9999999999999999999999999999999.1
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;

-- throw an exception instead of returning NULL, according to SQL ANSI 2011
set spark.sql.ansi.enabled=true;

-- test operations between decimals and constants
select id, a*10, b/10 from decimals_test order by id;

-- test operations on constants
select 10.3 * 3.0;
select 10.3000 * 3.0;
select 10.30000 * 30.0;
select 10.300000000000000000 * 3.000000000000000000;
select 10.300000000000000000 * 3.0000000000000000000;

-- arithmetic operations causing an overflow throw exception
select (5e36 + 0.1) + 5e36;
select (-4e36 - 0.1) - 7e36;
select 12345678901234567890.0 * 12345678901234567890.0;
select 1e35 / 0.1;

-- arithmetic operations causing a precision loss throw exception
select 123456789123456789.1234567890 * 1.123456789123456789;
select 123456789123456789.1234567890 * 1.123456789123456789;
select 12345678912345.123456789123 / 0.000000012345678;

drop table decimals_test;
@@ -87,8 +87,3 @@ select transform_values(ys, (k, v) -> k + v) as v from nested;
-- use non-reserved keywords: all is non-reserved only if !ansi
select transform(ys, all -> all * all) as v from values (array(32, 97)) as t(ys);
select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys);

set spark.sql.ansi.enabled=true;
select transform(ys, all -> all * all) as v from values (array(32, 97)) as t(ys);
select transform(ys, (all, i) -> all + i) as v from values (array(32, 97)) as t(ys);
set spark.sql.ansi.enabled=false;
43 changes: 43 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -88,3 +88,46 @@ select justify_interval(interval '1 month -59 day 25 hour');
select justify_days(interval '1 month 59 day -25 hour');
select justify_hours(interval '1 month 59 day -25 hour');
select justify_interval(interval '1 month 59 day -25 hour');

-- interval
select interval 13.123456789 seconds, interval -13.123456789 second;
select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond 9 microsecond;
select interval '30' year '25' month '-100' day '40' hour '80' minute '299.889987299' second;
select interval '0 0:0:0.1' day to second;
select interval '10-9' year to month;
select interval '20 15:40:32.99899999' day to hour;
select interval '20 15:40:32.99899999' day to minute;
select interval '20 15:40:32.99899999' day to second;
select interval '15:40:32.99899999' hour to minute;
select interval '15:40.99899999' hour to second;
select interval '15:40' hour to second;
select interval '15:40:32.99899999' hour to second;
select interval '20 40:32.99899999' minute to second;
select interval '40:32.99899999' minute to second;
select interval '40:32' minute to second;

-- ns is not supported
select interval 10 nanoseconds;

-- map + interval test
select map(1, interval 1 day, 2, interval 3 week);

-- typed interval expression
select interval 'interval 3 year 1 hour';
select interval '3 year 1 hour';

-- malformed interval literal
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select interval '1' year to second;
select interval '10-9' year to month '2-1' year to month;
select interval '10-9' year to month '12:11:10' hour to second;
select interval '1 15:11' day to minute '12:11:10' hour to second;
select interval 1 year '2-1' year to month;
select interval 1 year '12:11:10' hour to second;
select interval '10-9' year to month '1' year;
select interval '12:11:10' hour to second '1' year;

-- awareness of the positive sign before interval type
select +interval '1 second';
53 changes: 1 addition & 52 deletions sql/core/src/test/resources/sql-tests/inputs/literals.sql
@@ -82,25 +82,6 @@ select tImEstAmp '2016-03-11 20:54:00.000';
-- invalid timestamp
select timestamp '2016-33-11 20:54:00.000';

-- interval
select interval 13.123456789 seconds, interval -13.123456789 second;
select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond 9 microsecond;
select interval '30' year '25' month '-100' day '40' hour '80' minute '299.889987299' second;
select interval '0 0:0:0.1' day to second;
select interval '10-9' year to month;
select interval '20 15:40:32.99899999' day to hour;
select interval '20 15:40:32.99899999' day to minute;
select interval '20 15:40:32.99899999' day to second;
select interval '15:40:32.99899999' hour to minute;
select interval '15:40.99899999' hour to second;
select interval '15:40' hour to second;
select interval '15:40:32.99899999' hour to second;
select interval '20 40:32.99899999' minute to second;
select interval '40:32.99899999' minute to second;
select interval '40:32' minute to second;
-- ns is not supported
select interval 10 nanoseconds;

-- unsupported data type
select GEO '(10,-6)';

@@ -119,46 +100,14 @@ select X'XuZ';
-- Hive literal_double test.
SELECT 3.14, -3.14, 3.14e8, 3.14e-8, -3.14e8, -3.14e-8, 3.14e+8, 3.14E8, 3.14E-8;

-- map + interval test
select map(1, interval 1 day, 2, interval 3 week);

-- typed interval expression
select interval 'interval 3 year 1 hour';
select interval '3 year 1 hour';

-- typed integer expression
select integer '7';
select integer'7';
select integer '2147483648';

-- malformed interval literal
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select interval '1' year to second;
select interval '10-9' year to month '2-1' year to month;
select interval '10-9' year to month '12:11:10' hour to second;
select interval '1 15:11' day to minute '12:11:10' hour to second;
select interval 1 year '2-1' year to month;
select interval 1 year '12:11:10' hour to second;
select interval '10-9' year to month '1' year;
select interval '12:11:10' hour to second '1' year;
-- malformed interval literal with ansi mode
SET spark.sql.ansi.enabled=true;
select interval;
select interval 1 fake_unit;
select interval 1 year to month;
select 1 year to month;
Contributor:
We should move some of these tests to ansi/interval.sql. They are not duplicate tests; they test the forms without the leading "interval" keyword.

Member Author:
Ah, I see. I'll recheck later. Thanks!

select interval '1' year to second;
select '1' year to second;
select interval 1 year '2-1' year to month;
select 1 year '2-1' year to month;
SET spark.sql.ansi.enabled=false;

-- awareness of the negative sign before type
-- awareness of the negative/positive sign before type
select -integer '7';
select -date '1999-01-01';
select -timestamp '1999-01-01';
select -x'2379ACFe';
select +integer '7';
select +interval '1 second';
@@ -0,0 +1,138 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 16


-- !query 0
create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet
-- !query 0 schema
struct<>
-- !query 0 output



-- !query 1
insert into decimals_test values(1, 100.0, 999.0), (2, 12345.123, 12345.123),
(3, 0.1234567891011, 1234.1), (4, 123456789123456789.0, 1.123456789123456789)
-- !query 1 schema
struct<>
-- !query 1 output



-- !query 2
select id, a*10, b/10 from decimals_test order by id
-- !query 2 schema
struct<id:int,(CAST(a AS DECIMAL(38,18)) * CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,15),(CAST(b AS DECIMAL(38,18)) / CAST(CAST(10 AS DECIMAL(2,0)) AS DECIMAL(38,18))):decimal(38,18)>
-- !query 2 output
1 1000 99.9
2 123451.23 1234.5123
3 1.234567891011 123.41
4 1234567891234567890 0.112345678912345679


-- !query 3
select 10.3 * 3.0
-- !query 3 schema
struct<(CAST(10.3 AS DECIMAL(3,1)) * CAST(3.0 AS DECIMAL(3,1))):decimal(6,2)>
-- !query 3 output
30.9


-- !query 4
select 10.3000 * 3.0
-- !query 4 schema
struct<(CAST(10.3000 AS DECIMAL(6,4)) * CAST(3.0 AS DECIMAL(6,4))):decimal(9,5)>
-- !query 4 output
30.9


-- !query 5
select 10.30000 * 30.0
-- !query 5 schema
struct<(CAST(10.30000 AS DECIMAL(7,5)) * CAST(30.0 AS DECIMAL(7,5))):decimal(11,6)>
-- !query 5 output
309


-- !query 6
select 10.300000000000000000 * 3.000000000000000000
-- !query 6 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(20,18)) * CAST(3.000000000000000000 AS DECIMAL(20,18))):decimal(38,34)>
-- !query 6 output
30.9


-- !query 7
select 10.300000000000000000 * 3.0000000000000000000
-- !query 7 schema
struct<(CAST(10.300000000000000000 AS DECIMAL(21,19)) * CAST(3.0000000000000000000 AS DECIMAL(21,19))):decimal(38,34)>
-- !query 7 output
30.9


-- !query 8
select (5e36 + 0.1) + 5e36
-- !query 8 schema
struct<>
-- !query 8 output
java.lang.ArithmeticException
Decimal(expanded,10000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1).


-- !query 9
select (-4e36 - 0.1) - 7e36
-- !query 9 schema
struct<>
-- !query 9 output
java.lang.ArithmeticException
Decimal(expanded,-11000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1).


-- !query 10
select 12345678901234567890.0 * 12345678901234567890.0
-- !query 10 schema
struct<>
-- !query 10 output
java.lang.ArithmeticException
Decimal(expanded,1.5241578753238836750190519987501905210E+38,38,-1}) cannot be represented as Decimal(38, 2).


-- !query 11
select 1e35 / 0.1
-- !query 11 schema
struct<>
-- !query 11 output
java.lang.ArithmeticException
Decimal(expanded,1000000000000000000000000000000000000,37,0}) cannot be represented as Decimal(38, 6).


-- !query 12
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 12 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)>
-- !query 12 output
138698367904130467.654320988515622621


-- !query 13
select 123456789123456789.1234567890 * 1.123456789123456789
-- !query 13 schema
struct<(CAST(123456789123456789.1234567890 AS DECIMAL(36,18)) * CAST(1.123456789123456789 AS DECIMAL(36,18))):decimal(38,18)>
-- !query 13 output
138698367904130467.654320988515622621


-- !query 14
select 12345678912345.123456789123 / 0.000000012345678
-- !query 14 schema
struct<(CAST(12345678912345.123456789123 AS DECIMAL(29,15)) / CAST(1.2345678E-8 AS DECIMAL(29,15))):decimal(38,9)>
-- !query 14 output
1000000073899961059796.725866332


-- !query 15
drop table decimals_test
-- !query 15 schema
struct<>
-- !query 15 output
