R/pkg/inst/tests/testthat/test_sparkSQL.R (1 addition, 1 deletion)

@@ -1781,7 +1781,7 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
expect_equal(coltypes(x), "map<string,string>")

df <- selectExpr(read.json(sqlContext, jsonPath), "name", "(age * 1.21) as age")
-expect_equal(dtypes(df), list(c("name", "string"), c("age", "double")))
+expect_equal(dtypes(df), list(c("name", "string"), c("age", "decimal(24,2)")))

df1 <- select(df, cast(df$age, "integer"))
coltypes(df) <- c("character", "integer")
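
Why decimal(24,2)? With this change the unsuffixed literal 1.21 is parsed as decimal(3,2) rather than double, and age (a JSON bigint) widens to decimal(20,0), so the standard multiplication result type applies (a worked sketch, assuming Hive's precision rules):

    precision = p1 + p2 + 1 = 20 + 3 + 1 = 24
    scale     = s1 + s2     =  0 + 2     =  2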

@@ -122,7 +122,7 @@ constant
| BigintLiteral
| SmallintLiteral
| TinyintLiteral
-    | DecimalLiteral
+    | DoubleLiteral
| booleanValue
;


@@ -418,9 +418,9 @@ TinyintLiteral
(Digit)+ 'Y'
;

-DecimalLiteral
+DoubleLiteral
    :
-    Number 'B' 'D'
+    Number 'D'
;

ByteLengthLiteral

@@ -623,6 +623,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
val CASE = "(?i)CASE".r

val INTEGRAL = "[+-]?\\d+".r
+val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r

Member: Is `\\d+(\\.\\d*)` correct, or should it be `\\d+(\\.\\d+)`? Do we allow `123.` to be parsed as a decimal?

Contributor Author: We currently do. See this lexer rule:

Number
    :
    ((Digit+ (DOT Digit*)?) | (DOT Digit+)) Exponent?
    ;
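
For illustration, both the lexer rule and the new DECIMAL pattern accept the trailing-dot form; a standalone sketch (hypothetical, not part of the PR; note that nodeToExpr tries INTEGRAL before DECIMAL, so plain integers never reach the decimal case):

    // Sketch: exercising the extractors the same way nodeToExpr does.
    object LiteralRegexCheck extends App {
      val INTEGRAL = "[+-]?\\d+".r
      val DECIMAL = "[+-]?((\\d+(\\.\\d*)?)|(\\.\\d+))".r

      Seq("1", "1.0", ".5", "123.", "-0.18", "6.8e0").foreach {
        case s @ INTEGRAL()  => println(s"$s -> integral literal")
        case s @ DECIMAL(_*) => println(s"$s -> decimal literal")
        case s               => println(s"$s -> neither: scientific notation falls through to toDouble")
      }
    }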


protected def nodeToExpr(node: ASTNode): Expression = node match {
/* Attribute References */
@@ -785,8 +786,8 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
case ast if ast.tokenType == SparkSqlParser.BigintLiteral =>
Literal.create(ast.text.substring(0, ast.text.length() - 1).toLong, LongType)

-case ast if ast.tokenType == SparkSqlParser.DecimalLiteral =>
-Literal(Decimal(ast.text.substring(0, ast.text.length() - 2)))
+case ast if ast.tokenType == SparkSqlParser.DoubleLiteral =>
+Literal(ast.text.toDouble)

case ast if ast.tokenType == SparkSqlParser.Number =>
val text = ast.text
@@ -799,7 +800,10 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
Literal(v.longValue())
case v => Literal(v.underlying())
}
+case DECIMAL(_*) =>
+Literal(BigDecimal(text).underlying())
case _ =>
// Convert a scientifically notated decimal into a double.
Literal(text.toDouble)
}
case ast if ast.tokenType == SparkSqlParser.StringLiteral =>

@@ -661,12 +661,11 @@ object HiveTypeCoercion {
case e if !e.childrenResolved => e
// Find tightest common type for If, if the true value and false value have different types.
case i @ If(pred, left, right) if left.dataType != right.dataType =>
-findTightestCommonTypeToString(left.dataType, right.dataType).map { widestType =>
+findWiderTypeForTwo(left.dataType, right.dataType).map { widestType =>
val newLeft = if (left.dataType == widestType) left else Cast(left, widestType)
val newRight = if (right.dataType == widestType) right else Cast(right, widestType)
If(pred, newLeft, newRight)
}.getOrElse(i) // If there is no applicable conversion, leave expression unchanged.

// Convert If(null literal, _, _) into boolean type.
// In the optimizer, we should short-circuit this directly into false value.
case If(pred, left, right) if pred.dataType == NullType =>

Contributor: Just curious, why do we need to change this?

Contributor Author: This failed the HiveCompatibilitySuite udf_if test on the following query: SELECT IF(FALSE, 1, 1.1). It would not cast the trueCase to a decimal. CASE WHEN ... ELSE did work, and I used similar code for `If`.
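
To make the reported failure concrete, a rough type-level walk-through (assumed types, simplified; see DecimalPrecision for the widening rules):

    import org.apache.spark.sql.types._

    // Hypothetical walk-through of SELECT IF(FALSE, 1, 1.1) after this PR:
    val trueType  = IntegerType        // literal 1
    val falseType = DecimalType(2, 1)  // literal 1.1, now parsed as a decimal
    // findTightestCommonTypeToString has no integer <-> decimal widening, so it
    // returns None and the branches stay uncast. findWiderTypeForTwo first
    // widens IntegerType to DecimalType(10, 0), then takes the wider decimal,
    // so both branches can be cast to a common type (DecimalType(11, 1) here).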

@@ -212,7 +212,7 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
Seq(Row(5, 10, 0), Row(55, 60, 100), Row(555, 560, 600))
)

val pi = "3.1415BD"
val pi = "3.1415"
checkAnswer(
sql(s"SELECT round($pi, -3), round($pi, -2), round($pi, -1), " +
s"round($pi, 0), round($pi, 1), round($pi, 2), round($pi, 3)"),
@@ -367,6 +367,16 @@ class MathExpressionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(
input.toDF("key", "value").selectExpr("abs(key) a").sort("a"),
input.map(pair => Row(pair._2)))
+
+checkAnswer(
+sql("select abs(0), abs(-1), abs(123), abs(-9223372036854775807), abs(9223372036854775807)"),
+Row(0, 1, 123, 9223372036854775807L, 9223372036854775807L)
+)
+
+checkAnswer(
+sql("select abs(0.0), abs(-3.14159265), abs(3.14159265)"),
+Row(BigDecimal("0.0"), BigDecimal("3.14159265"), BigDecimal("3.14159265"))
+)
}

test("log2") {

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala (25 additions, 25 deletions)

@@ -1173,19 +1173,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {

test("Floating point number format") {
checkAnswer(
sql("SELECT 0.3"), Row(0.3)
sql("SELECT 0.3"), Row(BigDecimal(0.3))
)

checkAnswer(
sql("SELECT -0.8"), Row(-0.8)
sql("SELECT -0.8"), Row(BigDecimal(-0.8))
)

checkAnswer(
sql("SELECT .5"), Row(0.5)
sql("SELECT .5"), Row(BigDecimal(0.5))
)

checkAnswer(
sql("SELECT -.18"), Row(-0.18)
sql("SELECT -.18"), Row(BigDecimal(-0.18))
)
}

@@ -1199,11 +1199,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)

checkAnswer(
sql("SELECT 9223372036854775808BD"), Row(new java.math.BigDecimal("9223372036854775808"))
sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
)

checkAnswer(
sql("SELECT -9223372036854775809BD"), Row(new java.math.BigDecimal("-9223372036854775809"))
sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
)
}

@@ -1218,11 +1218,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)

checkAnswer(
sql("SELECT -5.2BD"), Row(BigDecimal(-5.2))
sql("SELECT -5.2"), Row(BigDecimal(-5.2))
)

checkAnswer(
sql("SELECT +6.8"), Row(6.8d)
sql("SELECT +6.8e0"), Row(6.8d)
)

checkAnswer(
@@ -1597,20 +1597,20 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}

test("decimal precision with multiply/division") {
checkAnswer(sql("select 10.3BD * 3.0BD"), Row(BigDecimal("30.90")))
checkAnswer(sql("select 10.3000BD * 3.0BD"), Row(BigDecimal("30.90000")))
checkAnswer(sql("select 10.30000BD * 30.0BD"), Row(BigDecimal("309.000000")))
checkAnswer(sql("select 10.300000000000000000BD * 3.000000000000000000BD"),
checkAnswer(sql("select 10.3 * 3.0"), Row(BigDecimal("30.90")))
checkAnswer(sql("select 10.3000 * 3.0"), Row(BigDecimal("30.90000")))
checkAnswer(sql("select 10.30000 * 30.0"), Row(BigDecimal("309.000000")))
checkAnswer(sql("select 10.300000000000000000 * 3.000000000000000000"),
Row(BigDecimal("30.900000000000000000000000000000000000", new MathContext(38))))
checkAnswer(sql("select 10.300000000000000000BD * 3.0000000000000000000BD"),
checkAnswer(sql("select 10.300000000000000000 * 3.0000000000000000000"),
Row(null))

checkAnswer(sql("select 10.3BD / 3.0BD"), Row(BigDecimal("3.433333")))
checkAnswer(sql("select 10.3000BD / 3.0BD"), Row(BigDecimal("3.4333333")))
checkAnswer(sql("select 10.30000BD / 30.0BD"), Row(BigDecimal("0.343333333")))
checkAnswer(sql("select 10.300000000000000000BD / 3.00000000000000000BD"),
checkAnswer(sql("select 10.3 / 3.0"), Row(BigDecimal("3.433333")))
checkAnswer(sql("select 10.3000 / 3.0"), Row(BigDecimal("3.4333333")))
checkAnswer(sql("select 10.30000 / 30.0"), Row(BigDecimal("0.343333333")))
checkAnswer(sql("select 10.300000000000000000 / 3.00000000000000000"),
Row(BigDecimal("3.433333333333333333333333333", new MathContext(38))))
checkAnswer(sql("select 10.3000000000000000000BD / 3.00000000000000000BD"),
checkAnswer(sql("select 10.3000000000000000000 / 3.00000000000000000"),
Row(BigDecimal("3.4333333333333333333333333333", new MathContext(38))))
}

@@ -1636,13 +1636,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
}

test("precision smaller than scale") {
checkAnswer(sql("select 10.00BD"), Row(BigDecimal("10.00")))
checkAnswer(sql("select 1.00BD"), Row(BigDecimal("1.00")))
checkAnswer(sql("select 0.10BD"), Row(BigDecimal("0.10")))
checkAnswer(sql("select 0.01BD"), Row(BigDecimal("0.01")))
checkAnswer(sql("select 0.001BD"), Row(BigDecimal("0.001")))
checkAnswer(sql("select -0.01BD"), Row(BigDecimal("-0.01")))
checkAnswer(sql("select -0.001BD"), Row(BigDecimal("-0.001")))
checkAnswer(sql("select 10.00"), Row(BigDecimal("10.00")))
checkAnswer(sql("select 1.00"), Row(BigDecimal("1.00")))
checkAnswer(sql("select 0.10"), Row(BigDecimal("0.10")))
checkAnswer(sql("select 0.01"), Row(BigDecimal("0.01")))
checkAnswer(sql("select 0.001"), Row(BigDecimal("0.001")))
checkAnswer(sql("select -0.01"), Row(BigDecimal("-0.01")))
checkAnswer(sql("select -0.001"), Row(BigDecimal("-0.001")))
}

test("external sorting updates peak execution memory") {

@@ -442,13 +442,13 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {

// Number and String conflict: resolve the type as number in this query.
checkAnswer(
sql("select num_str + 1.2BD from jsonTable where num_str > 14"),
sql("select num_str + 1.2 from jsonTable where num_str > 14"),
Row(BigDecimal("92233720368547758071.2"))
)

// Number and String conflict: resolve the type as number in this query.
checkAnswer(
sql("select num_str + 1.2BD from jsonTable where num_str >= 92233720368547758060BD"),
sql("select num_str + 1.2 from jsonTable where num_str >= 92233720368547758060"),
Row(new java.math.BigDecimal("92233720368547758071.2"))
)


@@ -323,7 +323,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
// Feature removed in HIVE-11145
"alter_partition_protect_mode",
"drop_partitions_ignore_protection",
"protectmode"
"protectmode",

// Spark parser treats numerical literals differently: it creates decimals instead of doubles.
"udf_abs",
"udf_format_number",
"udf_round",
"udf_round_3",
"view_cast"
)
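
These compatibility tests compare textual results against golden files generated by Hive, where the literals above are doubles; with this PR the same literal produces a decimal, so the printed values and types no longer match. A hedged sketch in the suites' own idiom (assuming sql/checkAnswer in scope):

    // After this PR the literal is a decimal, not a double:
    checkAnswer(sql("SELECT abs(-3.14)"), Row(BigDecimal("3.14")))  // was Row(3.14)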

/**
@@ -884,7 +891,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_10_trims",
"udf_E",
"udf_PI",
"udf_abs",
"udf_acos",
"udf_add",
"udf_array",
@@ -928,7 +934,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_find_in_set",
"udf_float",
"udf_floor",
"udf_format_number",
"udf_from_unixtime",
"udf_greaterthan",
"udf_greaterthanorequal",
@@ -976,8 +981,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_regexp_replace",
"udf_repeat",
"udf_rlike",
"udf_round",
"udf_round_3",
"udf_rpad",
"udf_rtrim",
"udf_sign",

@@ -559,7 +559,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte
"""
|select p_mfgr,p_name, p_size,
|histogram_numeric(p_retailprice, 5) over w1 as hist,
-|percentile(p_partkey, 0.5) over w1 as per,
+|percentile(p_partkey, cast(0.5 as double)) over w1 as per,
|row_number() over(distribute by p_mfgr sort by p_name) as rn
|from part
|window w1 as (distribute by p_mfgr sort by p_mfgr, p_name

@@ -25,20 +25,26 @@ import org.apache.spark.sql.hive.test.TestHive
* A set of tests that validate type promotion and coercion rules.
*/
class HiveTypeCoercionSuite extends HiveComparisonTest {
val baseTypes = Seq("1", "1.0", "1L", "1S", "1Y", "'1'")
val baseTypes = Seq(
("1", "1"),
("1.0", "CAST(1.0 AS DOUBLE)"),
("1L", "1L"),
("1S", "1S"),
("1Y", "1Y"),
("'1'", "'1'"))

-baseTypes.foreach { i =>
-baseTypes.foreach { j =>
-createQueryTest(s"$i + $j", s"SELECT $i + $j FROM src LIMIT 1")
+baseTypes.foreach { case (ni, si) =>
+baseTypes.foreach { case (nj, sj) =>
+createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
}
}

val nullVal = "null"
-baseTypes.init.foreach { i =>
+baseTypes.init.foreach { case (i, s) =>
createQueryTest(s"case when then $i else $nullVal end ",
-s"SELECT case when true then $i else $nullVal end FROM src limit 1")
+s"SELECT case when true then $s else $nullVal end FROM src limit 1")
createQueryTest(s"case when then $nullVal else $i end ",
-s"SELECT case when true then $nullVal else $i end FROM src limit 1")
+s"SELECT case when true then $nullVal else $s end FROM src limit 1")
}
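
The (name, expression) pairs presumably keep the original golden-file test names while pinning 1.0 back to a double inside the query, since the unsuffixed literal would now be parsed as a decimal and change the recorded answers. For instance, the generated query for the "1 + 1.0" test becomes (sketch derived from the loop above):

    createQueryTest("1 + 1.0", "SELECT 1 + CAST(1.0 AS DOUBLE) FROM src LIMIT 1")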

test("[SPARK-2210] boolean cast on boolean value should be removed") {

@@ -143,10 +143,10 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils {
}

test("Generic UDAF aggregates") {
checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999)) FROM src LIMIT 1"),
checkAnswer(sql("SELECT ceiling(percentile_approx(key, 0.99999D)) FROM src LIMIT 1"),
sql("SELECT max(key) FROM src LIMIT 1").collect().toSeq)

checkAnswer(sql("SELECT percentile_approx(100.0, array(0.9, 0.9)) FROM src LIMIT 1"),
checkAnswer(sql("SELECT percentile_approx(100.0D, array(0.9D, 0.9D)) FROM src LIMIT 1"),
sql("SELECT array(100, 100) FROM src LIMIT 1").collect().toSeq)
}


@@ -1012,9 +1012,9 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
| java_method("java.lang.String", "isEmpty"),
| java_method("java.lang.Math", "max", 2, 3),
| java_method("java.lang.Math", "min", 2, 3),
| java_method("java.lang.Math", "round", 2.5),
| java_method("java.lang.Math", "exp", 1.0),
| java_method("java.lang.Math", "floor", 1.9)
| java_method("java.lang.Math", "round", 2.5D),
| java_method("java.lang.Math", "exp", 1.0D),
| java_method("java.lang.Math", "floor", 1.9D)
|FROM src tablesample (1 rows)
""".stripMargin),
Row(
@@ -1461,6 +1461,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
"""
|SELECT json_tuple(json, 'f1', 'f2'), 3.14, str
|FROM (SELECT '{"f1": "value1", "f2": 12}' json, 'hello' as str) test
""".stripMargin), Row("value1", "12", 3.14, "hello"))
""".stripMargin), Row("value1", "12", BigDecimal("3.14"), "hello"))
}
}

@@ -18,6 +18,7 @@
package org.apache.spark.sql.sources

import java.io.File
+import java.net.URI

import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
@@ -65,6 +66,11 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle

private val df = (0 until 50).map(i => (i % 5, i % 13, i.toString)).toDF("i", "j", "k")

+def tableDir: File = {
+val identifier = hiveContext.sqlParser.parseTableIdentifier("bucketed_table")
+new File(URI.create(hiveContext.catalog.hiveDefaultTableFilePath(identifier)))
+}

Contributor Author: @cloud-fan could you take a look at this?

Contributor: This looks reasonable; just curious why the previous one stopped working after your PR...

Contributor Author: I have no idea. Nothing in the PR seems to touch this - it seems a bit random. The only thing I am doing here is making absolutely sure the paths are the same; the difference in paths is caused by the use of the Hive current database in the path name.
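
A minimal sketch of the path difference mentioned above (hypothetical values; the exact layout depends on the Hive configuration):

    // Old lookup: always directly under the warehouse root.
    val oldDir = "/user/hive/warehouse/bucketed_table"
    // New lookup: hiveDefaultTableFilePath resolves against the current Hive
    // database, e.g. after USE testdb:
    val newDir = "/user/hive/warehouse/testdb.db/bucketed_table"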

/**
* A helper method to check the bucket write functionality in low level, i.e. check the written
* bucket files to see if the data are correct. User should pass in a data dir that these bucket
@@ -127,7 +133,6 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
.bucketBy(8, "j", "k")
.saveAsTable("bucketed_table")

-val tableDir = new File(hiveContext.warehousePath, "bucketed_table")
for (i <- 0 until 5) {
testBucketing(new File(tableDir, s"i=$i"), source, 8, Seq("j", "k"))
}
@@ -145,7 +150,6 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
.sortBy("k")
.saveAsTable("bucketed_table")

-val tableDir = new File(hiveContext.warehousePath, "bucketed_table")
for (i <- 0 until 5) {
testBucketing(new File(tableDir, s"i=$i"), source, 8, Seq("j"), Seq("k"))
}
@@ -161,7 +165,6 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
.bucketBy(8, "i", "j")
.saveAsTable("bucketed_table")

-val tableDir = new File(hiveContext.warehousePath, "bucketed_table")
testBucketing(tableDir, source, 8, Seq("i", "j"))
}
}
@@ -176,7 +179,6 @@ class BucketedWriteSuite extends QueryTest with SQLTestUtils with TestHiveSingle
.sortBy("k")
.saveAsTable("bucketed_table")

-val tableDir = new File(hiveContext.warehousePath, "bucketed_table")
testBucketing(tableDir, source, 8, Seq("i", "j"), Seq("k"))
}
}