@@ -553,6 +553,34 @@ public UTF8String trim() {
return copyUTF8String(s, e);
}

/**
* Trims whitespaces (<= ASCII 32) from both ends of this string.
Member: Interesting, so SQL trim just removes space, and java.lang.String.trim() removes everything <= 32. Maybe we could refer to that in this doc, that this is the purpose of this additional trim method.

Contributor: yea, let's mention it's the same as java.lang.String.trim

*
* Note that, this method is the same as java's {@link String#trim}, and different from
* {@link UTF8String#trim()} which only remove only spaces(= ASCII 32) from both ends.
Contributor: nit: remove one "only"

*
* @return A UTF8String whose value is this UTF8String, with any leading and trailing white
* space removed, or this UTF8String if it has no leading or trailing whitespace.
*
*/
public UTF8String trimAll() {
int s = 0;
// skip all of the whitespaces (<=0x20) in the left side
while (s < this.numBytes && getByte(s) <= 0x20) s++;
Contributor: shall we use ' ' instead of 0x20?

Member (author): maybe also replace them in method trim()? I'm not sure whether it's OK to do that in this PR.

if (s == this.numBytes) {
// Everything trimmed
return EMPTY_UTF8;
}
// skip all of the whitespaces (<=0x20) in the right side
int e = this.numBytes - 1;
while (e > s && getByte(e) <= 0x20) e--;
if (s == 0 && e == numBytes - 1) {
// Nothing trimmed
return this;
}
return copyUTF8String(s, e);
Contributor: looking at the caller side, I think it's safe to not copy the data. We can add a caveat in the javadoc: this method doesn't copy the data, and the caller should copy it themselves if they want to hold it for a while.

Member (author): do we not copy when it is EMPTY_UTF8 either? A bit odd if we do it differently.

Contributor: ah, that's a good point. OK, let's leave it.

}
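
For readers skimming the review, here is a minimal sketch (not part of the diff) of how the two trim variants differ, following the discussion above. It assumes only the public UTF8String.fromString/trim/trimAll API shown in this PR:

import org.apache.spark.unsafe.types.UTF8String;

public class TrimAllSketch {
  public static void main(String[] args) {
    UTF8String s = UTF8String.fromString("\t 2019-01-01 \n");

    // trim() strips only the space character (ASCII 32); the leading tab
    // blocks it, so the value comes back unchanged (tab and newline kept).
    System.out.println("[" + s.trim() + "]");

    // trimAll() strips every character <= ASCII 32 from both ends, the same
    // behaviour as java.lang.String.trim(), printing "[2019-01-01]".
    System.out.println("[" + s.trimAll() + "]");
  }
}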

/**
* Trims instances of the given trim string from both ends of this string.
*
@@ -227,6 +227,7 @@ public void substring() {
@Test
public void trims() {
assertEquals(fromString("1"), fromString("1").trim());
assertEquals(fromString("1"), fromString("1\t").trimAll());

assertEquals(fromString("hello"), fromString(" hello ").trim());
assertEquals(fromString("hello "), fromString(" hello ").trimLeft());
@@ -42,7 +42,7 @@ case class PostgreCastToBoolean(child: Expression, timeZoneId: Option[String])
override def castToBoolean(from: DataType): Any => Any = from match {
case StringType =>
buildCast[UTF8String](_, str => {
val s = str.trim().toLowerCase()
val s = str.trimAll().toLowerCase()
if (StringUtils.isTrueString(s)) {
true
} else if (StringUtils.isFalseString(s)) {
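
A rough illustration (not part of the PR, and not the actual cast code path) of why the Postgres-style boolean cast wants trimAll() rather than trim(): with plain trim() a trailing control character survives, so the lower-cased value would not equal any of the accepted boolean literals, which are assumed here to include "true".

import org.apache.spark.unsafe.types.UTF8String;

public class BooleanCastTrimSketch {
  public static void main(String[] args) {
    UTF8String raw = UTF8String.fromString(" true\t");

    // Old behaviour: only spaces are stripped, so the tab survives and the
    // value ("true" plus a tab) would not match a recognized boolean literal.
    System.out.println(raw.trim().toLowerCase());

    // New behaviour: control characters are stripped too, giving "true", so
    // the lookup against the accepted true/false strings can succeed.
    System.out.println(raw.trimAll().toLowerCase());
  }
}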
@@ -205,7 +205,7 @@ object DateTimeUtils {
val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0)
var i = 0
var currentSegmentValue = 0
val bytes = s.trim.getBytes
val bytes = s.trimAll().getBytes
val specialTimestamp = convertSpecialTimestamp(bytes, timeZoneId)
if (specialTimestamp.isDefined) return specialTimestamp
var j = 0
@@ -372,7 +372,7 @@ object DateTimeUtils {
val segments: Array[Int] = Array[Int](1, 1, 1)
var i = 0
var currentSegmentValue = 0
val bytes = s.trim.getBytes
val bytes = s.trimAll().getBytes
val specialDate = convertSpecialDate(bytes, zoneId)
if (specialDate.isDefined) return specialDate
var j = 0
@@ -465,7 +465,7 @@ object IntervalUtils {
throwIAE("interval string cannot be null")
}
// scalastyle:off caselocale .toLowerCase
val s = input.trim.toLowerCase
val s = input.trimAll().toLowerCase
// scalastyle:on
val bytes = s.getBytes
if (bytes.isEmpty) {
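
The DateTimeUtils and IntervalUtils call sites above share the same preprocessing pattern. A small sketch (illustrative only, using just the UTF8String API rather than the real parsers) of what the byte-level parser now sees for an interval literal with a leading control character:

import org.apache.spark.unsafe.types.UTF8String;

public class IntervalPreprocessSketch {
  public static void main(String[] args) {
    UTF8String input = UTF8String.fromString("\t interval 1 day");

    // Same preprocessing as the call sites: strip control characters from
    // both ends, then lower-case before scanning the bytes.
    UTF8String s = input.trimAll().toLowerCase();
    byte[] bytes = s.getBytes();

    System.out.println(s);            // "interval 1 day"
    System.out.println(bytes.length); // 14 -- the leading "\t " is gone

    // An all-whitespace input trims down to the empty string, so the existing
    // empty-input check in the parser still rejects it.
    System.out.println(UTF8String.fromString(" \t ").trimAll().numBytes()); // 0
  }
}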
3 changes: 3 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/datetime.sql
@@ -36,3 +36,6 @@ select date '2001-10-01' - 7;
select date '2001-10-01' - date '2001-09-28';
select date'2020-01-01' - timestamp'2019-10-06 10:11:12.345678';
select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01';

select date '2019-01-01\t';
select timestamp '2019-01-01\t';
5 changes: 4 additions & 1 deletion sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -256,4 +256,7 @@ from interval_arithmetic;
select
interval '99 11:22:33.123456789' day to second + interval '10 9:8:7.123456789' day to second,
interval '99 11:22:33.123456789' day to second - interval '10 9:8:7.123456789' day to second
from interval_arithmetic;
from interval_arithmetic;

-- control characters as white spaces
select interval '\t interval 1 day';
80 changes: 44 additions & 36 deletions sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 130
-- Number of queries: 131


-- !query 0
@@ -1116,34 +1116,42 @@ struct<(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' + INTERVAL '10


-- !query 118
select 1 year 2 days
select interval '\t interval 1 day'
-- !query 118 schema
struct<INTERVAL '1 years 2 days':interval>
struct<INTERVAL '1 days':interval>
-- !query 118 output
1 years 2 days
1 days


-- !query 119
select '10-9' year to month
select 1 year 2 days
-- !query 119 schema
struct<INTERVAL '10 years 9 months':interval>
struct<INTERVAL '1 years 2 days':interval>
-- !query 119 output
10 years 9 months
1 years 2 days


-- !query 120
select '20 15:40:32.99899999' day to second
select '10-9' year to month
-- !query 120 schema
struct<INTERVAL '20 days 15 hours 40 minutes 32.998999 seconds':interval>
struct<INTERVAL '10 years 9 months':interval>
-- !query 120 output
20 days 15 hours 40 minutes 32.998999 seconds
10 years 9 months


-- !query 121
select 30 day day
select '20 15:40:32.99899999' day to second
-- !query 121 schema
struct<>
struct<INTERVAL '20 days 15 hours 40 minutes 32.998999 seconds':interval>
-- !query 121 output
20 days 15 hours 40 minutes 32.998999 seconds


-- !query 122
select 30 day day
-- !query 122 schema
struct<>
-- !query 122 output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 14)
@@ -1153,27 +1161,27 @@ select 30 day day
--------------^^^


-- !query 122
-- !query 123
select date'2012-01-01' - '2-2' year to month
-- !query 122 schema
-- !query 123 schema
struct<CAST(CAST(DATE '2012-01-01' AS TIMESTAMP) - INTERVAL '2 years 2 months' AS DATE):date>
-- !query 122 output
-- !query 123 output
2009-11-01


-- !query 123
-- !query 124
select 1 month - 1 day
-- !query 123 schema
-- !query 124 schema
struct<INTERVAL '1 months -1 days':interval>
-- !query 123 output
-- !query 124 output
1 months -1 days


-- !query 124
-- !query 125
select 1 year to month
-- !query 124 schema
-- !query 125 schema
struct<>
-- !query 124 output
-- !query 125 output
org.apache.spark.sql.catalyst.parser.ParseException

The value of from-to unit must be a string(line 1, pos 7)
@@ -1183,11 +1191,11 @@ select 1 year to month
-------^^^


-- !query 125
-- !query 126
select '1' year to second
-- !query 125 schema
-- !query 126 schema
struct<>
-- !query 125 output
-- !query 126 output
org.apache.spark.sql.catalyst.parser.ParseException

Intervals FROM year TO second are not supported.(line 1, pos 7)
@@ -1197,11 +1205,11 @@ select '1' year to second
-------^^^


-- !query 126
-- !query 127
select 1 year '2-1' year to month
-- !query 126 schema
-- !query 127 schema
struct<>
-- !query 126 output
-- !query 127 output
org.apache.spark.sql.catalyst.parser.ParseException

Can only have a single from-to unit in the interval literal syntax(line 1, pos 14)
@@ -1211,11 +1219,11 @@ select 1 year '2-1' year to month
--------------^^^


-- !query 127
-- !query 128
select (-30) day
-- !query 127 schema
-- !query 128 schema
struct<>
-- !query 127 output
-- !query 128 output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 13)
@@ -1225,11 +1233,11 @@ select (-30) day
-------------^^^


-- !query 128
-- !query 129
select (a + 1) day
-- !query 128 schema
-- !query 129 schema
struct<>
-- !query 128 output
-- !query 129 output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 15)
@@ -1239,11 +1247,11 @@ select (a + 1) day
---------------^^^


-- !query 129
-- !query 130
select 30 day day day
-- !query 129 schema
-- !query 130 schema
struct<>
-- !query 129 output
-- !query 130 output
org.apache.spark.sql.catalyst.parser.ParseException

no viable alternative at input 'day'(line 1, pos 14)
18 changes: 17 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/datetime.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 17
-- Number of queries: 19


-- !query 0
@@ -145,3 +145,19 @@ select timestamp'2019-10-06 10:11:12.345678' - date'2020-01-01'
struct<subtracttimestamps(TIMESTAMP '2019-10-06 10:11:12.345678', CAST(DATE '2020-01-01' AS TIMESTAMP)):interval>
-- !query 16 output
-2078 hours -48 minutes -47.654322 seconds


-- !query 17
select date '2019-01-01\t'
-- !query 17 schema
struct<DATE '2019-01-01':date>
-- !query 17 output
2019-01-01


-- !query 18
select timestamp '2019-01-01\t'
-- !query 18 schema
struct<TIMESTAMP '2019-01-01 00:00:00':timestamp>
-- !query 18 output
2019-01-01 00:00:00
10 changes: 9 additions & 1 deletion sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 118
-- Number of queries: 119


-- !query 0
@@ -1097,3 +1097,11 @@ from interval_arithmetic
struct<(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' + INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval,(INTERVAL '99 days 11 hours 22 minutes 33.123456 seconds' - INTERVAL '10 days 9 hours 8 minutes 7.123456 seconds'):interval>
-- !query 117 output
109 days 20 hours 30 minutes 40.246912 seconds 89 days 2 hours 14 minutes 26 seconds


-- !query 118
select interval '\t interval 1 day'
-- !query 118 schema
struct<INTERVAL '1 days':interval>
-- !query 118 output
1 days