-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-8223][SPARK-8224][SQL] shift left and shift right #7178
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
ac7fe9d
44ee324
9434a28
5189690
3b56f2a
f628706
f3f64e6
8023bb5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -351,6 +351,108 @@ case class Pow(left: Expression, right: Expression) | |
| } | ||
| } | ||
|
|
||
| case class ShiftLeft(left: Expression, right: Expression) extends BinaryExpression { | ||
|
|
||
| override def checkInputDataTypes(): TypeCheckResult = { | ||
| (left.dataType, right.dataType) match { | ||
| case (NullType, _) | (_, NullType) => return TypeCheckResult.TypeCheckSuccess | ||
| case (_, IntegerType) => left.dataType match { | ||
| case LongType | IntegerType | ShortType | ByteType => | ||
| return TypeCheckResult.TypeCheckSuccess | ||
| case _ => // failed | ||
| } | ||
| case _ => // failed | ||
| } | ||
| TypeCheckResult.TypeCheckFailure( | ||
| s"ShiftLeft expects long, integer, short or byte value as first argument and an " + | ||
| s"integer value as second argument, not (${left.dataType}, ${right.dataType})") | ||
| } | ||
|
|
||
| override def eval(input: InternalRow): Any = { | ||
| val valueLeft = left.eval(input) | ||
| if (valueLeft != null) { | ||
| val valueRight = right.eval(input) | ||
| if (valueRight != null) { | ||
| valueLeft match { | ||
| case l: Long => l << valueRight.asInstanceOf[Integer] | ||
| case i: Integer => i << valueRight.asInstanceOf[Integer] | ||
| case s: Short => s << valueRight.asInstanceOf[Integer] | ||
| case b: Byte => b << valueRight.asInstanceOf[Integer] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we handle overflow?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean by overflow?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, thanks! |
||
| } | ||
| } else { | ||
| null | ||
| } | ||
| } else { | ||
| null | ||
| } | ||
| } | ||
|
|
||
| override def dataType: DataType = { | ||
| left.dataType match { | ||
| case LongType => LongType | ||
| case IntegerType | ShortType | ByteType => IntegerType | ||
| case _ => NullType | ||
| } | ||
| } | ||
|
|
||
| override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
| nullSafeCodeGen(ctx, ev, (result, left, right) => s"$result = $left << $right;") | ||
| } | ||
|
|
||
| override def toString: String = s"ShiftLeft($left, $right)" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use prettyName instead of toString?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually you don't need a toString here. just remove it. |
||
| } | ||
|
|
||
| case class ShiftRight(left: Expression, right: Expression) extends BinaryExpression { | ||
|
|
||
| override def checkInputDataTypes(): TypeCheckResult = { | ||
| (left.dataType, right.dataType) match { | ||
| case (NullType, _) | (_, NullType) => return TypeCheckResult.TypeCheckSuccess | ||
| case (_, IntegerType) => left.dataType match { | ||
| case LongType | IntegerType | ShortType | ByteType => | ||
| return TypeCheckResult.TypeCheckSuccess | ||
| case _ => // failed | ||
| } | ||
| case _ => // failed | ||
| } | ||
| TypeCheckResult.TypeCheckFailure( | ||
| s"ShiftRight expects long, integer, short or byte value as first argument and an " + | ||
| s"integer value as second argument, not (${left.dataType}, ${right.dataType})") | ||
| } | ||
|
|
||
| override def eval(input: InternalRow): Any = { | ||
| val valueLeft = left.eval(input) | ||
| if (valueLeft != null) { | ||
| val valueRight = right.eval(input) | ||
| if (valueRight != null) { | ||
| valueLeft match { | ||
| case l: Long => l >> valueRight.asInstanceOf[Integer] | ||
| case i: Integer => i >> valueRight.asInstanceOf[Integer] | ||
| case s: Short => s >> valueRight.asInstanceOf[Integer] | ||
| case b: Byte => b >> valueRight.asInstanceOf[Integer] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is a special Jira ticket for that: https://issues.apache.org/jira/browse/SPARK-8226. Someone created already a PR.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not pattern matching the data type? instead of the value? Will that cause extra box/unbox for primtives?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That might be a good hint. I am going to take a look on the generated code and will come back to this and create maybe a follow-up.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @chenghao-intel I investigated it a little bit, see the gist: https://gist.github.com/tarekauel/6994983b83a51668c5dc . The interesting part is that the match on the value is even faster, did I something wrong? |
||
| } | ||
| } else { | ||
| null | ||
| } | ||
| } else { | ||
| null | ||
| } | ||
| } | ||
|
|
||
| override def dataType: DataType = { | ||
| left.dataType match { | ||
| case LongType => LongType | ||
| case IntegerType | ShortType | ByteType => IntegerType | ||
| case _ => NullType | ||
| } | ||
| } | ||
|
|
||
| override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { | ||
| nullSafeCodeGen(ctx, ev, (result, left, right) => s"$result = $left >> $right;") | ||
| } | ||
|
|
||
| override def toString: String = s"ShiftRight($left, $right)" | ||
| } | ||
|
|
||
| /** | ||
| * Performs the inverse operation of HEX. | ||
| * Resulting characters are returned as a byte array. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1298,6 +1298,44 @@ object functions { | |
| */ | ||
| def rint(columnName: String): Column = rint(Column(columnName)) | ||
|
|
||
| /** | ||
| * Shift the the given value numBits left. Returns int for tinyint, smallint and int and | ||
| * bigint for bigint a. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def shiftLeft(e: Column, numBits: Integer): Column = ShiftLeft(e.expr, lit(numBits).expr) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Integer -> Int ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If your comment is about the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean we should use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, yes of course, sorry for that |
||
|
|
||
| /** | ||
| * Shift the the given value numBits left. Returns int for tinyint, smallint and int and | ||
| * bigint for bigint a. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def shiftLeft(columnName: String, numBits: Integer): Column = | ||
| shiftLeft(Column(columnName), numBits) | ||
|
|
||
| /** | ||
| * Shift the the given value numBits right. Returns int for tinyint, smallint and int and | ||
| * bigint for bigint a. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def shiftRight(e: Column, numBits: Integer): Column = ShiftRight(e.expr, lit(numBits).expr) | ||
|
|
||
| /** | ||
| * Shift the the given value numBits right. Returns int for tinyint, smallint and int and | ||
| * bigint for bigint a. | ||
| * | ||
| * @group math_funcs | ||
| * @since 1.5.0 | ||
| */ | ||
| def shiftRight(columnName: String, numBits: Integer): Column = | ||
| shiftRight(Column(columnName), numBits) | ||
|
|
||
| /** | ||
| * Computes the signum of the given value. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cannot understand the last sentence