Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,7 @@ object FunctionRegistry {
expression[BitwiseCount]("bit_count"),
expression[BitAndAgg]("bit_and"),
expression[BitOrAgg]("bit_or"),
expression[BitXorAgg]("bit_xor"),

// json
expression[StructsToJson]("to_json"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.expressions.aggregate

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal}
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, BitwiseXor, ExpectsInputTypes, Expression, ExpressionDescription, If, IsNull, Literal}
import org.apache.spark.sql.types.{AbstractDataType, DataType, IntegralType}

@ExpressionDescription(
Expand Down Expand Up @@ -97,3 +97,42 @@ case class BitOrAgg(child: Expression) extends DeclarativeAggregate with Expects

override lazy val evaluateExpression: AttributeReference = bitOr
}

@ExpressionDescription(
usage = "_FUNC_(expr) - Returns the bitwise XOR of all non-null input values, or null if none.",
examples = """
Examples:
> SELECT _FUNC_(col) FROM VALUES (3), (5) AS tab(col);
6
""",
since = "3.0.0")
case class BitXorAgg(child: Expression) extends DeclarativeAggregate with ExpectsInputTypes {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BitAndAgg, BitOrAgg, and BitXorAgg have similar logic, so can we share it by using a new trait (e.g., BitwiseOpLike)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good, will update

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can make it much simpler, like this:

abstract class BitAggregate extends DeclarativeAggregate with ExpectsInputTypes {

  ...
  override lazy val updateExpressions: Seq[Expression] =
    If(IsNull(bitAgg),
      child,
      If(IsNull(child), bitAgg, bitOp(bitAgg, child))) :: Nil

  override lazy val mergeExpressions: Seq[Expression] =
    If(IsNull(bitAgg.left),
      bitAgg.right,
      If(IsNull(bitAgg.right), bitAgg.left, bitOp(bitAgg.left, bitAgg.right))) :: Nil
}

case class BitAndAgg(child: Expression) extends BitAggregate {

  override def nodeName: String = "bit_and"
  override def bitOp(left: Expression, right: Expression): BinaryArithmetic =
    BitwiseAnd(left, right)
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea, much cooler, thanks


// Name reported for this node.
override def nodeName: String = "bit_xor"

// The aggregate has exactly one input expression.
override def children: Seq[Expression] = child :: Nil

// The buffer starts out as null (see initialValues) and is only replaced once a
// non-null input is seen, so the result can be null.
override def nullable: Boolean = true

// The result has the same type as the input expression.
override def dataType: DataType = child.dataType

// Only integral inputs are accepted.
override def inputTypes: Seq[AbstractDataType] = Seq(IntegralType)

// Single aggregation buffer slot holding the running XOR, typed like the input.
private lazy val bitXOr = AttributeReference("bit_xor", child.dataType)()

override lazy val aggBufferAttributes: Seq[AttributeReference] = bitXOr :: Nil

// Start from a typed null rather than 0, so "no non-null input" yields null.
override lazy val initialValues: Seq[Literal] = Literal.create(null, dataType) :: Nil

// Per-row update:
//   - buffer is null  -> take the input as-is (which may itself be null)
//   - input is null   -> keep the buffer unchanged
//   - otherwise       -> XOR the buffer with the input
override lazy val updateExpressions: Seq[Expression] =
If(IsNull(bitXOr),
child,
If(IsNull(child), bitXOr, BitwiseXor(bitXOr, child))) :: Nil

// Merge of two buffers, using the same null-handling shape as the update above.
// NOTE(review): `.left` / `.right` presumably denote the two partial buffers being
// merged, as provided by DeclarativeAggregate - confirm against that class.
override lazy val mergeExpressions: Seq[Expression] =
If(IsNull(bitXOr.left),
bitXOr.right,
If(IsNull(bitXOr.right), bitXOr.left, BitwiseXor(bitXOr.left, bitXOr.right))) :: Nil

// The final result is the buffer value itself; no post-processing is applied.
override lazy val evaluateExpression: AttributeReference = bitXOr
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES
(7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4);

-- empty case
SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2 FROM bitwise_test where 1 = 0;
SELECT BIT_AND(b1) AS n1, BIT_OR(b2) AS n2, BIT_XOR(b3) AS n3 FROM bitwise_test where 1 = 0;

-- null case
SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2 FROM bitwise_test where b4 is null;
SELECT BIT_AND(b4) AS n1, BIT_OR(b4) AS n2, BIT_XOR(b4) AS n3 FROM bitwise_test where b4 is null;

-- the suffix numbers show the expected answer
SELECT
BIT_AND(cast(b1 as tinyint)) AS a1,
BIT_AND(cast(b2 as smallint)) AS b1,
Expand All @@ -60,18 +61,24 @@ SELECT
BIT_OR(cast(b1 as tinyint)) AS e7,
BIT_OR(cast(b2 as smallint)) AS f7,
BIT_OR(b3) AS g7,
BIT_OR(b4) AS h3
BIT_OR(b4) AS h3,
BIT_XOR(cast(b1 as tinyint)) AS i5,
BIT_XOR(cast(b2 as smallint)) AS j5,
BIT_XOR(b3) AS k5,
BIT_XOR(b4) AS l2,
BIT_XOR(distinct b4) AS m2
FROM bitwise_test;

-- group by
SELECT b1 , bit_and(b2), bit_or(b4) FROM bitwise_test GROUP BY b1;
SELECT b1 , bit_and(b2), bit_or(b4), bit_xor(b3) FROM bitwise_test GROUP BY b1;

--having
SELECT b1, bit_and(b2) FROM bitwise_test GROUP BY b1 HAVING bit_and(b2) < 7;

-- window
SELECT b1, b2, bit_and(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test;
SELECT b1, b2, bit_or(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test;
SELECT b1, b2, bit_xor(b2) OVER (PARTITION BY b1 ORDER BY b2) FROM bitwise_test;

--
-- test boolean aggregates
Expand Down
Loading