Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public static long getPrefix(byte[] bytes) {
final int minLen = Math.min(bytes.length, 8);
long p = 0;
for (int i = 0; i < minLen; ++i) {
p |= (128L + Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i))
p |= ((long) Platform.getByte(bytes, Platform.BYTE_ARRAY_OFFSET + i) & 0xff)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems we don't have unit test for this. Shall we add a ByteArraySuite?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have the unit test for this. I added some cases.

<< (56 - 8 * i);
}
return p;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,9 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {

def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
val res = x(i).compare(y(i))
val v1 = x(i) & 0xff
val v2 = y(i) & 0xff
val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@ object TypeUtils {

def compareBinary(x: Array[Byte], y: Array[Byte]): Int = {
for (i <- 0 until x.length; if i < y.length) {
val res = x(i).compareTo(y(i))
val v1 = x(i) & 0xff
val v2 = y(i) & 0xff
val res = v1 - v2
if (res != 0) return res
}
x.length - y.length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,4 +137,26 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper {
// verify that we can support up to 5000 ordering comparisons, which should be sufficient
GenerateOrdering.generate(Array.fill(5000)(sortOrder))
}

test("SPARK-21344: BinaryType comparison does signed byte array comparison") {
val data = Seq(
(Array[Byte](1), Array[Byte](-1)),
(Array[Byte](1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, -1)),
(Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, 1), Array[Byte](1, 1, 1, 1, 1, 1, 1, 1, -1))
)
data.foreach { case (b1, b2) =>
val rowOrdering = InterpretedOrdering.forSchema(Seq(BinaryType, BinaryType))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmmm, don't you just have one binary field for each row? Why you specify two fields to compare?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, updated.

val genOrdering = GenerateOrdering.generate(
BoundReference(0, BinaryType, nullable = true).asc ::
BoundReference(1, BinaryType, nullable = true).asc :: Nil)
val rowType = StructType(
StructField("b1", BinaryType, nullable = true) ::
StructField("b2", BinaryType, nullable = true) :: Nil)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You specify two binary fields as row schema. But actually your input just has one binary field (i.e., Row(b1)).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I misunderstood, updated.

val toCatalyst = CatalystTypeConverters.createToCatalystConverter(rowType)
val rowB1 = toCatalyst(Row(b1)).asInstanceOf[InternalRow]
val rowB2 = toCatalyst(Row(b2)).asInstanceOf[InternalRow]
assert(rowOrdering.compare(rowB1, rowB2) < 0)
assert(genOrdering.compare(rowB1, rowB2) < 0)
}
}
}
3 changes: 3 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/comparator.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- binary type
select x'00' < x'0f';
select x'00' < x'ff';
18 changes: 18 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/comparator.sql.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 2


-- !query 0
select x'00' < x'0f'
-- !query 0 schema
struct<(X'00' < X'0F'):boolean>
-- !query 0 output
true


-- !query 1
select x'00' < x'ff'
-- !query 1 schema
struct<(X'00' < X'FF'):boolean>
-- !query 1 output
true