Skip to content

Commit f4cc859

Browse files
committed
Merge remote-tracking branch 'origin/master' into unsafe-by-default
2 parents 963f567 + 27850af commit f4cc859

File tree

24 files changed

+1746
-141
lines changed

24 files changed

+1746
-141
lines changed

core/src/main/java/org/apache/spark/util/collection/unsafe/sort/PrefixComparators.java

Lines changed: 3 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,7 @@
1717

1818
package org.apache.spark.util.collection.unsafe.sort;
1919

20-
import com.google.common.base.Charsets;
21-
import com.google.common.primitives.Longs;
22-
import com.google.common.primitives.UnsignedBytes;
20+
import com.google.common.primitives.UnsignedLongs;
2321

2422
import org.apache.spark.annotation.Private;
2523
import org.apache.spark.unsafe.types.UTF8String;
@@ -31,38 +29,16 @@ private PrefixComparators() {}
3129

3230
public static final StringPrefixComparator STRING = new StringPrefixComparator();
3331
public static final IntegralPrefixComparator INTEGRAL = new IntegralPrefixComparator();
34-
public static final FloatPrefixComparator FLOAT = new FloatPrefixComparator();
3532
public static final DoublePrefixComparator DOUBLE = new DoublePrefixComparator();
3633

3734
public static final class StringPrefixComparator extends PrefixComparator {
3835
@Override
3936
public int compare(long aPrefix, long bPrefix) {
40-
// TODO: can be done more efficiently
41-
byte[] a = Longs.toByteArray(aPrefix);
42-
byte[] b = Longs.toByteArray(bPrefix);
43-
for (int i = 0; i < 8; i++) {
44-
int c = UnsignedBytes.compare(a[i], b[i]);
45-
if (c != 0) return c;
46-
}
47-
return 0;
48-
}
49-
50-
public long computePrefix(byte[] bytes) {
51-
if (bytes == null) {
52-
return 0L;
53-
} else {
54-
byte[] padded = new byte[8];
55-
System.arraycopy(bytes, 0, padded, 0, Math.min(bytes.length, 8));
56-
return Longs.fromByteArray(padded);
57-
}
58-
}
59-
60-
public long computePrefix(String value) {
61-
return value == null ? 0L : computePrefix(value.getBytes(Charsets.UTF_8));
37+
return UnsignedLongs.compare(aPrefix, bPrefix);
6238
}
6339

6440
public long computePrefix(UTF8String value) {
65-
return value == null ? 0L : computePrefix(value.getBytes());
41+
return value == null ? 0L : value.getPrefix();
6642
}
6743
}
6844

@@ -78,21 +54,6 @@ public int compare(long a, long b) {
7854
public final long NULL_PREFIX = Long.MIN_VALUE;
7955
}
8056

81-
public static final class FloatPrefixComparator extends PrefixComparator {
82-
@Override
83-
public int compare(long aPrefix, long bPrefix) {
84-
float a = Float.intBitsToFloat((int) aPrefix);
85-
float b = Float.intBitsToFloat((int) bPrefix);
86-
return Utils.nanSafeCompareFloats(a, b);
87-
}
88-
89-
public long computePrefix(float value) {
90-
return Float.floatToIntBits(value) & 0xffffffffL;
91-
}
92-
93-
public final long NULL_PREFIX = computePrefix(Float.NEGATIVE_INFINITY);
94-
}
95-
9657
public static final class DoublePrefixComparator extends PrefixComparator {
9758
@Override
9859
public int compare(long aPrefix, long bPrefix) {

core/src/test/scala/org/apache/spark/util/collection/unsafe/sort/PrefixComparatorsSuite.scala

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,29 @@
1717

1818
package org.apache.spark.util.collection.unsafe.sort
1919

20+
import com.google.common.primitives.UnsignedBytes
2021
import org.scalatest.prop.PropertyChecks
21-
2222
import org.apache.spark.SparkFunSuite
23+
import org.apache.spark.unsafe.types.UTF8String
2324

2425
class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
2526

2627
test("String prefix comparator") {
2728

2829
def testPrefixComparison(s1: String, s2: String): Unit = {
29-
val s1Prefix = PrefixComparators.STRING.computePrefix(s1)
30-
val s2Prefix = PrefixComparators.STRING.computePrefix(s2)
30+
val utf8string1 = UTF8String.fromString(s1)
31+
val utf8string2 = UTF8String.fromString(s2)
32+
val s1Prefix = PrefixComparators.STRING.computePrefix(utf8string1)
33+
val s2Prefix = PrefixComparators.STRING.computePrefix(utf8string2)
3134
val prefixComparisonResult = PrefixComparators.STRING.compare(s1Prefix, s2Prefix)
35+
36+
val cmp = UnsignedBytes.lexicographicalComparator().compare(
37+
utf8string1.getBytes.take(8), utf8string2.getBytes.take(8))
38+
3239
assert(
33-
(prefixComparisonResult == 0) ||
34-
(prefixComparisonResult < 0 && s1 < s2) ||
35-
(prefixComparisonResult > 0 && s1 > s2))
40+
(prefixComparisonResult == 0 && cmp == 0) ||
41+
(prefixComparisonResult < 0 && s1.compareTo(s2) < 0) ||
42+
(prefixComparisonResult > 0 && s1.compareTo(s2) > 0))
3643
}
3744

3845
// scalastyle:off
@@ -48,18 +55,6 @@ class PrefixComparatorsSuite extends SparkFunSuite with PropertyChecks {
4855
forAll { (s1: String, s2: String) => testPrefixComparison(s1, s2) }
4956
}
5057

51-
test("float prefix comparator handles NaN properly") {
52-
val nan1: Float = java.lang.Float.intBitsToFloat(0x7f800001)
53-
val nan2: Float = java.lang.Float.intBitsToFloat(0x7fffffff)
54-
assert(nan1.isNaN)
55-
assert(nan2.isNaN)
56-
val nan1Prefix = PrefixComparators.FLOAT.computePrefix(nan1)
57-
val nan2Prefix = PrefixComparators.FLOAT.computePrefix(nan2)
58-
assert(nan1Prefix === nan2Prefix)
59-
val floatMaxPrefix = PrefixComparators.FLOAT.computePrefix(Float.MaxValue)
60-
assert(PrefixComparators.FLOAT.compare(nan1Prefix, floatMaxPrefix) === 1)
61-
}
62-
6358
test("double prefix comparator handles NaNs properly") {
6459
val nan1: Double = java.lang.Double.longBitsToDouble(0x7ff0000000000001L)
6560
val nan2: Double = java.lang.Double.longBitsToDouble(0x7fffffffffffffffL)

0 commit comments

Comments
 (0)