1919
2020import com .google .common .base .Charsets ;
2121import com .google .common .primitives .Longs ;
22+ import com .google .common .primitives .UnsignedBytes ;
2223
2324import org .apache .spark .annotation .Private ;
2425import org .apache .spark .unsafe .types .UTF8String ;
@@ -35,36 +36,33 @@ private PrefixComparators() {}
3536 public static final class StringPrefixComparator extends PrefixComparator {
3637 @ Override
3738 public int compare (long aPrefix , long bPrefix ) {
38- // TODO: this can certainly be done more efficiently
39+ // TODO: this can be done more efficiently
3940 byte [] a = Longs .toByteArray (aPrefix );
4041 byte [] b = Longs .toByteArray (bPrefix );
4142 for (int i = 0 ; i < 8 ; i ++) {
42- if (a [i ] == b [i ]) continue ;
43- if (a [i ] > b [i ]) return -1 ;
44- else if (a [i ] < b [i ]) return 1 ;
43+ int c = UnsignedBytes .compare (a [i ], b [i ]);
44+ if (c != 0 ) return c ;
4545 }
4646 return 0 ;
4747 }
4848
49- public long computePrefix (UTF8String value ) {
50- // TODO: this can certainly be done more efficiently
51- return value == null ? 0L : computePrefix (value .toString ());
52- }
53-
54- public long computePrefix (String value ) {
55- // TODO: this can certainly be done more efficiently
56- if (value == null || value .length () == 0 ) {
49+ public long computePrefix (byte [] bytes ) {
50+ if (bytes == null ) {
5751 return 0L ;
5852 } else {
59- String first4Chars = value .substring (0 , Math .min (3 , value .length () - 1 ));
60- byte [] utf16Bytes = first4Chars .getBytes (Charsets .UTF_16 );
6153 byte [] padded = new byte [8 ];
62- if (utf16Bytes .length < 8 ) {
63- System .arraycopy (utf16Bytes , 0 , padded , 0 , utf16Bytes .length );
64- }
54+ System .arraycopy (bytes , 0 , padded , 0 , Math .min (bytes .length , 8 ));
6555 return Longs .fromByteArray (padded );
6656 }
6757 }
58+
59+ public long computePrefix (String value ) {
60+ return value == null ? 0L : computePrefix (value .getBytes (Charsets .UTF_8 ));
61+ }
62+
63+ public long computePrefix (UTF8String value ) {
64+ return value == null ? 0L : computePrefix (value .getBytes ());
65+ }
6866 }
6967
7068 /**
0 commit comments