@@ -64,7 +64,8 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
6464 5 , 5 , 5 , 5 ,
6565 6 , 6 };
6666
67- private static boolean isLittleEndian = ByteOrder .nativeOrder () == ByteOrder .LITTLE_ENDIAN ;
67+ private static final boolean IS_LITTLE_ENDIAN =
68+ ByteOrder .nativeOrder () == ByteOrder .LITTLE_ENDIAN ;
6869
6970 private static final UTF8String COMMA_UTF8 = UTF8String .fromString ("," );
7071 public static final UTF8String EMPTY_UTF8 = UTF8String .fromString ("" );
@@ -220,7 +221,7 @@ public long getPrefix() {
220221 // After getting the data, we use a mask to mask out data that is not part of the string.
221222 long p ;
222223 long mask = 0 ;
223- if (isLittleEndian ) {
224+ if (IS_LITTLE_ENDIAN ) {
224225 if (numBytes >= 8 ) {
225226 p = Platform .getLong (base , offset );
226227 } else if (numBytes > 4 ) {
@@ -1097,10 +1098,23 @@ public UTF8String copy() {
10971098 @ Override
10981099 public int compareTo (@ Nonnull final UTF8String other ) {
10991100 int len = Math .min (numBytes , other .numBytes );
1100- // TODO: compare 8 bytes as unsigned long
1101- for (int i = 0 ; i < len ; i ++) {
1101+ int wordMax = (len / 8 ) * 8 ;
1102+ long roffset = other .offset ;
1103+ Object rbase = other .base ;
1104+ for (int i = 0 ; i < wordMax ; i += 8 ) {
1105+ long left = getLong (base , offset + i );
1106+ long right = getLong (rbase , roffset + i );
1107+ if (left != right ) {
1108+ if (IS_LITTLE_ENDIAN ) {
1109+ return Long .compareUnsigned (Long .reverseBytes (left ), Long .reverseBytes (right ));
1110+ } else {
1111+ return Long .compareUnsigned (left , right );
1112+ }
1113+ }
1114+ }
1115+ for (int i = wordMax ; i < len ; i ++) {
11021116 // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
1103- int res = (getByte (i ) & 0xFF ) - (other .getByte (i ) & 0xFF );
1117+ int res = (getByte (i ) & 0xFF ) - (Platform .getByte (rbase , roffset + i ) & 0xFF );
11041118 if (res != 0 ) {
11051119 return res ;
11061120 }
0 commit comments