@@ -68,7 +68,8 @@ public final class UTF8String implements Comparable<UTF8String>, Externalizable,
     5, 5, 5, 5,
     6, 6};
 
-  private static final boolean isLittleEndian = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
+  private static final boolean IS_LITTLE_ENDIAN =
+      ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
 
   private static final UTF8String COMMA_UTF8 = UTF8String.fromString(",");
   public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
@@ -195,7 +196,7 @@ public long getPrefix() {
     // After getting the data, we use a mask to mask out data that is not part of the string.
     long p;
     long mask = 0;
-    if (isLittleEndian) {
+    if (IS_LITTLE_ENDIAN) {
       if (numBytes >= 8) {
         p = Platform.getLong(base, offset);
       } else if (numBytes > 4) {
@@ -1054,77 +1055,28 @@ public int compareTo(@Nonnull final UTF8String other) {
     return compare(other);
   }
 
-  /** Read integer in big-endian format */
-  static int getIntBigEndian(final Object base, final long offset) {
-    return isLittleEndian ? Integer.reverseBytes(Platform.getInt(base, offset))
-        : Platform.getInt(base, offset);
-  }
-
-  /** Read long in big-endian format */
-  static long getLongBigEndian(final Object base, final long offset) {
-    return isLittleEndian ? Long.reverseBytes(Platform.getLong(base, offset))
-        : Platform.getLong(base, offset);
-  }
-
   public final int compare(final UTF8String other) {
-    final Object rightBase = other.getBaseObject();
-    long rightOffset = other.getBaseOffset();
-    final Object leftBase = base;
-    long leftOffset = offset;
-
-    final int len = Math.min(numBytes, other.numBytes);
-
-    // for the case that compare will fail in first few bytes itself, the overhead
-    // of JNI call is too high
-    /*
-    // noinspection ConstantConditions
-    if (leftBase == null && rightBase == null &&
-        len >= Native.MIN_JNI_SIZE && Native.isLoaded()) {
-      final int result = Native.compareString(leftOffset, rightOffset, len);
-      return result != 0 ? result : (numBytes - other.numBytes);
-    }
-    */
-
-    long endOffset = leftOffset + len;
-    // for architectures that support unaligned accesses, read 8 bytes at a time
-    if (Platform.unaligned() || (((leftOffset & 0x7) == 0) && ((rightOffset & 0x7) == 0))) {
-      endOffset -= 8;
-      while (leftOffset <= endOffset) {
-        // In UTF-8, the byte should be unsigned, so we should compare them as unsigned long.
-        final long ll = getLongBigEndian(leftBase, leftOffset);
-        final long rl = getLongBigEndian(rightBase, rightOffset);
-        final long res = ll - rl;
-        // If the sign of both values is same then "res" is with correct sign.
-        // If the sign of values is different then "res" has opposite sign.
-        // The XOR operations will revert the sign bit of res if sign of values is different.
-        // After that converting to signum is "(1 + ((v >> 63) << 1))"
-        // where (v >> 63) will flow the sign to give -1 or 0, and (1 + 2 times)
-        // of that will give -1 or 1 respectively.
-        if (res != 0) return (int)(1 + (((ll ^ rl ^ res) >> 63) << 1));
-        leftOffset += 8;
-        rightOffset += 8;
-      }
-      endOffset += 4;
-      if (leftOffset <= endOffset) {
-        // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int
-        // which is done by converting to unsigned longs.
-        // After that conversion to signed integer is "(1 + ((v >> 63) << 1))" as above.
-        final long res = (getIntBigEndian(leftBase, leftOffset) & 0xffffffffL) -
-            (getIntBigEndian(rightBase, rightOffset) & 0xffffffffL);
-        if (res != 0) return (int)(1 + ((res >> 63) << 1));
-        leftOffset += 4;
-        rightOffset += 4;
+    int len = Math.min(numBytes, other.numBytes);
+    int wordMax = (len / 8) * 8;
+    long roffset = other.offset;
+    Object rbase = other.base;
+    for (int i = 0; i < wordMax; i += 8) {
+      long left = getLong(base, offset + i);
+      long right = getLong(rbase, roffset + i);
+      if (left != right) {
+        if (IS_LITTLE_ENDIAN) {
+          return Long.compareUnsigned(Long.reverseBytes(left), Long.reverseBytes(right));
+        } else {
+          return Long.compareUnsigned(left, right);
+        }
       }
-      endOffset += 4;
     }
-    // finish the remaining bytes
-    while (leftOffset < endOffset) {
+    for (int i = wordMax; i < len; i++) {
       // In UTF-8, the byte should be unsigned, so we should compare them as unsigned int.
-      final int res = (Platform.getByte(leftBase, leftOffset) & 0xff) -
-          (Platform.getByte(rightBase, rightOffset) & 0xff);
-      if (res != 0) return res;
-      leftOffset++;
-      rightOffset++;
+      int res = (getByte(i) & 0xFF) - (Platform.getByte(rbase, roffset + i) & 0xFF);
+      if (res != 0) {
+        return res;
+      }
     }
     return numBytes - other.numBytes;
   }
0 commit comments