@@ -19,8 +19,9 @@ package org.apache.spark.sql.types
1919
2020import java .util .Arrays
2121
22- import org .apache .spark .unsafe .PlatformDependent
23- import org .apache .spark .unsafe .string .{UTF8StringPointer , UTF8StringMethods }
22+ import org .apache .spark .unsafe .PlatformDependent .BYTE_ARRAY_OFFSET
23+ import org .apache .spark .unsafe .array .ByteArrayMethods
24+ import org .apache .spark .unsafe .string .UTF8StringMethods
2425
2526/**
2627 * A UTF-8 String, as internal representation of StringType in SparkSQL
@@ -35,8 +36,6 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
3536
3637 private [this ] var bytes : Array [Byte ] = _
3738
38- private val pointer : UTF8StringPointer = new UTF8StringPointer
39-
4039 /**
4140 * Update the UTF8String with String.
4241 */
@@ -49,7 +48,6 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
4948 */
def set(bytes: Array[Byte]): UTF8String = {
  // Keep the caller's array reference as-is; no copy is made here.
  this.bytes = bytes
  // Return this instance so the call can be chained, e.g. `new UTF8String().set(b)`.
  this
}
5553
@@ -59,7 +57,7 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
5957 * This is only used by Substring() when `start` is negative.
6058 */
def length(): Int = {
  // Delegates to UTF8StringMethods.getLengthInCodePoints over the whole backing array,
  // addressed from BYTE_ARRAY_OFFSET for `bytes.length` bytes.
  val numBytes = bytes.length
  UTF8StringMethods.getLengthInCodePoints(bytes, BYTE_ARRAY_OFFSET, numBytes)
}
6462
6563 def getBytes : Array [Byte ] = {
@@ -107,19 +105,27 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
107105 }
108106
/**
 * Tests whether this string begins with `prefix`.
 *
 * The check is delegated to `UTF8StringMethods.startsWith`, which receives the full
 * backing array of this string and the full byte array of `prefix`.
 *
 * @param prefix the candidate prefix
 * @return the result reported by `UTF8StringMethods.startsWith`
 */
def startsWith(prefix: UTF8String): Boolean = {
  // `bytes` is private[this], so the other instance's data is read through getBytes.
  val needle = prefix.getBytes
  UTF8StringMethods.startsWith(
    bytes, BYTE_ARRAY_OFFSET, bytes.length,
    needle, BYTE_ARRAY_OFFSET, needle.length)
}
116118
/**
 * Tests whether this string ends with `suffix`.
 *
 * The check is delegated to `UTF8StringMethods.endsWith`, which receives the full
 * backing array of this string and the full byte array of `suffix`.
 *
 * @param suffix the candidate suffix
 * @return the result reported by `UTF8StringMethods.endsWith`
 */
def endsWith(suffix: UTF8String): Boolean = {
  // `bytes` is private[this], so the other instance's data is read through getBytes.
  val needle = suffix.getBytes
  UTF8StringMethods.endsWith(
    bytes, BYTE_ARRAY_OFFSET, bytes.length,
    needle, BYTE_ARRAY_OFFSET, needle.length)
}
124130
125131 def toUpperCase (): UTF8String = {
@@ -139,13 +145,14 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
override def clone(): UTF8String = {
  // The new instance is given this.bytes directly, so both objects reference the
  // same backing array (set() stores the reference without copying).
  val duplicate = new UTF8String()
  duplicate.set(this.bytes)
}
140146
/**
 * Orders this string relative to `other`, implementing `Ordered[UTF8String]`.
 *
 * The comparison is delegated to `UTF8StringMethods.compare`, which receives the
 * full backing arrays of both strings.
 *
 * @param other the string to compare against
 * @return the value produced by `UTF8StringMethods.compare`
 */
override def compare(other: UTF8String): Int = {
  // `bytes` is private[this], so the other instance's data is read through getBytes.
  val rhs = other.getBytes
  UTF8StringMethods.compare(
    bytes, BYTE_ARRAY_OFFSET, bytes.length,
    rhs, BYTE_ARRAY_OFFSET, rhs.length)
}
151158
@@ -155,7 +162,14 @@ final class UTF8String extends Ordered[UTF8String] with Serializable {
155162
156163 override def equals (other : Any ): Boolean = other match {
157164 case s : UTF8String =>
158- Arrays .equals(bytes, s.getBytes)
165+ val otherBytes = s.getBytes
166+ otherBytes.length == bytes.length && ByteArrayMethods .arrayEquals(
167+ bytes,
168+ BYTE_ARRAY_OFFSET ,
169+ otherBytes,
170+ BYTE_ARRAY_OFFSET ,
171+ otherBytes.length
172+ )
159173 case s : String =>
160174 // This is only used for Catalyst unit tests
161175 // fail fast
0 commit comments