@@ -23,7 +23,8 @@ import scala.collection.mutable
2323import scala .reflect .ClassTag
2424import scala .reflect .runtime .universe .runtimeMirror
2525
26- import org .apache .spark .sql .catalyst .expressions .GenericMutableRow
26+ import org .apache .spark .sql .Row
27+ import org .apache .spark .sql .catalyst .expressions .SpecificMutableRow
2728import org .apache .spark .sql .catalyst .types ._
2829import org .apache .spark .sql .columnar ._
2930import org .apache .spark .util .Utils
@@ -33,18 +34,20 @@ private[sql] case object PassThrough extends CompressionScheme {
3334
// PassThrough accepts every column type: the result is `true` regardless of the argument.
3435 override def supports (columnType : ColumnType [_, _]) = true
3536
// Factory for this scheme's encoder.
// The patch replaces the zero-argument form (the `-` line) with one that receives the
// column type and passes it to the Encoder constructor (the `+` lines).
36- override def encoder [T <: NativeType ] = new this .Encoder [T ]
37+ override def encoder [T <: NativeType ](columnType : NativeColumnType [T ]) = {
38+ new this .Encoder [T ](columnType)
39+ }
3740
// Factory for this scheme's decoder: builds a new Decoder from the given buffer and
// column type.
3841 override def decoder [T <: NativeType ](buffer : ByteBuffer , columnType : NativeColumnType [T ]) = {
3942 new this .Decoder (buffer, columnType)
4043 }
4144
42- class Encoder [T <: NativeType ] extends compression.Encoder [T ] {
45+ class Encoder [T <: NativeType ]( columnType : NativeColumnType [ T ]) extends compression.Encoder [T ] {
// Always reports 0 for the pass-through encoder.
4346 override def uncompressedSize = 0
4447
// Always reports 0 for the pass-through encoder.
4548 override def compressedSize = 0
4649
47- override def compress (from : ByteBuffer , to : ByteBuffer , columnType : NativeColumnType [ T ] ) = {
50+ override def compress (from : ByteBuffer , to : ByteBuffer ) = {
4851 // Writes compression type ID and copies raw contents
4952 to.putInt(PassThrough .typeId).put(from).rewind()
5053 to
@@ -63,7 +66,9 @@ private[sql] case object PassThrough extends CompressionScheme {
6366private [sql] case object RunLengthEncoding extends CompressionScheme {
6467 override val typeId = 1
6568
// Factory for the run-length encoder.
// The patch replaces the zero-argument form (the `-` line) with one that receives the
// column type and passes it to the Encoder constructor (the `+` lines).
66- override def encoder [T <: NativeType ] = new this .Encoder [T ]
69+ override def encoder [T <: NativeType ](columnType : NativeColumnType [T ]) = {
70+ new this .Encoder [T ](columnType)
71+ }
6772
6873 override def decoder [T <: NativeType ](buffer : ByteBuffer , columnType : NativeColumnType [T ]) = {
6974 new this .Decoder (buffer, columnType)
@@ -74,20 +79,21 @@ private[sql] case object RunLengthEncoding extends CompressionScheme {
7479 case _ => false
7580 }
7681
77- class Encoder [T <: NativeType ] extends compression.Encoder [T ] {
82+ class Encoder [T <: NativeType ]( columnType : NativeColumnType [ T ]) extends compression.Encoder [T ] {
7883 private var _uncompressedSize = 0
7984 private var _compressedSize = 0
8085
8186 // Using `MutableRow` to store the last value to avoid boxing/unboxing cost.
82- private val lastValue = new GenericMutableRow ( 1 )
87+ private val lastValue = new SpecificMutableRow ( Seq (columnType.dataType) )
8388 private var lastRun = 0
8489
// Exposes the private `_uncompressedSize` counter, which gatherCompressibilityStats
// increases by each value's actual size.
8590 override def uncompressedSize = _uncompressedSize
8691
// Exposes the private `_compressedSize` counter.
// NOTE(review): the code that updates this counter is outside the visible hunk.
8792 override def compressedSize = _compressedSize
8893
89- override def gatherCompressibilityStats (value : T # JvmType , columnType : NativeColumnType [T ]) {
90- val actualSize = columnType.actualSize(value)
94+ override def gatherCompressibilityStats (row : Row , ordinal : Int ) {
95+ val value = columnType.getField(row, ordinal)
96+ val actualSize = columnType.actualSize(row, ordinal)
9197 _uncompressedSize += actualSize
9298
9399 if (lastValue.isNullAt(0 )) {
@@ -105,7 +111,7 @@ private[sql] case object RunLengthEncoding extends CompressionScheme {
105111 }
106112 }
107113
108- override def compress (from : ByteBuffer , to : ByteBuffer , columnType : NativeColumnType [ T ] ) = {
114+ override def compress (from : ByteBuffer , to : ByteBuffer ) = {
109115 to.putInt(RunLengthEncoding .typeId)
110116
111117 if (from.hasRemaining) {
@@ -171,14 +177,16 @@ private[sql] case object DictionaryEncoding extends CompressionScheme {
171177 new this .Decoder (buffer, columnType)
172178 }
173179
// Factory for the dictionary encoder.
// The patch replaces the zero-argument form (the `-` line) with one that receives the
// column type and passes it to the Encoder constructor (the `+` lines).
174- override def encoder [T <: NativeType ] = new this .Encoder [T ]
180+ override def encoder [T <: NativeType ](columnType : NativeColumnType [T ]) = {
181+ new this .Encoder [T ](columnType)
182+ }
175183
// Dictionary encoding is offered only for INT, LONG and STRING columns; any other
// column type yields `false`.
176184 override def supports (columnType : ColumnType [_, _]) = columnType match {
177185 case INT | LONG | STRING => true
178186 case _ => false
179187 }
180188
181- class Encoder [T <: NativeType ] extends compression.Encoder [T ] {
189+ class Encoder [T <: NativeType ]( columnType : NativeColumnType [ T ]) extends compression.Encoder [T ] {
182190 // Size of the input, uncompressed, in bytes. Note that we only count until the dictionary
183191 // overflows.
184192 private var _uncompressedSize = 0
@@ -200,9 +208,11 @@ private[sql] case object DictionaryEncoding extends CompressionScheme {
200208 // to store dictionary element count.
201209 private var dictionarySize = 4
202210
203- override def gatherCompressibilityStats (value : T # JvmType , columnType : NativeColumnType [T ]) {
211+ override def gatherCompressibilityStats (row : Row , ordinal : Int ) {
212+ val value = columnType.getField(row, ordinal)
213+
204214 if (! overflow) {
205- val actualSize = columnType.actualSize(value )
215+ val actualSize = columnType.actualSize(row, ordinal )
206216 count += 1
207217 _uncompressedSize += actualSize
208218
@@ -221,7 +231,7 @@ private[sql] case object DictionaryEncoding extends CompressionScheme {
221231 }
222232 }
223233
224- override def compress (from : ByteBuffer , to : ByteBuffer , columnType : NativeColumnType [ T ] ) = {
234+ override def compress (from : ByteBuffer , to : ByteBuffer ) = {
225235 if (overflow) {
226236 throw new IllegalStateException (
227237 " Dictionary encoding should not be used because of dictionary overflow." )
@@ -279,25 +289,20 @@ private[sql] case object BooleanBitSet extends CompressionScheme {
279289 new this .Decoder (buffer).asInstanceOf [compression.Decoder [T ]]
280290 }
281291
// Factory for the boolean bit-set encoder.
// The Encoder class here is fixed to BooleanType and has no type parameter, so the new
// instance is cast to compression.Encoder[T] to satisfy the generic signature.
// The added `columnType` parameter is not used in the body.
282- override def encoder [T <: NativeType ] = (new this .Encoder ).asInstanceOf [compression.Encoder [T ]]
292+ override def encoder [T <: NativeType ](columnType : NativeColumnType [T ]) = {
293+ (new this .Encoder ).asInstanceOf [compression.Encoder [T ]]
294+ }
283295
// Only the BOOLEAN column type is supported.
284296 override def supports (columnType : ColumnType [_, _]) = columnType == BOOLEAN
285297
286298 class Encoder extends compression.Encoder [BooleanType .type ] {
287299 private var _uncompressedSize = 0
288300
// Accumulates the uncompressed size: each call adds BOOLEAN.defaultSize to
// `_uncompressedSize`.
// The patch changes the signature from (value, columnType) to (row, ordinal); neither
// `row` nor `ordinal` is read in the body.
289- override def gatherCompressibilityStats (
290- value : Boolean ,
291- columnType : NativeColumnType [BooleanType .type ]) {
292-
301+ override def gatherCompressibilityStats (row : Row , ordinal : Int ) {
293302 _uncompressedSize += BOOLEAN .defaultSize
294303 }
295304
296- override def compress (
297- from : ByteBuffer ,
298- to : ByteBuffer ,
299- columnType : NativeColumnType [BooleanType .type ]) = {
300-
305+ override def compress (from : ByteBuffer , to : ByteBuffer ) = {
301306 to.putInt(BooleanBitSet .typeId)
302307 // Total element count (1 byte per Boolean value)
303308 .putInt(from.remaining)
@@ -364,13 +369,18 @@ private[sql] case object BooleanBitSet extends CompressionScheme {
364369 }
365370}
366371
367- private [sql] sealed abstract class IntegralDelta [I <: IntegralType ] extends CompressionScheme {
372+ private [sql] sealed abstract class IntegralDelta [I <: IntegralType ](
373+ columnType : NativeColumnType [I ])
374+ extends CompressionScheme {
375+
// Factory for the delta decoder.
// The `columnType` parameter shadows the class constructor parameter of the same name.
// It is cast to NativeColumnType[I] for the Decoder, and the resulting Decoder is cast
// to compression.Decoder[T] to match the generic signature.
368376 override def decoder [T <: NativeType ](buffer : ByteBuffer , columnType : NativeColumnType [T ]) = {
369377 new this .Decoder (buffer, columnType.asInstanceOf [NativeColumnType [I ]])
370378 .asInstanceOf [compression.Decoder [T ]]
371379 }
372380
// Factory for the delta encoder: builds a new Encoder and casts it to
// compression.Encoder[T] to match the generic signature.
// The added `columnType` parameter is not used in the body.
// NOTE(review): the Encoder presumably reads the class-level `columnType` constructor
// parameter instead; confirm against the Encoder definition, which is outside this hunk.
373- override def encoder [T <: NativeType ] = (new this .Encoder ).asInstanceOf [compression.Encoder [T ]]
381+ override def encoder [T <: NativeType ](columnType : NativeColumnType [T ]) = {
382+ (new this .Encoder ).asInstanceOf [compression.Encoder [T ]]
383+ }
374384
375385 /**
376386 * Computes `delta = x - y`, returns `(true, delta)` if `delta` can fit into a single byte, or
@@ -392,7 +402,8 @@ private[sql] sealed abstract class IntegralDelta[I <: IntegralType] extends Comp
392402
393403 private var initial = true
394404
395- override def gatherCompressibilityStats (value : I # JvmType , columnType : NativeColumnType [I ]) {
405+ override def gatherCompressibilityStats (row : Row , ordinal : Int ) {
406+ val value = columnType.getField(row, ordinal)
396407 _uncompressedSize += columnType.defaultSize
397408
398409 if (initial) {
@@ -406,7 +417,7 @@ private[sql] sealed abstract class IntegralDelta[I <: IntegralType] extends Comp
406417 prev = value
407418 }
408419
409- override def compress (from : ByteBuffer , to : ByteBuffer , columnType : NativeColumnType [ I ] ) = {
420+ override def compress (from : ByteBuffer , to : ByteBuffer ) = {
410421 to.putInt(typeId)
411422
412423 if (from.hasRemaining) {
@@ -452,7 +463,7 @@ private[sql] sealed abstract class IntegralDelta[I <: IntegralType] extends Comp
452463 }
453464}
454465
455- private [sql] case object IntDelta extends IntegralDelta [IntegerType .type ] {
466+ private [sql] case object IntDelta extends IntegralDelta [IntegerType .type ]( INT ) {
456467 override val typeId = 4
457468
// Only the INT column type is supported.
458469 override def supports (columnType : ColumnType [_, _]) = columnType == INT
@@ -465,7 +476,7 @@ private[sql] case object IntDelta extends IntegralDelta[IntegerType.type] {
465476 }
466477}
467478
468- private [sql] case object LongDelta extends IntegralDelta [LongType .type ] {
479+ private [sql] case object LongDelta extends IntegralDelta [LongType .type ]( LONG ) {
469480 override val typeId = 5
470481
// Only the LONG column type is supported.
471482 override def supports (columnType : ColumnType [_, _]) = columnType == LONG
0 commit comments