@@ -62,7 +62,7 @@ public UnsafeRow appendRow(Object kbase, long koff, int klen,

keyRowId = numRows;
keyRow.pointTo(base, recordOffset, klen);
valueRow.pointTo(base, recordOffset + klen, vlen + 4);
Contributor:
I'm wondering why we did this before. Was it a mistake?

Member Author (@kiszk, Jul 5, 2017):
I have the same question.
@sameeragarwal asked a similar question a year ago, but there was no response from @ooq.

Contributor:
I recall it being intentional.
See discussion here.

Member Author:
@ooq, thank you for pointing out that interesting discussion.
That discussion makes sense for page management. The question @cloud-fan and I have is whether valueRow should use only vlen; I think the +4 is for page management.
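As a concrete, hedged illustration, here is a tiny Scala sketch of what the change means for a value row with one long field; the 16-byte payload is just an assumed example, not taken from this PR:

import org.apache.spark.sql.catalyst.expressions.UnsafeRow

// A value row with a single long field: 8-byte null bitset + 8-byte field = 16 bytes.
val vlen = 16
val buffer = new Array[Byte](vlen + 4)    // trailing 4 bytes are per-record bookkeeping, not row data
val valueRow = new UnsafeRow(1)
valueRow.pointTo(buffer, vlen)            // point to the value bytes only, as this change does
assert(valueRow.getSizeInBytes % 8 == 0)  // the old "vlen + 4" would have made this 20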

valueRow.pointTo(base, recordOffset + klen, vlen);
numRows++;
return valueRow;
}
@@ -95,7 +95,7 @@ protected UnsafeRow getValueFromKey(int rowId) {
getKeyRow(rowId);
}
assert(rowId >= 0);
valueRow.pointTo(base, keyRow.getBaseOffset() + klen, vlen + 4);
valueRow.pointTo(base, keyRow.getBaseOffset() + klen, vlen);
return valueRow;
}

@@ -131,7 +131,7 @@ public boolean next() {
}

key.pointTo(base, offsetInPage, klen);
value.pointTo(base, offsetInPage + klen, vlen + 4);
value.pointTo(base, offsetInPage + klen, vlen);

offsetInPage += recordLength;
recordsInPage -= 1;
@@ -167,6 +167,7 @@ public UnsafeRow() {}
*/
public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) {
assert numFields >= 0 : "numFields (" + numFields + ") should >= 0";
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8";
Contributor:
I think we only need the assertion here in pointTo and in setTotalSize. The other places are just checking the length of existing unsafe rows, which is unnecessary.

Member Author:
Yes, done.

this.baseObject = baseObject;
this.baseOffset = baseOffset;
this.sizeInBytes = sizeInBytes;
@@ -183,6 +184,7 @@ public void pointTo(byte[] buf, int sizeInBytes) {
}

public void setTotalSize(int sizeInBytes) {
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8";
this.sizeInBytes = sizeInBytes;
}

@@ -538,6 +540,7 @@ public void copyFrom(UnsafeRow row) {
row.baseObject, row.baseOffset, this.baseObject, this.baseOffset, row.sizeInBytes);
// update the sizeInBytes.
this.sizeInBytes = row.sizeInBytes;
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8";
}

/**
@@ -664,6 +667,7 @@ public void writeExternal(ObjectOutput out) throws IOException {
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
this.baseOffset = BYTE_ARRAY_OFFSET;
this.sizeInBytes = in.readInt();
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8";
this.numFields = in.readInt();
this.bitSetWidthInBytes = calculateBitSetWidthInBytes(numFields);
this.baseObject = new byte[sizeInBytes];
@@ -682,6 +686,7 @@ public void write(Kryo kryo, Output out) {
public void read(Kryo kryo, Input in) {
this.baseOffset = BYTE_ARRAY_OFFSET;
this.sizeInBytes = in.readInt();
assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8";
this.numFields = in.readInt();
this.bitSetWidthInBytes = calculateBitSetWidthInBytes(numFields);
this.baseObject = new byte[sizeInBytes];
@@ -65,7 +65,7 @@ public UnsafeRow appendRow(Object kbase, long koff, int klen,

keyRowId = numRows;
keyRow.pointTo(base, recordOffset + 8, klen);
valueRow.pointTo(base, recordOffset + 8 + klen, vlen + 4);
valueRow.pointTo(base, recordOffset + 8 + klen, vlen);
numRows++;
return valueRow;
}
@@ -102,7 +102,7 @@ public UnsafeRow getValueFromKey(int rowId) {
long offset = keyRow.getBaseOffset();
int klen = keyRow.getSizeInBytes();
int vlen = Platform.getInt(base, offset - 8) - klen - 4;
valueRow.pointTo(base, offset + klen, vlen + 4);
valueRow.pointTo(base, offset + klen, vlen);
return valueRow;
}

@@ -146,7 +146,7 @@ public boolean next() {
currentvlen = totalLength - currentklen;

key.pointTo(base, offsetInPage + 8, currentklen);
value.pointTo(base, offsetInPage + 8 + currentklen, currentvlen + 4);
value.pointTo(base, offsetInPage + 8 + currentklen, currentvlen);

offsetInPage += 8 + totalLength + 8;
recordsInPage -= 1;
@@ -208,9 +208,10 @@ private static final class RowComparator extends RecordComparator {

@Override
public int compare(Object baseObj1, long baseOff1, Object baseObj2, long baseOff2) {
// TODO: Why are the sizes -1?
row1.pointTo(baseObj1, baseOff1, -1);
row2.pointTo(baseObj2, baseOff2, -1);
// Note that since ordering doesn't need the total length of the record, we just pass 0
// into the row.
row1.pointTo(baseObj1, baseOff1, 0);
row2.pointTo(baseObj2, baseOff2, 0);
return ordering.compare(row1, row2);
}
}
@@ -238,10 +238,10 @@ private static final class KVComparator extends RecordComparator {

@Override
public int compare(Object baseObj1, long baseOff1, Object baseObj2, long baseOff2) {
// Note that since ordering doesn't need the total length of the record, we just pass -1
// Note that since ordering doesn't need the total length of the record, we just pass 0
// into the row.
row1.pointTo(baseObj1, baseOff1 + 4, -1);
row2.pointTo(baseObj2, baseOff2 + 4, -1);
row1.pointTo(baseObj1, baseOff1 + 4, 0);
row2.pointTo(baseObj2, baseOff2 + 4, 0);
return ordering.compare(row1, row2);
}
}
@@ -350,20 +350,24 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
throw new IOException(
s"Error reading delta file $fileToRead of $this: key size cannot be $keySize")
} else {
val keyRowBuffer = new Array[Byte](keySize)
// If the key size in an existing file is not a multiple of 8, round it up to a multiple of 8
Contributor:
I don't think we can round. Assume the actual length of an unsafe row is 8: previously we would append 4 bytes, producing a 12-byte unsafe row, and save it to the checkpoint. So here, when reading an old checkpoint, we need to read 12 bytes and set the length to 8.

Contributor:
BTW, we only need to do this for value, not key.
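A rough Scala sketch of the approach described above (hedged: input and valueSchema refer to the surrounding state-store code, the subtraction assumes every old value record carries exactly 4 extra bytes, and this is not what the PR currently does):

import com.google.common.io.ByteStreams
import org.apache.spark.sql.catalyst.expressions.UnsafeRow

// The value size recorded by an old checkpoint may include the extra 4 bytes.
val recordedValueSize = input.readInt()
val valueRowBuffer = new Array[Byte](recordedValueSize)
ByteStreams.readFully(input, valueRowBuffer, 0, recordedValueSize)
val valueRow = new UnsafeRow(valueSchema.fields.length)
// Read all recorded bytes, but expose only the true row length (e.g. read 12 bytes, set the length to 8).
valueRow.pointTo(valueRowBuffer, recordedValueSize - 4)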

val keyAllocationSize = ((keySize + 7) / 8) * 8
val keyRowBuffer = new Array[Byte](keyAllocationSize)
Contributor:
So RowBasedKeyValueBatch is the format used for the state store? cc @zsxwing

Member Author:
I think RowBasedKeyValueBatch is not used for the state store in HDFSBackedStateStoreProvider for now.

Contributor:
Why do we have this logic? What do we write into the state store?

Member Author:
Here is why I added this logic.
I believe this failure occurs because the checkpoint file contains a value record whose size is not a multiple of 8 (i.e. 28). Thus, I always round the size up to a multiple of 8.
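For concreteness, the rounding used in this change is the usual round-up-to-8 arithmetic; a minimal Scala restatement using the 28-byte example above:

// Round a byte count up to the next multiple of 8.
def roundUpTo8(size: Int): Int = ((size + 7) / 8) * 8

assert(roundUpTo8(28) == 32)  // a 28-byte record gets a 32-byte buffer
assert(roundUpTo8(32) == 32)  // already-aligned sizes are unchanged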

Contributor:
I think all unsafe rows have a size that is a multiple of 8, except those from RowBasedKeyValueBatch in the previous code. So I'm wondering how the state store can have unsafe rows with the wrong size; does the state store use RowBasedKeyValueBatch?
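A small Scala illustration of why fixed-length unsafe rows are naturally 8-byte multiples (a hedged sketch; it ignores variable-length fields, which UnsafeRow also pads to whole words):

import org.apache.spark.sql.catalyst.expressions.UnsafeRow

// For n fields: an 8-byte-aligned null bitset plus 8 bytes per fixed-length field.
def fixedRowSize(numFields: Int): Int =
  UnsafeRow.calculateBitSetWidthInBytes(numFields) + 8 * numFields

assert(Seq(1, 5, 64, 65).forall(n => fixedRowSize(n) % 8 == 0))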

Contributor:
I agree. If we have to regenerate, then we are breaking something, and we must not, since we have been guaranteeing backward compatibility. If, as @cloud-fan says, all unsafe rows except those from RowBasedKeyValueBatch should have a size that is a multiple of 8, then we need to understand what is going on: why does reading the checkpoint files fail?

Contributor:
OK, we need to figure out what's going on; it seems there are other places where we may have the wrong size in an UnsafeRow.

Member Author (@kiszk, Jul 7, 2017):
We need to know exactly how these checkpoint files were generated. Were they generated by the method @kunalkhamar mentioned, or by another tool?

Member Author:

Should we add the program that @kunalkhamar pointed out here as a new test case, to check whether all of the sizes in UnsafeRow are correct?

Member Author:
@cloud-fan @zsxwing @tdas @kunalkhamar
I misunderstood: HDFSBackedStateStoreProvider is used to store state.

I added a new test suite to check HDFSBackedStateStoreProvider for storing and restoring state, as @kunalkhamar suggested here.

Do you think it makes sense?

ByteStreams.readFully(input, keyRowBuffer, 0, keySize)

val keyRow = new UnsafeRow(keySchema.fields.length)
keyRow.pointTo(keyRowBuffer, keySize)
keyRow.pointTo(keyRowBuffer, keyAllocationSize)

val valueSize = input.readInt()
if (valueSize < 0) {
map.remove(keyRow)
} else {
val valueRowBuffer = new Array[Byte](valueSize)
// If the value size in an existing file is not a multiple of 8, round it up to a multiple of 8
val valueAllocationSize = ((valueSize + 7) / 8) * 8
val valueRowBuffer = new Array[Byte](valueAllocationSize)
ByteStreams.readFully(input, valueRowBuffer, 0, valueSize)
val valueRow = new UnsafeRow(valueSchema.fields.length)
valueRow.pointTo(valueRowBuffer, valueSize)
valueRow.pointTo(valueRowBuffer, valueAllocationSize)
map.put(keyRow, valueRow)
}
}
@@ -413,21 +417,25 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit
throw new IOException(
s"Error reading snapshot file $fileToRead of $this: key size cannot be $keySize")
} else {
val keyRowBuffer = new Array[Byte](keySize)
// If the key size in an existing file is not a multiple of 8, round it up to a multiple of 8
val keyAllocationSize = ((keySize + 7) / 8) * 8
val keyRowBuffer = new Array[Byte](keyAllocationSize)
ByteStreams.readFully(input, keyRowBuffer, 0, keySize)

val keyRow = new UnsafeRow(keySchema.fields.length)
keyRow.pointTo(keyRowBuffer, keySize)
keyRow.pointTo(keyRowBuffer, keyAllocationSize)

val valueSize = input.readInt()
if (valueSize < 0) {
throw new IOException(
s"Error reading snapshot file $fileToRead of $this: value size cannot be $valueSize")
} else {
val valueRowBuffer = new Array[Byte](valueSize)
// If the value size in an existing file is not a multiple of 8, round it up to a multiple of 8
val valueAllocationSize = ((valueSize + 7) / 8) * 8
Contributor:
Can this be made into a utility function inside UnsafeRow? This sort of adjustment should not be a concern of external users of UnsafeRow.

Contributor (@tdas, Jul 18, 2017):
For example, how about something like this

class UnsafeRow { 
  def readFromStream(byteStream: InputStream, bytes: Int): UnsafeRow = ???
}

Member Author:
@cloud-fan what do you think?
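A minimal sketch of what such a helper might look like (the name, signature, and rounding behaviour are assumptions, not part of this PR):

import java.io.{DataInputStream, InputStream}
import org.apache.spark.sql.catalyst.expressions.UnsafeRow

// Read sizeInBytes bytes of row data, padding the backing buffer so its
// length is a multiple of 8 and the pointTo assertion holds.
def readUnsafeRowFromStream(in: InputStream, numFields: Int, sizeInBytes: Int): UnsafeRow = {
  val allocationSize = ((sizeInBytes + 7) / 8) * 8
  val buffer = new Array[Byte](allocationSize)
  new DataInputStream(in).readFully(buffer, 0, sizeInBytes)
  val row = new UnsafeRow(numFields)
  row.pointTo(buffer, allocationSize)
  row
}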

val valueRowBuffer = new Array[Byte](valueAllocationSize)
ByteStreams.readFully(input, valueRowBuffer, 0, valueSize)
val valueRow = new UnsafeRow(valueSchema.fields.length)
valueRow.pointTo(valueRowBuffer, valueSize)
valueRow.pointTo(valueRowBuffer, valueAllocationSize)
map.put(keyRow, valueRow)
}
}
@@ -479,6 +479,61 @@ class StreamSuite extends StreamTest {
CheckAnswer((1, 2), (2, 2), (3, 2)))
}

testQuietly("store to and recover from a checkpoint") {
Contributor:
I don't think this test is needed. There are existing tests that already cover reading from checkpoints, etc. The critical test was reading 2.1 checkpoint files, which seems to be passing.

Member Author:
This test also checks whether the length of the checkpoints is a multiple of 8. Does that make sense? Or is there another test suite that checks the length?

Contributor:
It does not really check it explicitly, does it? It tests it implicitly by creating checkpoints and then restarting. There are other tests that already do the same thing; e.g. this test is effectively the same as
https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala#L88

Member Author:
Ah, you are right. This test currently relies on the internal assert in UnsafeRow.pointTo to check for a multiple of 8.

Contributor (@cloud-fan, Jul 20, 2017):
shall we remove this test?

Member Author:
Yes, I will remove this, since the test that @tdas pointed out triggers the same assertion failure that my test case expected.

val checkpointDir = Utils.createTempDir(namePrefix = "stream.checkpoint").getCanonicalPath

def query(data: MemoryStream[Int], checkpointDir: String, queryName: String):
DataStreamWriter[Row] = {
data.toDF
.groupBy($"value")
.agg(count("*"))
.writeStream
.outputMode("complete")
.option("checkpointLocation", checkpointDir)
.format("memory")
.queryName(queryName)
}

withSQLConf(
SQLConf.SHUFFLE_PARTITIONS.key -> "10") {
var writeQuery: StreamingQuery = null
try {
val data = MemoryStream[Int]
writeQuery = query(data, checkpointDir, "write").start()

data.addData(1, 2, 3, 4)
writeQuery.processAllAvailable()
data.addData(3, 4, 5, 6)
writeQuery.processAllAvailable()
data.addData(5, 6, 7, 8)
writeQuery.processAllAvailable()
} finally {
assert(writeQuery != null)
writeQuery.stop()
}

var restartQuery: StreamingQuery = null
try {
val data = MemoryStream[Int]
data.addData(1, 2, 3, 4)
data.addData(3, 4, 5, 6)
data.addData(5, 6, 7, 8)

restartQuery = query(data, checkpointDir, "counts").start()
restartQuery.processAllAvailable()
data.addData(9)
restartQuery.processAllAvailable()

QueryTest.checkAnswer(spark.table("counts").toDF,
Row("1", 1) :: Row("2", 1) :: Row("3", 2) :: Row("4", 2) ::
Row("5", 2) :: Row("6", 2) :: Row("7", 1) :: Row("8", 1) :: Row("9", 1) :: Nil)
} finally {
assert(restartQuery != null)
restartQuery.stop()
}
}
}

testQuietly("recover from a Spark v2.1 checkpoint") {
var inputData: MemoryStream[Int] = null
var query: DataStreamWriter[Row] = null