From 24dd723f7a0b80b617f6931b8e799cb65b49b193 Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Wed, 22 Jun 2016 07:52:54 +0800 Subject: [PATCH 1/4] fix SPARK-16071 --- .../catalyst/expressions/codegen/BufferHolder.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java index af61e2011f400..da72f8fe3ab36 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java @@ -45,7 +45,12 @@ public BufferHolder(UnsafeRow row) { } public BufferHolder(UnsafeRow row, int initialSize) { - this.fixedSize = UnsafeRow.calculateBitSetWidthInBytes(row.numFields()) + 8 * row.numFields(); + int bitsetWidthInBytes = UnsafeRow.calculateBitSetWidthInBytes(row.numFields()); + if (row.numFields() > (Integer.MAX_VALUE - initialSize) / 8) { + throw new UnsupportedOperationException( + "Cannot create BufferHolder from input UnsafeRow because it is too big."); + } + this.fixedSize = bitsetWidthInBytes + 8 * row.numFields(); this.buffer = new byte[fixedSize + initialSize]; this.row = row; this.row.pointTo(buffer, buffer.length); @@ -55,6 +60,11 @@ public BufferHolder(UnsafeRow row, int initialSize) { * Grows the buffer by at least neededSize and points the row to the buffer. */ public void grow(int neededSize) { + if (neededSize > Integer.MAX_VALUE / 2 - totalSize()) { + throw new UnsupportedOperationException( + "Cannot grow BufferHolder by size " + neededSize + " because the size after growing " + + "exceeds size limitation " + Integer.MAX_VALUE / 2); + } final int length = totalSize() + neededSize; if (buffer.length < length) { // This will not happen frequently, because the buffer is re-used. From 6473e6d02d7360579ef4131154052a9ebe5f885d Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Wed, 22 Jun 2016 09:20:40 +0800 Subject: [PATCH 2/4] update UT --- .../codegen/BufferHolderSuite.scala | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala new file mode 100644 index 0000000000000..65e6811b0ff31 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.UnsafeRow + +class BufferHolderSuite extends SparkFunSuite { + + test("SPARK-16071 Check the size limit to avoid integer overflow") { + var e = intercept[UnsupportedOperationException] { + new BufferHolder(new UnsafeRow(Int.MaxValue / 8)) + } + assert(e.getMessage.contains("it is too big")) + + val holder = new BufferHolder(new UnsafeRow(1000)) + holder.grow(1000) + e = intercept[UnsupportedOperationException] { + holder.grow(2e10.toInt) + } + assert(e.getMessage.contains("exceeds size limitation")) + } +} From b831e85a27d42a510960e70a1d033ed3fe066c51 Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Wed, 29 Jun 2016 08:47:22 +0800 Subject: [PATCH 3/4] update UT --- .../spark/sql/catalyst/expressions/codegen/BufferHolder.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java index da72f8fe3ab36..cee7efecc51cb 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java @@ -46,9 +46,10 @@ public BufferHolder(UnsafeRow row) { public BufferHolder(UnsafeRow row, int initialSize) { int bitsetWidthInBytes = UnsafeRow.calculateBitSetWidthInBytes(row.numFields()); - if (row.numFields() > (Integer.MAX_VALUE - initialSize) / 8) { + if (row.numFields() > (Integer.MAX_VALUE - initialSize - bitsetWidthInBytes) / 8) { throw new UnsupportedOperationException( - "Cannot create BufferHolder from input UnsafeRow because it is too big."); + "Cannot create BufferHolder for input UnsafeRow because there are " + + "too many fields (number of fields: " + row.numFields() + ")"); } this.fixedSize = bitsetWidthInBytes + 8 * row.numFields(); this.buffer = new byte[fixedSize + initialSize]; From 943f7de62204af5fee228e938d293e3283f4b395 Mon Sep 17 00:00:00 2001 From: Sean Zhong Date: Wed, 29 Jun 2016 14:18:40 +0800 Subject: [PATCH 4/4] fix UT --- .../sql/catalyst/expressions/codegen/BufferHolder.java | 7 ++++--- .../catalyst/expressions/codegen/BufferHolderSuite.scala | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java index cee7efecc51cb..0e4264fe8dfb5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolder.java @@ -61,15 +61,16 @@ public BufferHolder(UnsafeRow row, int initialSize) { * Grows the buffer by at least neededSize and points the row to the buffer. */ public void grow(int neededSize) { - if (neededSize > Integer.MAX_VALUE / 2 - totalSize()) { + if (neededSize > Integer.MAX_VALUE - totalSize()) { throw new UnsupportedOperationException( "Cannot grow BufferHolder by size " + neededSize + " because the size after growing " + - "exceeds size limitation " + Integer.MAX_VALUE / 2); + "exceeds size limitation " + Integer.MAX_VALUE); } final int length = totalSize() + neededSize; if (buffer.length < length) { // This will not happen frequently, because the buffer is re-used. - final byte[] tmp = new byte[length * 2]; + int newLength = length < Integer.MAX_VALUE / 2 ? length * 2 : Integer.MAX_VALUE; + final byte[] tmp = new byte[newLength]; Platform.copyMemory( buffer, Platform.BYTE_ARRAY_OFFSET, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala index 65e6811b0ff31..c7c386b5b838a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/BufferHolderSuite.scala @@ -26,12 +26,13 @@ class BufferHolderSuite extends SparkFunSuite { var e = intercept[UnsupportedOperationException] { new BufferHolder(new UnsafeRow(Int.MaxValue / 8)) } - assert(e.getMessage.contains("it is too big")) + assert(e.getMessage.contains("too many fields")) val holder = new BufferHolder(new UnsafeRow(1000)) + holder.reset() holder.grow(1000) e = intercept[UnsupportedOperationException] { - holder.grow(2e10.toInt) + holder.grow(Integer.MAX_VALUE) } assert(e.getMessage.contains("exceeds size limitation")) }