From 565c52eebd56a5495843c81002b017a7a86a879d Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Wed, 28 Dec 2022 14:58:23 -0500 Subject: [PATCH 1/4] short circuit null checks when comparing values from vectors that cannot contain nulls --- .../algorithm/sort/VectorValueComparator.java | 40 ++++++++++++++----- .../sort/TestDefaultVectorComparator.java | 29 ++++++++++++++ 2 files changed, 58 insertions(+), 11 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index ed32e16ca26..423cf49d8a9 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -41,6 +41,18 @@ public abstract class VectorValueComparator { */ protected int valueWidth; + + private boolean checkNullsOnCompare = true; + + /** + * This value is true by default and re-computed when vectors are attached to the comparator. If both vectors cannot + * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited + * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc. + */ + public boolean checkNullsOnCompare() { + return this.checkNullsOnCompare; + } + /** * Constructor for variable-width vectors. */ @@ -76,6 +88,10 @@ public void attachVector(V vector) { public void attachVectors(V vector1, V vector2) { this.vector1 = vector1; this.vector2 = vector2; + + final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable(); + final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable(); + this.checkNullsOnCompare = v1MayHaveNulls || v2MayHaveNulls; } /** @@ -87,17 +103,19 @@ public void attachVectors(V vector1, V vector2) { * values are equal. */ public int compare(int index1, int index2) { - boolean isNull1 = vector1.isNull(index1); - boolean isNull2 = vector2.isNull(index2); - - if (isNull1 || isNull2) { - if (isNull1 && isNull2) { - return 0; - } else if (isNull1) { - // null is smaller - return -1; - } else { - return 1; + if (checkNullsOnCompare) { + boolean isNull1 = vector1.isNull(index1); + boolean isNull2 = vector2.isNull(index2); + + if (isNull1 || isNull2) { + if (isNull1 && isNull2) { + return 0; + } else if (isNull1) { + // null is smaller + return -1; + } else { + return 1; + } } } return compareNotNull(index1, index2); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index 2fbf598bf33..06201c56a08 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -19,6 +19,7 @@ import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.BufferAllocator; @@ -34,6 +35,7 @@ import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.junit.After; import org.junit.Before; @@ -390,4 +392,31 @@ public void testCompareByte() { assertTrue(comparator.compare(7, 7) == 0); } } + + @Test + public void testCheckNullsOnCompareIsFalseForNonNullableVector() { + try (IntVector vec = new IntVector("not nullable", + FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + + assertFalse(comparator.checkNullsOnCompare()); + } + } + + @Test + public void testCheckNullsOnCompareIsTrueForNullableVector() { + try (IntVector vec = new IntVector("nullable", FieldType.nullable( + new ArrowType.Int(32, false)), allocator); + IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( + new ArrowType.Int(32, false)), allocator) + ) { + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + assertTrue(comparator.checkNullsOnCompare()); + + comparator.attachVectors(vec, vec2); + assertTrue(comparator.checkNullsOnCompare()); + } + } } From 3a7c71d99432bbcc3bd9fe2c668143c56c5dcdd6 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Thu, 29 Dec 2022 15:26:41 -0500 Subject: [PATCH 2/4] include test for null valdity buffer --- .../apache/arrow/algorithm/sort/VectorValueComparator.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index 423cf49d8a9..df041275fdb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -89,8 +89,11 @@ public void attachVectors(V vector1, V vector2) { this.vector1 = vector1; this.vector2 = vector2; - final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable(); - final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable(); + final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable() || + vector1.getValidityBuffer() == null; + final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable() || + vector2.getValidityBuffer() == null; + this.checkNullsOnCompare = v1MayHaveNulls || v2MayHaveNulls; } From 55d479b4c555c49d0366729c778ac097fd360a30 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Thu, 29 Dec 2022 15:37:33 -0500 Subject: [PATCH 3/4] include test for null valdity buffer --- .../arrow/algorithm/sort/VectorValueComparator.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index df041275fdb..07ab01bdda2 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -89,10 +89,10 @@ public void attachVectors(V vector1, V vector2) { this.vector1 = vector1; this.vector2 = vector2; - final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable() || - vector1.getValidityBuffer() == null; - final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable() || - vector2.getValidityBuffer() == null; + final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable() && + vector1.getValidityBuffer() != null; + final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable() && + vector2.getValidityBuffer() != null; this.checkNullsOnCompare = v1MayHaveNulls || v2MayHaveNulls; } From 636f569c1202a0844375553534cfd03e58fc4945 Mon Sep 17 00:00:00 2001 From: Mark Schreiber Date: Thu, 29 Dec 2022 18:05:54 -0500 Subject: [PATCH 4/4] added zeroCount test to mayHaveNulls test --- .../algorithm/sort/VectorValueComparator.java | 16 +++++-- .../sort/TestDefaultVectorComparator.java | 45 +++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index 07ab01bdda2..d2c772ca8a8 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -89,14 +89,22 @@ public void attachVectors(V vector1, V vector2) { this.vector1 = vector1; this.vector2 = vector2; - final boolean v1MayHaveNulls = vector1.getField().getFieldType().isNullable() && - vector1.getValidityBuffer() != null; - final boolean v2MayHaveNulls = vector2.getField().getFieldType().isNullable() && - vector2.getValidityBuffer() != null; + final boolean v1MayHaveNulls = mayHaveNulls(vector1); + final boolean v2MayHaveNulls = mayHaveNulls(vector2); this.checkNullsOnCompare = v1MayHaveNulls || v2MayHaveNulls; } + private boolean mayHaveNulls(V v) { + if (v.getValueCount() == 0) { + return true; + } + if (! v.getField().isNullable()) { + return false; + } + return v.getNullCount() > 0; + } + /** * Compare two values, given their indices. * @param index1 index of the first value to compare. diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index 06201c56a08..818bb60d116 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -397,6 +397,9 @@ public void testCompareByte() { public void testCheckNullsOnCompareIsFalseForNonNullableVector() { try (IntVector vec = new IntVector("not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + + ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); @@ -411,8 +414,50 @@ public void testCheckNullsOnCompareIsTrueForNullableVector() { IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( new ArrowType.Int(32, false)), allocator) ) { + + ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); + ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); + + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec); + assertTrue(comparator.checkNullsOnCompare()); + + comparator.attachVectors(vec, vec2); + assertTrue(comparator.checkNullsOnCompare()); + } + } + + @Test + public void testCheckNullsOnCompareIsFalseWithNoNulls() { + try (IntVector vec = new IntVector("nullable", FieldType.nullable( + new ArrowType.Int(32, false)), allocator); + IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( + new ArrowType.Int(32, false)), allocator) + ) { + + // no null values + ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); + ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); + assertFalse(comparator.checkNullsOnCompare()); + + comparator.attachVectors(vec, vec2); + assertFalse(comparator.checkNullsOnCompare()); + } + } + + @Test + public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { + try (IntVector vec = new IntVector("nullable", FieldType.nullable( + new ArrowType.Int(32, false)), allocator); + IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( + new ArrowType.Int(32, false)), allocator) + ) { + + final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + comparator.attachVector(vec2); assertTrue(comparator.checkNullsOnCompare()); comparator.attachVectors(vec, vec2);