diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 6d0d24baf99..5153c5c1d2a 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -803,6 +803,25 @@ public final ColumnVector mergeAndSetValidity(BinaryOp mergeOp, ColumnView... co return new ColumnVector(bitwiseMergeAndSetValidity(getNativeView(), columnViews, mergeOp.nativeId)); } + /** + * Creates a deep copy of a column while replacing the validity mask. The new validity mask is + * derived from the values of the boolean column given as argument. + * + * The boolColumn must be of type BOOL8 and have the same number of rows as the current column; otherwise a CudfException is thrown. + * The result column will have the same number of rows as the current column. + * For all indices `i` where the boolColumn is `true`, the result column will have a valid value at index `i`. + * For all other values (i.e. `false` or `null`), the result column will have nulls. + * + * If the current column has a null at a given index `i`, and the new validity mask is `true` at index `i`, + * then the row value is undefined. + * + * @param boolColumn BOOL8 column whose values are to be used as the validity mask. + * @return Deep copy of the column with replaced validity mask. + */ + public final ColumnVector copyWithBooleanColumnAsValidity(ColumnView boolColumn) { + return new ColumnVector(copyWithBooleanColumnAsValidity(getNativeView(), boolColumn.getNativeView())); + } + ///////////////////////////////////////////////////////////////////////////// // DATE/TIME ///////////////////////////////////////////////////////////////////////////// @@ -3752,6 +3771,25 @@ private static native long clamper(long nativeView, long loScalarHandle, long lo private static native long bitwiseMergeAndSetValidity(long baseHandle, long[] viewHandles, int nullConfig) throws CudfException; + /** + * Native method to deep copy a column while replacing the null mask. 
The new null mask is + * derived from the values of the boolean column given as argument. + * + * The boolean column must be of type BOOL8 and have the same number of rows as the exemplar column; otherwise a CudfException is thrown. + * The result column will have the same number of rows as the exemplar. + * For all indices `i` where the boolean column is `true`, the result column will have a valid value at index `i`. + * For all other values (i.e. `false` or `null`), the result column will have nulls. + * + * If the exemplar column has a null at a given index `i`, and the new validity mask is `true` at index `i`, + * then the resultant row value is undefined. + * + * @param exemplarViewHandle native handle of the column view of the column that is deep copied. + * @param boolColumnViewHandle native handle of the column view of the BOOL8 column whose values are to be used as the null mask. + * @return Native handle of the deep copy of the column with replaced null mask. + */ + private static native long copyWithBooleanColumnAsValidity(long exemplarViewHandle, + long boolColumnViewHandle) throws CudfException; + /** * Get the number of bytes needed to allocate a validity buffer for the given number of rows. */ diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index 0ed2f31bfac..2db37d57cbb 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -225,6 +225,7 @@ add_library( src/CudaJni.cpp src/ColumnVectorJni.cpp src/ColumnViewJni.cpp + src/ColumnViewJni.cu src/CompiledExpression.cpp src/ContiguousTableJni.cpp src/HashJoinJni.cpp diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 02d5dc4569c..4cd4b070aed 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -14,8 +14,11 @@ * limitations under the License. 
*/ +#include "ColumnViewJni.hpp" #include +#include + #include #include #include @@ -66,14 +69,11 @@ #include #include #include -#include - -#include "cudf/types.hpp" #include "cudf_jni_apis.hpp" #include "dtype_utils.hpp" -#include "jni.h" #include "jni_utils.hpp" +#include "map_lookup.hpp" namespace { @@ -1576,6 +1576,21 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_bitwiseMergeAndSetValidit CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_copyWithBooleanColumnAsValidity( + JNIEnv *env, jobject j_object, jlong exemplar_handle, jlong validity_column_handle) { + JNI_NULL_CHECK(env, exemplar_handle, "ColumnView handle is null", 0); + JNI_NULL_CHECK(env, validity_column_handle, "Validity column handle is null", 0); + try { + cudf::jni::auto_set_device(env); + + auto const exemplar = *reinterpret_cast<cudf::column_view const *>(exemplar_handle); + auto const validity = *reinterpret_cast<cudf::column_view const *>(validity_column_handle); + auto deep_copy = cudf::jni::new_column_with_boolean_column_as_validity(exemplar, validity); + return reinterpret_cast<jlong>(deep_copy.release()); // ownership moves to the Java caller + } + CATCH_STD(env, 0); +} + //////// // Native cudf::column_view life cycle and metadata access methods. Life cycle methods // should typically only be called from the CudfColumn inner class. diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu new file mode 100644 index 00000000000..47055ca1611 --- /dev/null +++ b/java/src/main/native/src/ColumnViewJni.cu @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cudf/column/column_device_view.cuh> +#include <cudf/detail/iterator.cuh> +#include <cudf/detail/valid_if.cuh> + +#include "ColumnViewJni.hpp" + +namespace cudf::jni { + +std::unique_ptr<cudf::column> +new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, + cudf::column_view const &validity_column) { + CUDF_EXPECTS(validity_column.type().id() == type_id::BOOL8, + "Validity column must be of type bool"); + CUDF_EXPECTS(validity_column.size() == exemplar.size(), + "Exemplar and validity columns must have the same size"); + + auto validity_device_view = cudf::column_device_view::create(validity_column); + auto validity_begin = cudf::detail::make_optional_iterator<bool>( + *validity_device_view, cudf::nullate::DYNAMIC{validity_column.has_nulls()}); + auto validity_end = validity_begin + validity_device_view->size(); + auto [null_mask, null_count] = + cudf::detail::valid_if(validity_begin, validity_end, [] __device__(auto optional_bool) { + return optional_bool.value_or(false); // a null validity entry counts as invalid + }); + auto const exemplar_without_null_mask = cudf::column_view{ + exemplar.type(), + exemplar.size(), + exemplar.head(), + nullptr, // null mask: dropped, it is replaced below + 0, // null count + exemplar.offset(), + std::vector<cudf::column_view>{exemplar.child_begin(), exemplar.child_end()}}; + auto deep_copy = std::make_unique<cudf::column>(exemplar_without_null_mask); // copies the data + deep_copy->set_null_mask(std::move(null_mask), null_count); + return deep_copy; +} + +} // namespace cudf::jni diff --git a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp new file mode 100644 index 00000000000..37e58ecb63a --- /dev/null +++ b/java/src/main/native/src/ColumnViewJni.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 
2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <cudf/column/column.hpp> + +namespace cudf::jni { + +/** + * @brief Creates a deep copy of the exemplar column, with its validity set to the equivalent + * of the boolean `validity_column`'s values. + * + * The validity column must be BOOL8 with as many rows as the exemplar (checked via CUDF_EXPECTS). + * The result column will have the same number of rows as the exemplar. + * For all indices `i` where the validity column is `true`, the result column will have a valid value + * at index `i`. For all other values (i.e. `false` or `null`), the result column will have nulls. + * + * @param exemplar The column to be deep copied. + * @param validity_column BOOL8 column whose values are to be used as the validity. + * @return Deep copy of the exemplar, with the replaced validity. 
+ */ +std::unique_ptr<cudf::column> +new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar, + cudf::column_view const &validity_column); + +} // namespace cudf::jni diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 7120a40a26a..b78183692a3 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -5886,4 +5886,44 @@ void testReplaceSameIndexColumnInStruct() { }); assertTrue(e.getMessage().contains("Duplicate mapping found for replacing child index")); } + + @Test + void testCopyWithBooleanColumnAsValidity() { + final Boolean T = true; + final Boolean F = false; + final Integer X = null; + + // Straight-line: Invalidate every other row. + try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, T, F, T); + ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, 8, X, 10); + ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { + assertColumnsAreEqual(expected, result); + } + + // Straight-line: Invalidate all Rows. + try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + ColumnVector validity = ColumnVector.fromBoxedBooleans(F, F, F, F, F, F, F, F, F, F); + ColumnVector expected = ColumnVector.fromBoxedInts(X, X, X, X, X, X, X, X, X, X); + ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { + assertColumnsAreEqual(expected, result); + } + + // Nulls in the validity column are treated as invalid. 
+ try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T, F, T, F, null, F, null); + ColumnVector expected = ColumnVector.fromBoxedInts(X, 2, X, 4, X, 6, X, X, X, X); + ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { + assertColumnsAreEqual(expected, result); + } + + // Negative case: Mismatch in row count. + Exception x = assertThrows(CudfException.class, () -> { + try (ColumnVector exemplar = ColumnVector.fromBoxedInts(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + ColumnVector validity = ColumnVector.fromBoxedBooleans(F, T, F, T); + ColumnVector result = exemplar.copyWithBooleanColumnAsValidity(validity)) { + } + }); + assertTrue(x.getMessage().contains("Exemplar and validity columns must have the same size")); + } }