diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3d604fb1030..c796b2eafc1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -438,7 +438,8 @@ add_library( src/round/round.cu src/scalar/scalar.cpp src/scalar/scalar_factories.cpp - src/search/contains.cu + src/search/contains_column.cu + src/search/contains_table.cu src/search/contains_nested.cu src/search/search_ordered.cu src/sort/is_sorted.cu diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp index 6c1500cf1cf..7d80b42529e 100644 --- a/cpp/benchmarks/join/join_common.hpp +++ b/cpp/benchmarks/join/join_common.hpp @@ -88,14 +88,14 @@ static void BM_join(state_type& state, Join JoinFunc) return cudf::detail::valid_if(validity, validity + size, thrust::identity{}).first; }; - std::unique_ptr build_key_column = [&]() { + std::unique_ptr build_key_column0 = [&]() { return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), build_table_size, build_random_null_mask(build_table_size)) : cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), build_table_size); }(); - std::unique_ptr probe_key_column = [&]() { + std::unique_ptr probe_key_column0 = [&]() { return Nullable ? cudf::make_numeric_column(cudf::data_type(cudf::type_to_id()), probe_table_size, build_random_null_mask(probe_table_size)) @@ -104,21 +104,36 @@ static void BM_join(state_type& state, Join JoinFunc) }(); generate_input_tables( - build_key_column->mutable_view().data(), + build_key_column0->mutable_view().data(), build_table_size, - probe_key_column->mutable_view().data(), + probe_key_column0->mutable_view().data(), probe_table_size, selectivity, multiplicity); + // Copy build_key_column0 and probe_key_column0 into new columns. + // If Nullable, the new columns will be assigned new nullmasks. + auto const build_key_column1 = [&]() { + auto col = std::make_unique(build_key_column0->view()); + if (Nullable) { col->set_null_mask(build_random_null_mask(build_table_size)); } + return col; + }(); + auto const probe_key_column1 = [&]() { + auto col = std::make_unique(probe_key_column0->view()); + if (Nullable) { col->set_null_mask(build_random_null_mask(probe_table_size)); } + return col; + }(); + auto init = cudf::make_fixed_width_scalar(static_cast(0)); auto build_payload_column = cudf::sequence(build_table_size, *init); auto probe_payload_column = cudf::sequence(probe_table_size, *init); CUDF_CHECK_CUDA(0); - cudf::table_view build_table({build_key_column->view(), *build_payload_column}); - cudf::table_view probe_table({probe_key_column->view(), *probe_payload_column}); + cudf::table_view build_table( + {build_key_column0->view(), build_key_column1->view(), *build_payload_column}); + cudf::table_view probe_table( + {probe_key_column0->view(), probe_key_column1->view(), *probe_payload_column}); // Setup join parameters and result table [[maybe_unused]] std::vector columns_to_join = {0}; diff --git a/cpp/include/cudf/detail/search.hpp b/cpp/include/cudf/detail/search.hpp index bd2d4fecb32..a9764235c90 100644 --- a/cpp/include/cudf/detail/search.hpp +++ b/cpp/include/cudf/detail/search.hpp @@ -22,6 +22,7 @@ #include #include +#include namespace cudf::detail { /** @@ -65,6 +66,37 @@ std::unique_ptr contains(column_view const& haystack, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); +/** + * @brief Check if rows in the given `needles` table exist in the `haystack` table. + * + * Given two tables, each row in the `needles` table is checked to see if there is any matching row + * (i.e., compared equal to it) in the `haystack` table. The boolean search results are written into + * the corresponding rows of the output array. + * + * @code{.pseudo} + * Example: + * + * haystack = { { 5, 4, 1, 2, 3 } } + * needles = { { 0, 1, 2 } } + * output = { false, true, true } + * @endcode + * + * @param haystack The table containing the search space + * @param needles A table of rows whose existence to check in the search space + * @param compare_nulls Control whether nulls should be compared as equal or not + * @param compare_nans Control whether floating-point NaNs values should be compared as equal or not + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned vector + * @return A vector of bools indicating if each row in `needles` has matching rows in `haystack` + */ +rmm::device_uvector contains( + table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Check if the (unique) row of the `needle` column is contained in the `haystack` column. * diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp index f13e2e847fb..d4d6e44509f 100644 --- a/cpp/include/cudf/join.hpp +++ b/cpp/include/cudf/join.hpp @@ -327,7 +327,6 @@ std::unique_ptr full_join( * @code{.pseudo} * TableA: {{0, 1, 2}} * TableB: {{1, 2, 3}} - * right_on: {1} * Result: {1, 2} * @endcode * diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 2df37887d9a..2551306bf99 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -327,6 +327,14 @@ inline bool has_nested_nulls(table_view const& input) }); } +/** + * @brief The function to collect all nullable columns at all nested levels in a given table. + * + * @param table The input table + * @return A vector containing all nullable columns in the input table + */ +std::vector get_nullable_columns(table_view const& table); + /** * @brief Checks if two `table_view`s have columns of same types * diff --git a/cpp/src/join/join_common_utils.cuh b/cpp/src/join/join_common_utils.cuh index b3994685623..43c20cc01d9 100644 --- a/cpp/src/join/join_common_utils.cuh +++ b/cpp/src/join/join_common_utils.cuh @@ -42,25 +42,28 @@ constexpr auto remap_sentinel_hash(H hash, S sentinel) } /** - * @brief Device functor to create a pair of hash value and index for a given row. + * @brief Device functor to create a pair of {hash_value, row_index} for a given row. + * + * @tparam T Type of row index, must be convertible to `size_type`. + * @tparam Hasher The type of internal hasher to compute row hash. */ +template class make_pair_function { public: - CUDF_HOST_DEVICE make_pair_function(row_hash const& hash, - hash_value_type const empty_key_sentinel) + CUDF_HOST_DEVICE make_pair_function(Hasher const& hash, hash_value_type const empty_key_sentinel) : _hash{hash}, _empty_key_sentinel{empty_key_sentinel} { } - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept + __device__ __forceinline__ auto operator()(size_type i) const noexcept { // Compute the hash value of row `i` auto row_hash_value = remap_sentinel_hash(_hash(i), _empty_key_sentinel); - return cuco::make_pair(row_hash_value, i); + return cuco::make_pair(row_hash_value, T{i}); } private: - row_hash _hash; + Hasher _hash; hash_value_type const _empty_key_sentinel; }; @@ -93,7 +96,10 @@ class row_is_valid { * probe_row_hash == build_row_hash) and then using a row_equality_comparator * to compare the contents of the row indices that are stored as the payload in * the hash map. + * + * @tparam Comparator The row comparator type to perform row equality comparison from row indices. */ +template class pair_equality { public: pair_equality(table_device_view lhs, @@ -104,14 +110,16 @@ class pair_equality { { } - __device__ __forceinline__ bool operator()(const pair_type& lhs, - const pair_type& rhs) const noexcept + pair_equality(Comparator const d_eqcomp) : _check_row_equality{std::move(d_eqcomp)} {} + + template + __device__ __forceinline__ bool operator()(LhsPair const& lhs, RhsPair const& rhs) const noexcept { return lhs.first == rhs.first and _check_row_equality(rhs.second, lhs.second); } private: - row_equality _check_row_equality; + Comparator _check_row_equality; }; /** diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu index d3233f3a925..5cb58b92fe9 100644 --- a/cpp/src/join/semi_join.cu +++ b/cpp/src/join/semi_join.cu @@ -14,15 +14,12 @@ * limitations under the License. */ -#include #include -#include #include #include -#include #include -#include +#include #include #include #include @@ -37,26 +34,11 @@ #include #include #include -#include +#include namespace cudf { namespace detail { -namespace { -/** - * @brief Device functor to create a pair of hash value and index for a given row. - */ -struct make_pair_fn { - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept - { - // The value is irrelevant since we only ever use the hash map to check for - // membership of a particular row index. - return cuco::make_pair(static_cast(i), 0); - } -}; - -} // namespace - std::unique_ptr> left_semi_anti_join( join_kind const kind, cudf::table_view const& left_keys, @@ -78,90 +60,28 @@ std::unique_ptr> left_semi_anti_join( return result; } - auto const left_num_rows = left_keys.num_rows(); - auto const right_num_rows = right_keys.num_rows(); - - // flatten structs for the right and left and use that for the hash table - auto right_flattened_tables = structs::detail::flatten_nested_columns( - right_keys, {}, {}, structs::detail::column_nullability::FORCE); - auto left_flattened_tables = structs::detail::flatten_nested_columns( - left_keys, {}, {}, structs::detail::column_nullability::FORCE); - - auto right_flattened_keys = right_flattened_tables.flattened_columns(); - auto left_flattened_keys = left_flattened_tables.flattened_columns(); - - // Create hash table. - semi_map_type hash_table{compute_hash_table_size(right_num_rows), - cuco::sentinel::empty_key{std::numeric_limits::max()}, - cuco::sentinel::empty_value{cudf::detail::JoinNoneValue}, - hash_table_allocator_type{default_allocator{}, stream}, - stream.value()}; - - // Create hash table containing all keys found in right table - auto right_rows_d = table_device_view::create(right_flattened_keys, stream); - auto const right_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(right_flattened_keys)}; - row_hash const hash_build{right_nulls, *right_rows_d}; - row_equality equality_build{right_nulls, *right_rows_d, *right_rows_d, compare_nulls}; - make_pair_fn pair_func_build{}; - - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); - - // skip rows that are null here. - if ((compare_nulls == null_equality::EQUAL) or (not nullable(right_keys))) { - hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); - } else { - thrust::counting_iterator stencil(0); - auto const [row_bitmask, _] = cudf::detail::bitmask_and(right_flattened_keys, stream); - row_is_valid pred{static_cast(row_bitmask.data())}; - - // insert valid rows - hash_table.insert_if( - iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); - } - - // Now we have a hash table, we need to iterate over the rows of the left table - // and check to see if they are contained in the hash table - auto left_rows_d = table_device_view::create(left_flattened_keys, stream); - auto const left_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(left_flattened_keys)}; - row_hash hash_probe{left_nulls, *left_rows_d}; - // Note: This equality comparator violates symmetry of equality and is - // therefore relying on the implementation detail of the order in which its - // operator is invoked. If cuco makes no promises about the order of - // invocation this seems a bit unsafe. - row_equality equality_probe{left_nulls, *right_rows_d, *left_rows_d, compare_nulls}; - - // For semi join we want contains to be true, for anti join we want contains to be false - bool const join_type_boolean = (kind == join_kind::LEFT_SEMI_JOIN); - - auto hash_table_view = hash_table.get_device_view(); + // Materialize a `flagged` boolean array to generate a gather map. + // Previously, the gather map was generated directly without this array but by calling to + // `map.contains` inside the `thrust::copy_if` kernel. However, that led to increasing register + // usage and reducing performance, as reported here: https://github.com/rapidsai/cudf/pull/10511. + auto const flagged = + cudf::detail::contains(right_keys, left_keys, compare_nulls, nan_equality::ALL_EQUAL, stream); + auto const left_num_rows = left_keys.num_rows(); auto gather_map = std::make_unique>(left_num_rows, stream, mr); - rmm::device_uvector flagged(left_num_rows, stream, mr); - auto flagged_d = flagged.data(); - - auto counting_iter = thrust::counting_iterator(0); - thrust::for_each( - rmm::exec_policy(stream), - counting_iter, - counting_iter + left_num_rows, - [flagged_d, hash_table_view, join_type_boolean, hash_probe, equality_probe] __device__( - const size_type idx) { - flagged_d[idx] = - hash_table_view.contains(idx, hash_probe, equality_probe) == join_type_boolean; - }); - // gather_map_end will be the end of valid data in gather_map auto gather_map_end = thrust::copy_if(rmm::exec_policy(stream), - counting_iter, - counting_iter + left_num_rows, + thrust::counting_iterator(0), + thrust::counting_iterator(left_num_rows), gather_map->begin(), - [flagged_d] __device__(size_type const idx) { return flagged_d[idx]; }); + [kind, d_flagged = flagged.begin()] __device__(size_type const idx) { + return *(d_flagged + idx) == (kind == join_kind::LEFT_SEMI_JOIN); + }); - auto join_size = thrust::distance(gather_map->begin(), gather_map_end); - gather_map->resize(join_size, stream); + gather_map->resize(thrust::distance(gather_map->begin(), gather_map_end), stream); return gather_map; } diff --git a/cpp/src/search/contains.cu b/cpp/src/search/contains_column.cu similarity index 100% rename from cpp/src/search/contains.cu rename to cpp/src/search/contains_column.cu diff --git a/cpp/src/search/contains_table.cu b/cpp/src/search/contains_table.cu new file mode 100644 index 00000000000..2aa6bf9f9a3 --- /dev/null +++ b/cpp/src/search/contains_table.cu @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace cudf::detail { + +namespace { + +using cudf::experimental::row::lhs_index_type; +using cudf::experimental::row::rhs_index_type; + +} // namespace + +rmm::device_uvector contains(table_view const& haystack, + table_view const& needles, + null_equality compare_nulls, + nan_equality compare_nans, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + // Use a hash map with key type is row hash values and map value type is `lhs_index_type` to store + // all row indices of the haystack table. + using static_multimap = + cuco::static_multimap>, + cuco::double_hashing>; + + auto map = static_multimap(compute_hash_table_size(haystack.num_rows()), + cuco::sentinel::empty_key{std::numeric_limits::max()}, + cuco::sentinel::empty_value{lhs_index_type{detail::JoinNoneValue}}, + stream.value(), + detail::hash_table_allocator_type{default_allocator{}, stream}); + + auto const haystack_has_nulls = has_nested_nulls(haystack); + auto const needles_has_nulls = has_nested_nulls(needles); + + // Insert all row hash values and indices of the haystack table. + { + auto const hasher = cudf::experimental::row::hash::row_hasher(haystack, stream); + auto const d_hasher = hasher.device_hasher(nullate::DYNAMIC{haystack_has_nulls}); + + using make_pair_fn = make_pair_function; + + auto const haystack_it = cudf::detail::make_counting_transform_iterator( + size_type{0}, make_pair_fn{d_hasher, map.get_empty_key_sentinel()}); + + // If the haystack table has nulls but they are compared unequal, don't insert them. + // Otherwise, it was known to cause performance issue: + // - https://github.com/rapidsai/cudf/pull/6943 + // - https://github.com/rapidsai/cudf/pull/8277 + if (haystack_has_nulls && compare_nulls == null_equality::UNEQUAL) { + // Collect all nullable columns at all levels from the haystack table. + auto const haystack_nullable_columns = get_nullable_columns(haystack); + CUDF_EXPECTS(haystack_nullable_columns.size() > 0, + "Haystack table has nulls thus it should have nullable columns."); + + // If there are more than one nullable column, we compute bitmask_and of their null masks. + // Otherwise, we have only one nullable column and can use its null mask directly. + auto const row_bitmask = + haystack_nullable_columns.size() > 1 + ? cudf::detail::bitmask_and(table_view{haystack_nullable_columns}, stream).first + : rmm::device_buffer{0, stream}; + auto const row_bitmask_ptr = haystack_nullable_columns.size() > 1 + ? static_cast(row_bitmask.data()) + : haystack_nullable_columns.front().null_mask(); + + // Insert only rows that do not have any null at any level. + map.insert_if(haystack_it, + haystack_it + haystack.num_rows(), + thrust::counting_iterator(0), // stencil + row_is_valid{row_bitmask_ptr}, + stream.value()); + } else { + map.insert(haystack_it, haystack_it + haystack.num_rows(), stream.value()); + } + } + + // The output vector. + auto contained = rmm::device_uvector(needles.num_rows(), stream, mr); + + // Check existence for each row of the needles table in the haystack table. + { + auto const hasher = cudf::experimental::row::hash::row_hasher(needles, stream); + auto const d_hasher = hasher.device_hasher(nullate::DYNAMIC{needles_has_nulls}); + + auto const comparator = + cudf::experimental::row::equality::two_table_comparator(haystack, needles, stream); + + using make_pair_fn = make_pair_function; + + auto const needles_it = cudf::detail::make_counting_transform_iterator( + size_type{0}, make_pair_fn{d_hasher, map.get_empty_key_sentinel()}); + + auto const check_contains = [&](auto const value_comp) { + auto const d_eqcomp = comparator.equal_to( + nullate::DYNAMIC{needles_has_nulls || haystack_has_nulls}, compare_nulls, value_comp); + map.pair_contains(needles_it, + needles_it + needles.num_rows(), + contained.begin(), + pair_equality{d_eqcomp}, + stream.value()); + }; + + if (compare_nans == nan_equality::ALL_EQUAL) { + using nan_equal_comparator = + cudf::experimental::row::equality::nan_equal_physical_equality_comparator; + check_contains(nan_equal_comparator{}); + } else { + using nan_unequal_comparator = + cudf::experimental::row::equality::physical_equality_comparator; + check_contains(nan_unequal_comparator{}); + } + } + + return contained; +} + +} // namespace cudf::detail diff --git a/cpp/src/table/table_view.cpp b/cpp/src/table/table_view.cpp index a315da6faac..a413c8fe65b 100644 --- a/cpp/src/table/table_view.cpp +++ b/cpp/src/table/table_view.cpp @@ -98,6 +98,22 @@ table_view scatter_columns(table_view const& source, return table_view{updated_columns}; } +std::vector get_nullable_columns(table_view const& table) +{ + std::vector result; + for (auto const& col : table) { + if (col.nullable()) { result.push_back(col); } + for (auto it = col.child_begin(); it != col.child_end(); ++it) { + auto const& child = *it; + if (child.size() == col.size()) { + auto const child_result = get_nullable_columns(table_view{{child}}); + result.insert(result.end(), child_result.begin(), child_result.end()); + } + } + } + return result; +} + namespace detail { template