diff --git a/cpp/include/cudf/lists/contains.hpp b/cpp/include/cudf/lists/contains.hpp index 7cd40bb2f86..d529677d505 100644 --- a/cpp/include/cudf/lists/contains.hpp +++ b/cpp/include/cudf/lists/contains.hpp @@ -27,7 +27,7 @@ namespace lists { */ /** - * @brief Create a column of bool values indicating whether the specified scalar + * @brief Create a column of `bool` values indicating whether the specified scalar * is an element of each row of a list column. * * The output column has as many elements as the input `lists` column. @@ -51,7 +51,7 @@ std::unique_ptr contains( rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** - * @brief Create a column of bool values indicating whether the list rows of the first + * @brief Create a column of `bool` values indicating whether the list rows of the first * column contain the corresponding values in the second column * * The output column has as many elements as the input `lists` column. @@ -74,6 +74,104 @@ std::unique_ptr contains( cudf::column_view const& search_keys, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Create a column of `bool` values indicating whether each row in the `lists` column + * contains at least one null element. + * + * The output column has as many elements as the input `lists` column. + * Output `column[i]` is set to null if the list row `lists[i]` is null. + * Otherwise, `column[i]` is set to a non-null boolean value, depending on whether that list + * contains a null element. + * (Empty list rows are considered *NOT* to contain a null element.) + * + * @param lists Lists column whose `n` rows are to be searched + * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @return std::unique_ptr BOOL8 column of `n` rows with the result of the lookup + */ +std::unique_ptr contains_nulls( + cudf::lists_column_view const& lists, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Option to choose whether `index_of()` returns the first or last match + * of a search key in a list row + */ +enum class duplicate_find_option : int32_t { + FIND_FIRST = 0, ///< Finds first instance of a search key in a list row. + FIND_LAST ///< Finds last instance of a search key in a list row. +}; + +/** + * @brief Create a column of `size_type` values indicating the position of a search key + * within each list row in the `lists` column + * + * The output column has as many elements as there are rows in the input `lists` column. + * Output `column[i]` contains a 0-based index indicating the position of the search key + * in each list, counting from the beginning of the list. + * Note: + * 1. If the `search_key` is null, all output rows are set to null. + * 2. If the row `lists[i]` is null, `output[i]` is also null. + * 3. If the row `lists[i]` does not contain the `search_key`, `output[i]` is set to `-1`. + * 4. In all other cases, `output[i]` is set to a non-negative `size_type` index. + * + * If the `find_option` is set to `FIND_FIRST`, the position of the first match for + * `search_key` is returned. + * If `find_option == FIND_LAST`, the position of the last match in the list row is + * returned. + * + * @param lists Lists column whose `n` rows are to be searched + * @param search_key The scalar key to be looked up in each list row + * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or + * last (`FIND_LAST`) + * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` + * + * @throw cudf::logic_error If `search_key` type does not match the element type in `lists` + * @throw cudf::logic_error If `search_key` is of a nested type, or `lists` contains nested + * elements (LIST, STRUCT) + */ +std::unique_ptr index_of( + cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + +/** + * @brief Create a column of `size_type` values indicating the position of a search key + * row within the corresponding list row in the `lists` column + * + * The output column has as many elements as there are rows in the input `lists` column. + * Output `column[i]` contains a 0-based index indicating the position of each search key + * row in its corresponding list row, counting from the beginning of the list. + * Note: + * 1. If `search_keys[i]` is null, `output[i]` is also null. + * 2. If the row `lists[i]` is null, `output[i]` is also null. + * 3. If the row `lists[i]` does not contain `search_keys[i]`, `output[i]` is set to `-1`. + * 4. In all other cases, `output[i]` is set to a non-negative `size_type` index. + * + * If the `find_option` is set to `FIND_FIRST`, the position of the first match for + * `search_keys[i]` is returned. + * If `find_option == FIND_LAST`, the position of the last match in the list row is + * returned. + * + * @param lists Lists column whose `n` rows are to be searched + * @param search_keys A column of search keys to be looked up in each corresponding row of + * `lists` + * @param find_option Whether to return the position of the first match (`FIND_FIRST`) or + * last (`FIND_LAST`) + * @param mr Device memory resource used to allocate the returned column's device memory. 
+ * @return std::unique_ptr INT32 column of `n` rows with the location of the `search_key` + * + * @throw cudf::logic_error If `search_keys` does not match `lists` in its number of rows + * @throw cudf::logic_error If `search_keys` type does not match the element type in `lists` + * @throw cudf::logic_error If `lists` or `search_keys` contains nested elements (LIST, STRUCT) + */ +std::unique_ptr index_of( + cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + duplicate_find_option find_option = duplicate_find_option::FIND_FIRST, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of group } // namespace lists } // namespace cudf diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu index 3d135992dea..5d095fdd5a3 100644 --- a/cpp/src/lists/contains.cu +++ b/cpp/src/lists/contains.cu @@ -35,6 +35,8 @@ namespace lists { namespace { +auto constexpr absent_index = size_type{-1}; + auto get_search_keys_device_iterable_view(cudf::column_view const& search_keys, rmm::cuda_stream_view stream) { @@ -46,6 +48,59 @@ auto get_search_keys_device_iterable_view(cudf::scalar const& search_key, rmm::c return &search_key; } +template +auto __device__ find_begin(list_device_view const& list) +{ + if constexpr (find_option == duplicate_find_option::FIND_FIRST) { + return list.pair_rep_begin(); + } else { + return thrust::make_reverse_iterator(list.pair_rep_end()); + } +} + +template +auto __device__ find_end(list_device_view const& list) +{ + if constexpr (find_option == duplicate_find_option::FIND_FIRST) { + return list.pair_rep_end(); + } else { + return thrust::make_reverse_iterator(list.pair_rep_begin()); + } +} + +template +size_type __device__ distance([[maybe_unused]] Iterator begin, Iterator end, Iterator find_iter) +{ + if (find_iter == end) { + return absent_index; // Not found. 
+ } + + if constexpr (find_option == duplicate_find_option::FIND_FIRST) { + return find_iter - begin; // Distance of find_position from begin. + } else { + return end - find_iter - 1; // Distance of find_position from end. + } +} + +/** + * @brief __device__ functor to search for a key in a `list_device_view`. + */ +template +struct finder { + template + __device__ size_type operator()(list_device_view const& list, ElementType const& search_key) const + { + auto const list_begin = find_begin(list); + auto const list_end = find_end(list); + auto const find_iter = thrust::find_if( + thrust::seq, list_begin, list_end, [search_key] __device__(auto element_and_validity) { + auto [element, element_is_valid] = element_and_validity; + return element_is_valid && cudf::equality_compare(element, search_key); + }); + return distance(list_begin, list_end, find_iter); + }; +}; + /** * @brief Functor to search each list row for the specified search keys. */ @@ -63,13 +118,15 @@ struct lookup_functor { Args&&...) 
const { CUDF_FAIL( - "lists::contains() is only supported on numeric types, decimals, chrono types, and strings."); + "List search operations are only supported on numeric types, decimals, chrono types, and " + "strings."); } - std::pair construct_null_mask(lists_column_view const& input_lists, - column_view const& result_validity, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + std::pair construct_null_mask( + lists_column_view const& input_lists, + column_view const& result_validity, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const { if (!search_keys_have_nulls && !input_lists.has_nulls() && !input_lists.child().has_nulls()) { return {rmm::device_buffer{0, stream, mr}, size_type{0}}; @@ -82,50 +139,31 @@ struct lookup_functor { template void search_each_list_row(cudf::detail::lists_column_device_view const& d_lists, SearchKeyPairIter search_key_pair_iter, - cudf::mutable_column_device_view mutable_ret_bools, - cudf::mutable_column_device_view mutable_ret_validity, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource*) + duplicate_find_option find_option, + cudf::mutable_column_device_view ret_positions, + cudf::mutable_column_device_view ret_validity, + rmm::cuda_stream_view stream) const { - thrust::for_each( + auto output_iterator = thrust::make_zip_iterator( + thrust::make_tuple(ret_positions.data(), ret_validity.data())); + + thrust::tabulate( rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(d_lists.size()), - [d_lists, - search_key_pair_iter, - d_bools = mutable_ret_bools.data(), - d_validity = mutable_ret_validity.data()] __device__(auto row_index) { - auto search_key_and_validity = search_key_pair_iter[row_index]; - auto const& search_key_is_valid = search_key_and_validity.second; - - if (search_keys_have_nulls && !search_key_is_valid) { - d_bools[row_index] = false; - d_validity[row_index] = false; - return; - } + output_iterator, + 
output_iterator + d_lists.size(), + [d_lists, search_key_pair_iter, absent_index = absent_index, find_option] __device__( + auto row_index) -> thrust::pair { + auto [search_key, search_key_is_valid] = search_key_pair_iter[row_index]; + + if (search_keys_have_nulls && !search_key_is_valid) { return {absent_index, false}; } auto list = cudf::list_device_view(d_lists, row_index); - if (list.is_null()) { - d_bools[row_index] = false; - d_validity[row_index] = false; - return; - } - - auto search_key = search_key_and_validity.first; - d_bools[row_index] = - thrust::find_if(thrust::seq, - list.pair_rep_begin(), - list.pair_rep_end(), - [search_key] __device__(auto element_and_validity) { - return element_and_validity.second && - cudf::equality_compare(element_and_validity.first, search_key); - }) != list.pair_rep_end(); - d_validity[row_index] = - d_bools[row_index] || - thrust::none_of(thrust::seq, - thrust::make_counting_iterator(size_type{0}), - thrust::make_counting_iterator(list.size()), - [&list] __device__(auto const& i) { return list.is_null(i); }); + if (list.is_null()) { return {absent_index, false}; } + + auto const position = find_option == duplicate_find_option::FIND_FIRST + ? 
finder{}(list, search_key) + : finder{}(list, search_key); + return {position, true}; }); } @@ -133,74 +171,171 @@ struct lookup_functor { std::enable_if_t::value, std::unique_ptr> operator()( cudf::lists_column_view const& lists, SearchKeyType const& search_key, + duplicate_find_option find_option, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const { using namespace cudf; using namespace cudf::detail; CUDF_EXPECTS(!cudf::is_nested(lists.child().type()), - "Nested types not supported in lists::contains()"); + "Nested types not supported in list search operations."); CUDF_EXPECTS(lists.child().type() == search_key.type(), "Type/Scale of search key does not match list column element type."); CUDF_EXPECTS(search_key.type().id() != type_id::EMPTY, "Type cannot be empty."); auto constexpr search_key_is_scalar = std::is_same_v; - if (search_keys_have_nulls && search_key_is_scalar) { - return make_fixed_width_column(data_type(type_id::BOOL8), - lists.size(), - cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr), - lists.size(), - stream, - mr); + if constexpr (search_keys_have_nulls && search_key_is_scalar) { + return make_numeric_column(data_type(type_id::INT32), + lists.size(), + cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr), + lists.size(), + stream, + mr); } auto const device_view = column_device_view::create(lists.parent(), stream); - auto const d_lists = lists_column_device_view(*device_view); + auto const d_lists = lists_column_device_view{*device_view}; auto const d_skeys = get_search_keys_device_iterable_view(search_key, stream); - auto result_validity = make_fixed_width_column( + auto result_positions = make_numeric_column( + data_type{type_id::INT32}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr); + auto result_validity = make_numeric_column( data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, 
mr); - auto result_bools = make_fixed_width_column( - data_type{type_id::BOOL8}, lists.size(), cudf::mask_state::UNALLOCATED, stream, mr); - auto mutable_result_bools = - mutable_column_device_view::create(result_bools->mutable_view(), stream); + auto mutable_result_positions = + mutable_column_device_view::create(result_positions->mutable_view(), stream); auto mutable_result_validity = mutable_column_device_view::create(result_validity->mutable_view(), stream); auto search_key_iter = cudf::detail::make_pair_rep_iterator(*d_skeys); - search_each_list_row( - d_lists, search_key_iter, *mutable_result_bools, *mutable_result_validity, stream, mr); - - rmm::device_buffer null_mask; - size_type num_nulls; + search_each_list_row(d_lists, + search_key_iter, + find_option, + *mutable_result_positions, + *mutable_result_validity, + stream); - std::tie(null_mask, num_nulls) = - construct_null_mask(lists, result_validity->view(), stream, mr); - result_bools->set_null_mask(std::move(null_mask), num_nulls); - - return result_bools; + auto [null_mask, num_nulls] = construct_null_mask(lists, result_validity->view(), stream, mr); + result_positions->set_null_mask(std::move(null_mask), num_nulls); + return result_positions; } }; +/** + * @brief Converts key-positions vector (from index_of()) to a BOOL8 vector, indicating if + * the search key was found. + */ +std::unique_ptr to_contains(std::unique_ptr&& key_positions, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + CUDF_EXPECTS(key_positions->type().id() == type_id::INT32, + "Expected input column of type INT32."); + // If position == -1, the list did not contain the search key. 
+ auto const num_rows = key_positions->size(); + auto const positions_begin = key_positions->view().begin(); + auto result = + make_numeric_column(data_type{type_id::BOOL8}, num_rows, mask_state::UNALLOCATED, stream, mr); + thrust::transform(rmm::exec_policy(stream), + positions_begin, + positions_begin + num_rows, + result->mutable_view().begin(), + [] __device__(auto i) { return i != absent_index; }); + [[maybe_unused]] auto [_, null_mask, __] = key_positions->release(); + result->set_null_mask(std::move(*null_mask)); + return result; +} } // namespace namespace detail { +/** + * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, + * cudf::scalar const&, + * duplicate_find_option, + * rmm::mr::device_memory_resource*) + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr index_of( + cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + return search_key.is_valid(stream) + ? cudf::type_dispatcher(search_key.type(), + lookup_functor{}, // No nulls in search key + lists, + search_key, + find_option, + stream, + mr) + : cudf::type_dispatcher(search_key.type(), + lookup_functor{}, // Nulls in search key + lists, + search_key, + find_option, + stream, + mr); +} + +/** + * @copydoc cudf::lists::index_of(cudf::lists_column_view const&, + * cudf::column_view const&, + * duplicate_find_option, + * rmm::mr::device_memory_resource*) + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ +std::unique_ptr index_of( + cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + duplicate_find_option find_option, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) +{ + CUDF_EXPECTS(search_keys.size() == lists.size(), + "Number of search keys must match list column size."); + + return search_keys.has_nulls() + ? cudf::type_dispatcher(search_keys.type(), + lookup_functor{}, // Nulls in search keys + lists, + search_keys, + find_option, + stream, + mr) + : cudf::type_dispatcher(search_keys.type(), + lookup_functor{}, // No nulls in search keys + lists, + search_keys, + find_option, + stream, + mr); +} +/** + * @copydoc cudf::lists::contains(cudf::lists_column_view const&, + * cudf::scalar const&, + * rmm::mr::device_memory_resource*) + * @param stream CUDA stream used for device memory operations and kernel launches. + */ std::unique_ptr contains(cudf::lists_column_view const& lists, cudf::scalar const& search_key, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - return search_key.is_valid(stream) - ? cudf::type_dispatcher( - search_key.type(), lookup_functor{}, lists, search_key, stream, mr) - : cudf::type_dispatcher( - search_key.type(), lookup_functor{}, lists, search_key, stream, mr); + return to_contains( + index_of(lists, search_key, duplicate_find_option::FIND_FIRST, stream), stream, mr); } +/** + * @copydoc cudf::lists::contains(cudf::lists_column_view const&, + * cudf::column_view const&, + * rmm::mr::device_memory_resource*) + * @param stream CUDA stream used for device memory operations and kernel launches. 
+ */ std::unique_ptr contains(cudf::lists_column_view const& lists, cudf::column_view const& search_keys, rmm::cuda_stream_view stream, @@ -209,11 +344,44 @@ std::unique_ptr contains(cudf::lists_column_view const& lists, CUDF_EXPECTS(search_keys.size() == lists.size(), "Number of search keys must match list column size."); - return search_keys.has_nulls() - ? cudf::type_dispatcher( - search_keys.type(), lookup_functor{}, lists, search_keys, stream, mr) - : cudf::type_dispatcher( - search_keys.type(), lookup_functor{}, lists, search_keys, stream, mr); + return to_contains( + index_of(lists, search_keys, duplicate_find_option::FIND_FIRST, stream), stream, mr); +} + +/** + * @copydoc cudf::lists::contains_nulls(cudf::lists_column_view const&, + * rmm::mr::device_memory_resource*) + * @param stream CUDA stream used for device memory operations and kernel launches. + */ +std::unique_ptr contains_nulls(cudf::lists_column_view const& input_lists, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + auto const num_rows = input_lists.size(); + auto const d_lists = column_device_view::create(input_lists.parent(), stream); + auto has_nulls_output = make_numeric_column( + data_type{type_id::BOOL8}, input_lists.size(), mask_state::UNALLOCATED, stream, mr); + auto const output_begin = has_nulls_output->mutable_view().begin(); + thrust::tabulate( + rmm::exec_policy(stream), + output_begin, + output_begin + num_rows, + [lists = cudf::detail::lists_column_device_view{*d_lists}] __device__(auto list_idx) { + auto list = list_device_view{lists, list_idx}; + auto list_begin = thrust::make_counting_iterator(size_type{0}); + return list.is_null() || + thrust::any_of(thrust::seq, list_begin, list_begin + list.size(), [&list](auto i) { + return list.is_null(i); + }); + }); + auto const validity_begin = cudf::detail::make_counting_transform_iterator( + 0, [lists = cudf::detail::lists_column_device_view{*d_lists}] __device__(auto list_idx) { + return not 
list_device_view{lists, list_idx}.is_null(); + }); + auto [null_mask, num_nulls] = cudf::detail::valid_if( + validity_begin, validity_begin + num_rows, thrust::identity{}, stream, mr); + has_nulls_output->set_null_mask(std::move(null_mask), num_nulls); + return has_nulls_output; } } // namespace detail @@ -234,5 +402,30 @@ std::unique_ptr contains(cudf::lists_column_view const& lists, return detail::contains(lists, search_keys, rmm::cuda_stream_default, mr); } +std::unique_ptr contains_nulls(cudf::lists_column_view const& input_lists, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::contains_nulls(input_lists, rmm::cuda_stream_default, mr); +} + +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::scalar const& search_key, + duplicate_find_option find_option, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::index_of(lists, search_key, find_option, rmm::cuda_stream_default, mr); +} + +std::unique_ptr index_of(cudf::lists_column_view const& lists, + cudf::column_view const& search_keys, + duplicate_find_option find_option, + rmm::mr::device_memory_resource* mr) +{ + CUDF_FUNC_RANGE(); + return detail::index_of(lists, search_keys, find_option, rmm::cuda_stream_default, mr); +} + } // namespace lists } // namespace cudf diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp index 5d7e218898c..066eb7eafc8 100644 --- a/cpp/tests/lists/contains_tests.cpp +++ b/cpp/tests/lists/contains_tests.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include namespace cudf { @@ -42,6 +43,12 @@ struct TypedContainsTest : public ContainsTest { TYPED_TEST_SUITE(TypedContainsTest, ContainsTestTypes); namespace { + +auto constexpr x = int32_t{-1}; // Placeholder for nulls. +auto constexpr absent = size_type{-1}; // Index when key is not found in a list. 
+auto constexpr FIND_FIRST = lists::duplicate_find_option::FIND_FIRST; +auto constexpr FIND_LAST = lists::duplicate_find_option::FIND_LAST; + template (), void>* = nullptr> auto create_scalar_search_key(T const& value) { @@ -101,238 +108,381 @@ auto create_null_search_key() } // namespace -TYPED_TEST(TypedContainsTest, ListContainsScalarWithNoNulls) +using iterators::all_nulls; +using iterators::null_at; +using iterators::nulls_at; +using bools = fixed_width_column_wrapper; +using indices = fixed_width_column_wrapper; + +TYPED_TEST(TypedContainsTest, ScalarKeyWithNoNulls) { using T = TypeParam; - auto search_space = lists_column_wrapper{ - {0, 1, 2}, - {3, 4, 5}, - {6, 7, 8}, - {9, 0, 1}, - {2, 3, 4}, - {5, 6, 7}, - {8, 9, 0}, - {}, - {1, 2, 3}, - {}}.release(); - auto search_key_one = create_scalar_search_key(1); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = fixed_width_column_wrapper{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + auto search_space = lists_column_view{lists_column_wrapper{{0, 1, 2, 1}, + {3, 4, 5}, + {6, 7, 8}, + {9, 0, 1, 3, 1}, + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 0}, + {}, + {1, 2, 1, 3}, + {}}}; + auto search_key_one = create_scalar_search_key(1); - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space, *search_key_one); + auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(search_space); + auto expected = bools{0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST); + auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = 
lists::index_of(search_space, *search_key_one, FIND_LAST); + auto expected = indices{3, absent, absent, 4, absent, absent, absent, absent, 2, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedContainsTest, ListContainsScalarWithNullLists) +TYPED_TEST(TypedContainsTest, ScalarKeyWithNullLists) { // Test List columns that have NULL list rows. - using T = TypeParam; - auto search_space = lists_column_wrapper{ - {{0, 1, 2}, - {3, 4, 5}, - {6, 7, 8}, - {}, - {9, 0, 1}, - {2, 3, 4}, - {5, 6, 7}, - {8, 9, 0}, - {}, - {1, 2, 3}, - {}}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 3) && (i != 10); - })}.release(); - + auto search_space = lists_column_view{lists_column_wrapper{{{0, 1, 2, 1}, + {3, 4, 5}, + {6, 7, 8}, + {}, + {9, 0, 1, 3, 1}, + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 0}, + {}, + {1, 2, 2, 3}, + {}}, + nulls_at({3, 10})}}; auto search_key_one = create_scalar_search_key(1); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = - fixed_width_column_wrapper{{1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}, - cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (i != 3) && (i != 10); })}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space, *search_key_one); + auto expected = bools{{1, 0, 0, x, 1, 0, 0, 0, 0, 1, x}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(search_space); + auto expected = bools{{0, 0, 0, x, 0, 0, 0, 0, 0, 0, x}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space, *search_key_one, FIND_FIRST); + auto expected = + indices{{1, absent, absent, x, 2, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space, *search_key_one, FIND_LAST); + auto expected = + indices{{3, absent, absent, x, 4, absent, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } TYPED_TEST(TypedContainsTest, SlicedLists) { // Test sliced List columns. - using namespace cudf; + using T = TypeParam; - using T = TypeParam; - using bools = fixed_width_column_wrapper; - - auto search_space = lists_column_wrapper{ - {{0, 1, 2}, - {3, 4, 5}, - {6, 7, 8}, - {}, - {9, 0, 1}, - {2, 3, 4}, - {5, 6, 7}, - {8, 9, 0}, - {}, - {1, 2, 3}, - {}}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 3) && (i != 10); - })}.release(); - - auto sliced_column_1 = cudf::detail::slice(search_space->view(), {1, 8}).front(); - - auto search_key_one = create_scalar_search_key(1); - auto result_1 = lists::contains(sliced_column_1, *search_key_one); - - auto expected_result_1 = bools{ - {0, 0, 0, 1, 0, 0, 0}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 2); - })}.release(); - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_1->view(), result_1->view()); - - auto sliced_column_2 = cudf::detail::slice(search_space->view(), {3, 10}).front(); - - auto result_2 = lists::contains(sliced_column_2, *search_key_one); + auto search_space = lists_column_wrapper{{{0, 1, 2, 1}, + {3, 4, 5}, + {6, 7, 8}, + {}, + {9, 0, 1, 3, 1}, + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 0}, + {}, + {1, 2, 1, 3}, + {}}, + nulls_at({3, 10})}; - auto expected_result_2 = bools{ - {0, 1, 0, 0, 0, 0, 1}, cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 0); - })}.release(); + { + // First Slice. 
+ auto sliced_column_1 = cudf::detail::slice(search_space, {1, 8}).front(); + auto search_key_one = create_scalar_search_key(1); + { + // CONTAINS + auto result = lists::contains(sliced_column_1, *search_key_one); + auto expected_result = bools{{0, 0, x, 1, 0, 0, 0}, null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(sliced_column_1); + auto expected_result = bools{{0, 0, x, 0, 0, 0, 0}, null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // FIND_FIRST + auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST); + auto expected_result = indices{{absent, absent, 0, 2, absent, absent, absent}, null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // FIND_LAST + auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_LAST); + auto expected_result = indices{{absent, absent, 0, 4, absent, absent, absent}, null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + } - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result_2->view(), result_2->view()); + { + // Second Slice. 
+ auto sliced_column_2 = cudf::detail::slice(search_space, {3, 10}).front(); + auto search_key_one = create_scalar_search_key(1); + { + // CONTAINS + auto result = lists::contains(sliced_column_2, *search_key_one); + auto expected_result = bools{{x, 1, 0, 0, 0, 0, 1}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(sliced_column_2); + auto expected_result = bools{{x, 0, 0, 0, 0, 0, 0}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // FIND_FIRST + auto result = lists::index_of(sliced_column_2, *search_key_one, FIND_FIRST); + auto expected_result = indices{{0, 2, absent, absent, absent, absent, 0}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + { + // FIND_LAST + auto result = lists::index_of(sliced_column_2, *search_key_one, FIND_LAST); + auto expected_result = indices{{0, 4, absent, absent, absent, absent, 2}, null_at(0)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view()); + } + } } -TYPED_TEST(TypedContainsTest, ListContainsScalarNonNullListsWithNullValues) +TYPED_TEST(TypedContainsTest, ScalarKeyNonNullListsWithNullValues) { // Test List columns that have no NULL list rows, but NULL elements in some list rows. 
using T = TypeParam; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto search_space = - make_lists_column(8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), - numerals.release(), - 0, - {}); - + auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; + auto search_space = make_lists_column( + 8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); + // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key(1); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 1, 0, 1, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_one); + auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(search_space->view()); + auto expected = bools{1, 0, 1, 0, 0, 1, 1, 1}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); + auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); + auto expected = indices{absent, 0, absent, absent, absent, absent, absent, 3}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedContainsTest, ListContainsScalarWithNullsInLists) +TYPED_TEST(TypedContainsTest, 
ScalarKeysWithNullsInLists) { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; + auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key(1); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 1, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS. + auto result = lists::contains(search_space->view(), *search_key_one); + auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS. + auto result = lists::contains_nulls(search_space->view()); + auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST. + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); + auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST. 
+ auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); + auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TEST_F(ContainsTest, BoolListContainsScalarWithNullsInLists) +TEST_F(ContainsTest, BoolScalarWithNullsInLists) { using T = bool; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - - auto search_space = make_lists_column( + auto numerals = fixed_width_column_wrapper{{x, 1, 1, x, 1, 1, x, 1, 1, x, x, 1, 1, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; + auto input_null_mask_iter = null_at(4); + auto search_space = make_lists_column( 8, fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + // Search space: [ [x], [1,1], [x,1,1,x], [], x, [1,1,x], [x], [1,1,x,1] ] auto search_key_one = create_scalar_search_key(1); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 0, 1, 1, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_one); + auto expected = bools{{0, 1, 1, 0, x, 1, 0, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(search_space->view()); + auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST. 
+ auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); + auto expected = indices{{absent, 0, 1, absent, x, 0, absent, 0}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST. + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); + auto expected = indices{{absent, 1, 2, absent, x, 1, absent, 3}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TEST_F(ContainsTest, StringListContainsScalarWithNullsInLists) +TEST_F(ContainsTest, StringScalarWithNullsInLists) { using T = std::string; auto strings = strings_column_wrapper{ - {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "1", "2", "3", "4"}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - - auto search_space = make_lists_column( + {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"}, + nulls_at({0, 3, 6, 9, 10, 13})}; + auto input_null_mask_iter = null_at(4); + auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), strings.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] auto search_key_one = create_scalar_search_key("1"); - auto actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 1, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_one); + auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // CONTAINS NULLS + auto result = lists::contains_nulls(search_space->view()); + auto expected = bools{{1, 0, 1, 0, x, 1, 1, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST. + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); + auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 0}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST. + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); + auto expected = indices{{absent, 0, absent, absent, x, absent, absent, 3}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedContainsTest, ContainsScalarNullSearchKey) +TYPED_TEST(TypedContainsTest, ScalarNullSearchKey) { using T = TypeParam; - auto search_space = lists_column_wrapper{ - {{0, 1, 2}, - {3, 4, 5}, - {6, 7, 8}, - {}, - {9, 0, 1}, - {2, 3, 4}, - {5, 6, 7}, - {8, 9, 0}, - {}, - {1, 2, 3}, - {}}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 3) && (i != 10); - })}.release(); - + auto search_space = lists_column_wrapper{{{0, 1, 2}, + {3, 4, 5}, + {6, 7, 8}, + {}, + {9, 0, 1}, + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 0}, + {}, + {1, 2, 3}, + {}}, + nulls_at({3, 10})} + .release(); auto search_key_null = create_null_search_key(); - auto actual_result = lists::contains(search_space->view(), *search_key_null); - auto expected_result = fixed_width_column_wrapper{ - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return false; })}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_null); + auto expected = bools{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, 
*result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), *search_key_null, FIND_FIRST); + auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), *search_key_null, FIND_LAST); + auto expected = indices{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } TEST_F(ContainsTest, ScalarTypeRelatedExceptions) @@ -346,9 +496,12 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) {4, 5, 6}}}.release(); auto skey = create_scalar_search_key(10); CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), *skey), - "Nested types not supported in lists::contains()"); + "Nested types not supported in list search operations."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_FIRST), + "Nested types not supported in list search operations."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), *skey, FIND_LAST), + "Nested types not supported in list search operations."); } - { // Search key must match list elements in type. 
auto list_of_ints = @@ -360,6 +513,10 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions) auto skey = create_scalar_search_key("Hello, World!"); CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), *skey), "Type/Scale of search key does not match list column element type."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), + "Type/Scale of search key does not match list column element type."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), *skey, FIND_LAST), + "Type/Scale of search key does not match list column element type."); } } @@ -367,199 +524,275 @@ template struct TypedVectorContainsTest : public ContainsTest { }; -using VectorContainsTestTypes = +using VectorTestTypes = cudf::test::Concat; -TYPED_TEST_SUITE(TypedVectorContainsTest, VectorContainsTestTypes); +TYPED_TEST_SUITE(TypedVectorContainsTest, VectorTestTypes); -TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNoNulls) +TYPED_TEST(TypedVectorContainsTest, VectorKeysWithNoNulls) { using T = TypeParam; auto search_space = lists_column_wrapper{ - {0, 1, 2}, + {0, 1, 2, 1}, {3, 4, 5}, {6, 7, 8}, - {9, 0, 1}, + {9, 0, 1, 3, 1}, {2, 3, 4}, {5, 6, 7}, {8, 9, 0}, {}, - {1, 2, 3}, + {1, 2, 3, 3}, {}}.release(); - auto search_key = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 2, 3, 1}; - auto actual_result = lists::contains(search_space->view(), search_key); - auto expected_result = fixed_width_column_wrapper{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_key = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 2, 3, 1}; + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_key); + auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_key, FIND_FIRST); + auto expected = indices{1, absent, absent, 2, 
0, absent, absent, absent, 2, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_key, FIND_LAST); + auto expected = indices{3, absent, absent, 4, 0, absent, absent, absent, 3, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullLists) +TYPED_TEST(TypedVectorContainsTest, VectorWithNullLists) { // Test List columns that have NULL list rows. using T = TypeParam; - auto search_space = lists_column_wrapper{ - {{0, 1, 2}, - {3, 4, 5}, - {6, 7, 8}, - {}, - {9, 0, 1}, - {2, 3, 4}, - {5, 6, 7}, - {8, 9, 0}, - {}, - {1, 2, 3}, - {}}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { - return (i != 3) && (i != 10); - })}.release(); - - auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2}; - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, - cudf::detail::make_counting_transform_iterator( - 0, [](auto i) { return (i != 3) && (i != 10); })}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_space = lists_column_wrapper{{{0, 1, 2, 1}, + {3, 4, 5}, + {6, 7, 8}, + {}, + {9, 0, 1, 3, 1}, + {2, 3, 4}, + {5, 6, 7}, + {8, 9, 0}, + {}, + {1, 2, 3, 3}, + {}}, + nulls_at({3, 10})} + .release(); + + auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2}; + + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto expected = bools{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = + indices{{1, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + 
CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = + indices{{3, absent, absent, x, absent, 1, absent, absent, absent, 0, x}, nulls_at({3, 10})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedVectorContainsTest, ListContainsVectorNonNullListsWithNullValues) +TYPED_TEST(TypedVectorContainsTest, VectorNonNullListsWithNullValues) { // Test List columns that have no NULL list rows, but NULL elements in some list rows. using T = TypeParam; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto search_space = - make_lists_column(8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(), - numerals.release(), - 0, - {}); - - auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 3}; - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 1, 1}, {0, 1, 0, 1, 1, 0, 1, 1}}; + auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_space = make_lists_column( + 8, indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {}); + // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ] + auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 1}; + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto expected = bools{0, 1, 0, 0, 0, 0, 0, 1}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices{absent, 
1, absent, absent, absent, absent, absent, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices{absent, 1, absent, absent, absent, absent, absent, 3}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInLists) +TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists) { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; + auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(), + indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] - auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 3}; - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 1, 1}, {0, 1, 0, 1, 0, 0, 1, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_keys = fixed_width_column_wrapper{1, 2, 3, 1, 2, 3, 1, 1}; + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto expected = bools{{0, 1, 0, 0, x, 0, 0, 1}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = 
lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 0}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices{{absent, 1, absent, absent, x, absent, absent, 3}, null_at(4)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearchKeys) { using T = TypeParam; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; + auto numerals = fixed_width_column_wrapper{{x, 1, 2, x, 4, 5, x, 7, 8, x, x, 1, 2, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(), + indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); + // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] - auto search_keys = fixed_width_column_wrapper{ - {1, 2, 3, 1, 2, 3, 1, 3}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })}; - - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_keys = fixed_width_column_wrapper{{1, 2, 3, x, 2, 3, 1, 1}, null_at(3)}; + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto 
expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TEST_F(ContainsTest, BoolListContainsVectorWithNullsInListsAndInSearchKeys) +TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys) { using T = bool; - auto numerals = fixed_width_column_wrapper{ - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; + auto numerals = fixed_width_column_wrapper{{x, 0, 1, x, 1, 1, x, 1, 1, x, x, 0, 1, x, 1}, + nulls_at({0, 3, 6, 9, 10, 13})}; - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + auto input_null_mask_iter = null_at(4); auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(), + indices{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); - auto search_keys = fixed_width_column_wrapper{ - {0, 1, 0, 1, 0, 0, 1, 1}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })}; - - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_keys = 
fixed_width_column_wrapper{{0, 1, 0, x, 0, 0, 1, 1}, null_at(3)}; + // Search space: [ [x], [0,1], [x,1,1,x], [], x, [1,1,x], [x], [0,1,x,1] ] + // Search keys : [ 0, 1, 0, x, 0, 0, 1, 1 ] + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 1}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TEST_F(ContainsTest, StringListContainsVectorWithNullsInListsAndInSearchKeys) +TEST_F(ContainsTest, StringKeyVectorWithNullsInListsAndInSearchKeys) { - auto numerals = strings_column_wrapper{ - {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "1", "2", "3", "4"}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) -> bool { return i % 3; })}; - - auto input_null_mask_iter = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 4; }); - - auto search_space = make_lists_column( + auto strings = strings_column_wrapper{ + {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"}, + nulls_at({0, 3, 6, 9, 10, 13})}; + auto input_null_mask_iter = null_at(4); + auto search_space = make_lists_column( 8, - fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 12, 15}.release(), - numerals.release(), + fixed_width_column_wrapper{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), + strings.release(), 1, cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8)); - auto search_keys = strings_column_wrapper{ - {"1", 
"2", "3", "1", "2", "3", "1", "3"}, - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 6; })}; + auto search_keys = strings_column_wrapper{{"1", "2", "3", "X", "2", "3", "1", "1"}, null_at(3)}; - auto actual_result = lists::contains(search_space->view(), search_keys); - auto expected_result = - fixed_width_column_wrapper{{0, 1, 0, 0, 0, 0, 0, 1}, {0, 1, 0, 1, 0, 0, 0, 1}}; + // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ] + // Search keys: [ 1, 2, 3, X, 2, 3, 1, 1] - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys); + auto expected = bools{{0, 1, 0, x, x, 0, 0, 1}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys, FIND_FIRST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 0}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys, FIND_LAST); + auto expected = indices{{absent, 1, absent, x, x, absent, absent, 3}, nulls_at({3, 4})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } TEST_F(ContainsTest, VectorTypeRelatedExceptions) @@ -573,9 +806,12 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) {4, 5, 6}}}.release(); auto skey = fixed_width_column_wrapper{0, 1, 2}; CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_lists->view(), skey), - "Nested types not supported in lists::contains()"); + "Nested types not supported in list search operations."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_FIRST), + "Nested types not supported in list search operations."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_lists->view(), skey, FIND_LAST), + "Nested types not supported in list search operations."); } - { // 
Search key must match list elements in type. auto list_of_ints = @@ -587,15 +823,21 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions) auto skey = strings_column_wrapper{"Hello", "World"}; CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey), "Type/Scale of search key does not match list column element type."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), + "Type/Scale of search key does not match list column element type."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST), + "Type/Scale of search key does not match list column element type."); } - { // Search key column size must match lists column size. auto list_of_ints = lists_column_wrapper{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}.release(); - - auto skey = fixed_width_column_wrapper{0, 1, 2, 3}; + auto skey = fixed_width_column_wrapper{0, 1, 2, 3}; CUDF_EXPECT_THROW_MESSAGE(lists::contains(list_of_ints->view(), skey), "Number of search keys must match list column size."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), + "Number of search keys must match list column size."); + CUDF_EXPECT_THROW_MESSAGE(lists::index_of(list_of_ints->view(), skey, FIND_LAST), + "Number of search keys must match list column size."); } } @@ -605,6 +847,7 @@ struct TypedContainsNaNsTest : public ContainsTest { TYPED_TEST_SUITE(TypedContainsNaNsTest, FloatingPointTypes); +namespace { template T get_nan(const char* nan_contents) { @@ -616,8 +859,9 @@ float get_nan(const char* nan_contents) { return std::nanf(nan_contents); } +} // namespace -TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsScalar) +TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsScalar) { using T = TypeParam; @@ -637,11 +881,25 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsScalar) {1, 2, 3}, {}}.release(); - auto search_key_nan = create_scalar_search_key(nan_3); - auto actual_result = lists::contains(search_space->view(), 
*search_key_nan); - auto expected_result = fixed_width_column_wrapper{0, 0, 0, 0, 1, 0, 1, 0, 0, 0}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto search_key_nan = create_scalar_search_key(nan_3); + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_nan); + auto expected = bools{0, 0, 0, 0, 1, 0, 1, 0, 0, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST); + auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_LAST); + auto expected = indices{absent, absent, absent, absent, 0, absent, 1, absent, absent, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector) @@ -652,19 +910,18 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector) // presence of NaN values: // 1. If the search key is null, null is still returned. // 2. If the list contains a null, and the non-null search - // key is not found, null is returned. + // key is not found: + // a) contains() returns `false`. + // b) index_of() returns -1. using T = TypeParam; auto nan_1 = get_nan("1"); auto nan_2 = get_nan("2"); auto nan_3 = get_nan("3"); - auto null_at_index_2 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 2; }); - auto search_space = lists_column_wrapper{ {0.0, 1.0, 2.0}, - {{3, 4, 5}, null_at_index_2}, // i.e. {3, 4, ∅}. + {{3, 4, 5}, null_at(2)}, // i.e. {3, 4, ∅}. {6, 7, 8}, {9, 0, 1}, {nan_1, 3.0, 4.0}, @@ -679,33 +936,52 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector) { // With nulls in the search key rows. (At index 2.) 
auto search_keys = - fixed_width_column_wrapper{ - search_key_values.begin(), search_key_values.end(), null_at_index_2} + fixed_width_column_wrapper{search_key_values.begin(), search_key_values.end(), null_at(2)} .release(); - auto actual_result = lists::contains(search_space->view(), search_keys->view()); - auto null_at_index_1_and_2 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1 && i != 2; }); - - auto expected_result = - fixed_width_column_wrapper{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at_index_1_and_2}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys->view()); + auto expected = bools{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at(2)}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST); + auto expected = + indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST); + auto expected = + indices{{1, absent, x, absent, 0, absent, 2, absent, 1, absent}, nulls_at({2})}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } - { // No nulls in the search key rows. 
auto search_keys = fixed_width_column_wrapper(search_key_values.begin(), search_key_values.end()).release(); - - auto actual_result = lists::contains(search_space->view(), search_keys->view()); - auto null_at_index_1 = - cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; }); - - auto expected_result = - fixed_width_column_wrapper{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at_index_1}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_keys->view()); + auto expected = bools{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST); + auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST); + auto expected = indices{1, absent, absent, absent, 0, absent, 2, absent, 1, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } } @@ -715,50 +991,79 @@ struct TypedContainsDecimalsTest : public ContainsTest { TYPED_TEST_SUITE(TypedContainsDecimalsTest, FixedPointTypes); -TYPED_TEST(TypedContainsDecimalsTest, ListContainsScalar) +TYPED_TEST(TypedContainsDecimalsTest, ScalarKey) { using T = TypeParam; - auto const values = std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, - 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; - auto decimals = fixed_point_column_wrapper{ - values.begin(), values.end(), numeric::scale_type{0}}; - - auto list_offsets = fixed_width_column_wrapper{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; - - auto const search_space = - make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); - - auto search_key_one = make_fixed_point_scalar(typename T::rep{1}, numeric::scale_type{0}); - auto 
actual_result = lists::contains(search_space->view(), *search_key_one); - auto expected_result = fixed_width_column_wrapper{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + auto const search_space = [] { + auto const values = std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; + auto decimals = fixed_point_column_wrapper{ + values.begin(), values.end(), numeric::scale_type{0}}; + auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; + return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); + }(); + auto search_key_one = make_fixed_point_scalar(typename T::rep{1}, numeric::scale_type{0}); + + // Search space: [[0,1,2], [3,4,5], [6,7,8], [9,0,1], [2,3,4], [5,6,7], [8,9,0], [], [1,2,3], []] + { + // CONTAINS + auto result = lists::contains(search_space->view(), *search_key_one); + auto expected = bools{1, 0, 0, 1, 0, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST); + auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), *search_key_one, FIND_LAST); + auto expected = indices{1, absent, absent, 2, absent, absent, absent, absent, 0, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } -TYPED_TEST(TypedContainsDecimalsTest, ListContainsVector) +TYPED_TEST(TypedContainsDecimalsTest, VectorKey) { using T = TypeParam; - auto const values = std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, - 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; - auto decimals = fixed_point_column_wrapper{ - values.begin(), values.end(), numeric::scale_type{0}}; - - auto list_offsets = fixed_width_column_wrapper{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; - - auto 
const search_space = - make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); + auto const search_space = [] { + auto const values = std::vector{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, + 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3}; + auto decimals = fixed_point_column_wrapper{ + values.begin(), values.end(), numeric::scale_type{0}}; + auto list_offsets = indices{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24}; + return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {}); + }(); auto search_key = fixed_point_column_wrapper{ {1, 2, 3, 1, 2, 3, 1, 2, 3, 1}, numeric::scale_type{ 0}}.release(); - auto actual_result = lists::contains(search_space->view(), search_key->view()); - auto expected_result = fixed_width_column_wrapper{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; - - CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, *actual_result); + // Search space: [ [0,1,2], [3,4,5], [6,7,8], [9,0,1], [2,3,4], [5,6,7], [8,9,0], [], [1,2,3], [] + // ] Search keys: [ 1, 2, 3, 1, 2, 3, 1, 2, 3, 1 ] + { + // CONTAINS + auto result = lists::contains(search_space->view(), search_key->view()); + auto expected = bools{1, 0, 0, 1, 1, 0, 0, 0, 1, 0}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_FIRST + auto result = lists::index_of(search_space->view(), search_key->view(), FIND_FIRST); + auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } + { + // FIND_LAST + auto result = lists::index_of(search_space->view(), search_key->view(), FIND_LAST); + auto expected = indices{1, absent, absent, 2, 0, absent, absent, absent, 2, absent}; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result); + } } } // namespace test diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java index 5153c5c1d2a..a2e080e02f6 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnView.java +++ 
b/java/src/main/java/ai/rapids/cudf/ColumnView.java @@ -3170,8 +3170,6 @@ public static ColumnView fromDeviceBuffer(BaseDeviceMemoryBuffer buffer, * Output `column[i]` is set to null if one or more of the following are true: * 1. The key is null * 2. The column vector list value is null - * 3. The list row does not contain the key, and contains at least - * one null. * @param key the scalar to look up * @return a Boolean ColumnVector with the result of the lookup */ @@ -3183,10 +3181,9 @@ public final ColumnVector listContains(Scalar key) { /** * Create a column of bool values indicating whether the list rows of the first * column contain the corresponding values in the second column. + * Output `column[i]` is set to null if one or more of the following are true: * 1. The key value is null * 2. The column vector list value is null - * 3. The list row does not contain the key, and contains at least - * one null. * @param key the ColumnVector with look up values * @return a Boolean ColumnVector with the result of the lookup */ @@ -3195,6 +3192,58 @@ public final ColumnVector listContainsColumn(ColumnView key) { return new ColumnVector(listContainsColumn(getNativeView(), key.getNativeView())); } + /** + * Create a column of bool values indicating whether the list rows of the specified + * column contain null elements. + * Output `column[i]` is set to null iff the input list row is null. + * @return a Boolean ColumnVector with the result of the lookup + */ + public final ColumnVector listContainsNulls() { + assert type.equals(DType.LIST) : "column type must be a LIST"; + return new ColumnVector(listContainsNulls(getNativeView())); + } + + /** + * Enum to choose behaviour of listIndexOf functions: + * 1. FIND_FIRST finds the first occurrence of a search key. + * 2. FIND_LAST finds the last occurrence of a search key. 
+ */ + public enum FindOptions {FIND_FIRST, FIND_LAST}; + + /** + * Create a column of int32 indices, indicating the position of the scalar search key + * in each list row. + * All indices are 0-based. If a search key is not found, the index is set to -1. + * The index is set to null if one of the following is true: + * 1. The search key is null. + * 2. The list row is null. + * @param key The scalar search key + * @param findOption Whether to find the first index of the key, or the last. + * @return The resultant column of int32 indices + */ + public final ColumnVector listIndexOf(Scalar key, FindOptions findOption) { + assert type.equals(DType.LIST) : "column type must be a LIST"; + boolean isFindFirst = findOption == FindOptions.FIND_FIRST; + return new ColumnVector(listIndexOfScalar(getNativeView(), key.getScalarHandle(), isFindFirst)); + } + + /** + * Create a column of int32 indices, indicating the position of each row in the + * search key column in the corresponding row of the lists column. + * All indices are 0-based. If a search key is not found, the index is set to -1. + * The index is set to null if one of the following is true: + * 1. The search key row is null. + * 2. The list row is null. + * @param keys ColumnView of search keys. + * @param findOption Whether to find the first index of the key, or the last. + * @return The resultant column of int32 indices + */ + public final ColumnVector listIndexOf(ColumnView keys, FindOptions findOption) { + assert type.equals(DType.LIST) : "column type must be a LIST"; + boolean isFindFirst = findOption == FindOptions.FIND_FIRST; + return new ColumnVector(listIndexOfColumn(getNativeView(), keys.getNativeView(), isFindFirst)); + } + /** * Segmented sort of the elements within a list in each row of a list column. * NOTICE: list columns with nested child are NOT supported yet. 
@@ -3616,6 +3665,33 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat */ private static native long listContainsColumn(long nativeView, long keyColumn); + /** + * Native method to search list rows for null elements. + * @param nativeView the column view handle of the list + * @return column handle of the resultant boolean column + */ + private static native long listContainsNulls(long nativeView); + + /** + * Native method to find the first (or last) index of a specified scalar key, + * in each row of a list column. + * @param nativeView the column view handle of the list + * @param scalarKeyHandle handle to the scalar search key + * @param isFindFirst Whether to find the first index of the key, or the last. + * @return column handle of the resultant column of int32 indices + */ + private static native long listIndexOfScalar(long nativeView, long scalarKeyHandle, boolean isFindFirst); + + /** + * Native method to find the first (or last) index of each search key in the specified column, + * in each row of a list column. + * @param nativeView the column view handle of the list + * @param keyColumnHandle handle to the search key column + * @param isFindFirst Whether to find the first index of the key, or the last. 
+ * @return column handle of the resultant column of int32 indices + */ + private static native long listIndexOfColumn(long nativeView, long keyColumnHandle, boolean isFindFirst); + private static native long listSortRows(long nativeView, boolean isDescending, boolean isNullSmallest); private static native long getElement(long nativeView, int index); diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp index 4cd4b070aed..73ea49c18d9 100644 --- a/java/src/main/native/src/ColumnViewJni.cpp +++ b/java/src/main/native/src/ColumnViewJni.cpp @@ -511,6 +511,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContains(JNIEnv *env, CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsNulls(JNIEnv *env, jclass, + jlong column_view) { + JNI_NULL_CHECK(env, column_view, "column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto cv = reinterpret_cast(column_view); + auto lcv = cudf::lists_column_view{*cv}; + return reinterpret_cast(cudf::lists::contains_nulls(lcv).release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsColumn(JNIEnv *env, jclass, jlong column_view, jlong lookup_key_cv) { @@ -528,6 +540,44 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listContainsColumn(JNIEnv CATCH_STD(env, 0); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listIndexOfScalar(JNIEnv *env, jclass, + jlong column_view, + jlong lookup_key, + jboolean is_find_first) { + JNI_NULL_CHECK(env, column_view, "column is null", 0); + JNI_NULL_CHECK(env, lookup_key, "lookup scalar is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const cv = reinterpret_cast(column_view); + auto const lcv = cudf::lists_column_view{*cv}; + auto const lookup_key_scalar = reinterpret_cast(lookup_key); + auto const find_option = is_find_first ? 
cudf::lists::duplicate_find_option::FIND_FIRST : + cudf::lists::duplicate_find_option::FIND_LAST; + auto result = cudf::lists::index_of(lcv, *lookup_key_scalar, find_option); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listIndexOfColumn(JNIEnv *env, jclass, + jlong column_view, + jlong lookup_keys, + jboolean is_find_first) { + JNI_NULL_CHECK(env, column_view, "column is null", 0); + JNI_NULL_CHECK(env, lookup_keys, "lookup key column is null", 0); + try { + cudf::jni::auto_set_device(env); + auto const cv = reinterpret_cast(column_view); + auto const lcv = cudf::lists_column_view{*cv}; + auto const lookup_key_column = reinterpret_cast(lookup_keys); + auto const find_option = is_find_first ? cudf::lists::duplicate_find_option::FIND_FIRST : + cudf::lists::duplicate_find_option::FIND_LAST; + auto result = cudf::lists::index_of(lcv, *lookup_key_column, find_option); + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_listSortRows(JNIEnv *env, jclass, jlong column_view, jboolean is_descending, diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index b78183692a3..0771de9492d 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -18,6 +18,7 @@ package ai.rapids.cudf; +import ai.rapids.cudf.ColumnView.FindOptions; import ai.rapids.cudf.HostColumnVector.*; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -4364,70 +4365,160 @@ void testDropListDuplicatesWithKeysValues() { } } + @SafeVarargs + private static ColumnVector makeListsColumn(DType childDType, List... 
rows) { + HostColumnVector.DataType childType = new HostColumnVector.BasicType(true, childDType); + HostColumnVector.DataType listType = new HostColumnVector.ListType(true, childType); + return ColumnVector.fromLists(listType, rows); + } + @Test void testListContainsString() { - List list1 = Arrays.asList("Héllo there", "thésé"); - List list2 = Arrays.asList("", "ARé some", "test strings"); - List list3 = Arrays.asList(null, "", "ARé some", "test strings", "thésé"); - List list4 = Arrays.asList(null, "", "ARé some", "test strings"); - List list5 = null; - try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), list1, list2, list3, list4, list5); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, null, null); - Scalar strScalar = Scalar.fromString("thésé"); - ColumnVector result = v.listContains(strScalar)) { + List list0 = Arrays.asList("Héllo there", "thésé"); + List list1 = Arrays.asList("", "ARé some", "test strings"); + List list2 = Arrays.asList(null, "", "ARé some", "test strings", "thésé"); + List list3 = Arrays.asList(null, "", "ARé some", "test strings"); + List list4 = null; + try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4); + Scalar searchKey = Scalar.fromString("thésé"); + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, false, null); + ColumnVector result = input.listContains(searchKey)) { assertColumnsAreEqual(expected, result); } } @Test void testListContainsInt() { - List list1 = Arrays.asList(1, 2, 3); - List list2 = Arrays.asList(4, 5, 6); - List list3 = Arrays.asList(7, 8, 9); - List list4 = null; - try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.INT32)), list1, list2, list3, list4); + List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, 6); + List list2 = Arrays.asList(7, 8, 9); + 
List list3 = null; + try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); + Scalar searchKey = Scalar.fromInt(7); ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, true, null); - Scalar intScalar = Scalar.fromInt(7); - ColumnVector result = v.listContains(intScalar)) { + ColumnVector result = input.listContains(searchKey)) { assertColumnsAreEqual(expected, result); } } @Test void testListContainsStringCol() { - List list1 = Arrays.asList("Héllo there", "thésé"); - List list2 = Arrays.asList("", "ARé some", "test strings"); - List list3 = Arrays.asList("FOO", "", "ARé some", "test"); + List list0 = Arrays.asList("Héllo there", "thésé"); + List list1 = Arrays.asList("", "ARé some", "test strings"); + List list2 = Arrays.asList("FOO", "", "ARé some", "test"); + List list3 = Arrays.asList(null, "FOO", "", "ARé some", "test"); List list4 = Arrays.asList(null, "FOO", "", "ARé some", "test"); - List list5 = Arrays.asList(null, "FOO", "", "ARé some", "test"); - List list6 = null; - try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.STRING)), list1, list2, list3, list4, list5, list6); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, true, true, null, null); - ColumnVector strCol = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null); - ColumnVector result = v.listContainsColumn(strCol)) { + List list5 = null; + try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5); + ColumnVector searchKeys = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null); + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, true, true, true, false, null); + ColumnVector result = input.listContainsColumn(searchKeys)) { assertColumnsAreEqual(expected, result); } } @Test void testListContainsIntCol() { - List list1 = Arrays.asList(1, 2, 3); - List list2 = Arrays.asList(4, 5, 6); 
+ List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, 6); + List list2 = Arrays.asList(null, 8, 9); List list3 = Arrays.asList(null, 8, 9); - List list4 = Arrays.asList(null, 8, 9); - List list5 = null; - try (ColumnVector v = ColumnVector.fromLists(new HostColumnVector.ListType(true, - new HostColumnVector.BasicType(true, DType.INT32)), list1, list2, list3, list4, list5); - ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, null, null); - ColumnVector intCol = ColumnVector.fromBoxedInts(3, 3, 8, 3, null); - ColumnVector result = v.listContainsColumn(intCol)) { + List list4 = null; + try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3, list4); + ColumnVector searchKeys = ColumnVector.fromBoxedInts(3, 3, 8, 3, null); + ColumnVector expected = ColumnVector.fromBoxedBooleans(true, false, true, false, null); + ColumnVector result = input.listContainsColumn(searchKeys)) { + assertColumnsAreEqual(expected, result); + } + } + + @Test + void testListContainsNulls() { + List list0 = Arrays.asList("Héllo there", "thésé"); + List list1 = Arrays.asList("", "ARé some", "test strings"); + List list2 = Arrays.asList("FOO", "", "ARé some", "test"); + List list3 = Arrays.asList(null, "FOO", "", "ARé some", "test"); + List list4 = Arrays.asList(null, "FOO", "", "ARé some", "test"); + List list5 = null; + try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5); + ColumnVector result = input.listContainsNulls(); + ColumnVector expected = ColumnVector.fromBoxedBooleans(false, false, false, true, true, null)) { assertColumnsAreEqual(expected, result); } } + @Test + void testListIndexOfString() { + List list0 = Arrays.asList("Héllo there", "thésé"); + List list1 = Arrays.asList("", "ARé some", "test strings"); + List list2 = Arrays.asList(null, "", "ARé some", "thésé", "test strings", "thésé"); + List list3 = Arrays.asList(null, "", "ARé some", "test strings"); + List list4 
= null; + try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4); + Scalar searchKey = Scalar.fromString("thésé"); + ColumnVector expectedFirst = ColumnVector.fromBoxedInts(1, -1, 3, -1, null); + ColumnVector resultFirst = input.listIndexOf(searchKey, FindOptions.FIND_FIRST); + ColumnVector expectedLast = ColumnVector.fromBoxedInts(1, -1, 5, -1, null); + ColumnVector resultLast = input.listIndexOf(searchKey, FindOptions.FIND_LAST)) { + assertColumnsAreEqual(expectedFirst, resultFirst); + assertColumnsAreEqual(expectedLast, resultLast); + } + } + + @Test + void testListIndexOfInt() { + List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, 6); + List list2 = Arrays.asList(7, 8, 9, 7); + List list3 = null; + try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3); + Scalar searchKey = Scalar.fromInt(7); + ColumnVector expectedFirst = ColumnVector.fromBoxedInts(-1, -1, 0, null); + ColumnVector resultFirst = input.listIndexOf(searchKey, FindOptions.FIND_FIRST); + ColumnVector expectedLast = ColumnVector.fromBoxedInts(-1, -1, 3, null); + ColumnVector resultLast = input.listIndexOf(searchKey, FindOptions.FIND_LAST)) { + assertColumnsAreEqual(expectedFirst, resultFirst); + assertColumnsAreEqual(expectedLast, resultLast); + } + } + + @Test + void testListIndexOfStringCol() { + List list0 = Arrays.asList("Héllo there", "thésé"); + List list1 = Arrays.asList("", "ARé some", "test strings"); + List list2 = Arrays.asList("FOO", "", "ARé some", "test"); + List list3 = Arrays.asList(null, "FOO", "", "test", "ARé some", "test"); + List list4 = Arrays.asList(null, "FOO", "", "ARé some", "test"); + List list5 = null; + try (ColumnVector input = makeListsColumn(DType.STRING, list0, list1, list2, list3, list4, list5); + ColumnVector searchKeys = ColumnVector.fromStrings("thésé", "", "test", "test", "iotA", null); + ColumnVector expectedFirst = ColumnVector.fromBoxedInts(1, 0, 3, 3, -1, null); + 
ColumnVector resultFirst = input.listIndexOf(searchKeys, FindOptions.FIND_FIRST); + ColumnVector expectedLast = ColumnVector.fromBoxedInts(1, 0, 3, 5, -1, null); + ColumnVector resultLast = input.listIndexOf(searchKeys, FindOptions.FIND_LAST)) { + assertColumnsAreEqual(expectedFirst, resultFirst); + assertColumnsAreEqual(expectedLast, resultLast); + } + } + + @Test + void testListIndexOfIntCol() { + List list0 = Arrays.asList(1, 2, 3); + List list1 = Arrays.asList(4, 5, 6); + List list2 = Arrays.asList(null, 8, 9, 8); + List list3 = Arrays.asList(null, 8, 9); + List list4 = null; + try (ColumnVector input = makeListsColumn(DType.INT32, list0, list1, list2, list3, list4); + ColumnVector searchKeys = ColumnVector.fromBoxedInts(3, 3, 8, 3, null); + ColumnVector expectedFirst = ColumnVector.fromBoxedInts(2, -1, 1, -1, null); + ColumnVector resultFirst = input.listIndexOf(searchKeys, FindOptions.FIND_FIRST); + ColumnVector expectedLast = ColumnVector.fromBoxedInts(2, -1, 3, -1, null); + ColumnVector resultLast = input.listIndexOf(searchKeys, FindOptions.FIND_LAST)) { + assertColumnsAreEqual(expectedFirst, resultFirst); + assertColumnsAreEqual(expectedLast, resultLast); + } + } + @Test void testListSortRowsWithIntChild() { List list1 = Arrays.asList(1, 3, 0, 2); diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index b898222d7d7..44749103b54 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -304,8 +304,8 @@ def test_get_nulls(): ([[1, 2, 3], [], [3, 4, 5]], 6, [False, False, False],), ([[1.0, 2.0, 3.0], None, []], 2.0, [True, None, False],), ([[None, "b", "c"], [], ["b", "e", "f"]], "b", [True, False, True],), - ([[None, 2, 3], None, []], 1, [None, None, False]), - ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [None, False, False],), + ([[None, 2, 3], None, []], 1, [False, None, False]), + ([[None, "b", "c"], [], ["b", "e", "f"]], "d", [False, False, False],), ], ) def 
test_contains_scalar(data, scalar, expect):