-
Notifications
You must be signed in to change notification settings - Fork 1k
Support nan_equality in cudf::distinct
#11118
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c22b11c
5a6602c
79fa051
a7e3463
3b7124e
4f933bf
bf2a717
11f9dd1
b8832f7
f12a509
edcb612
5b40152
f2dc1eb
8a800e9
c0144b3
7d89f4c
ae891b3
1786dbb
6dfb4f4
3352967
52e29b1
819a669
2e5d41a
8393809
5cdefa3
3626efb
bcc4abe
edbcc78
2f1ce5a
882a67a
cdae2ac
5b21d88
3f18057
21456e7
8a17581
e05ad48
03fb093
3c12942
6ab9673
37dfdcb
b78cf5b
8de0948
9e8c4a5
7fa65ee
ff6e03e
374545a
9bf540a
e1c3cd5
70d3164
bba15c2
d895f48
56e791c
6ffc9b0
dd8c845
d9c0ab9
7770265
96a36c4
0210228
65190cc
a4db720
6c90c53
4cc2f2e
55895e7
df05dc8
ec48856
5f7d778
154645a
8f04d50
d806278
3734344
f731d35
e44c85d
a74f71e
f9de181
a339d83
68652f4
6cec1eb
661400a
700e465
1c783e8
64e03f6
13ad653
7811611
47c5eec
4db34db
fab367b
9ec27af
1359ee0
aa0a4ed
01e03b6
cdc3000
45dec2a
16ba20c
37a23e4
38603fc
e32daf4
cba4759
7247101
120377b
68133d4
aefdadf
faf6778
e839323
f5646b3
538ff08
a755bea
f0ee266
0b35671
db886ea
1ac6501
d0af0e6
be3b2fe
c121268
2410c08
3227af9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -95,6 +95,7 @@ rmm::device_uvector<size_type> hash_reduce_by_row( | |
| cudf::nullate::DYNAMIC has_nulls, | ||
| duplicate_keep_option keep, | ||
| null_equality nulls_equal, | ||
| nan_equality nans_equal, | ||
| rmm::cuda_stream_view stream, | ||
| rmm::mr::device_memory_resource* mr) | ||
| { | ||
|
|
@@ -111,15 +112,26 @@ rmm::device_uvector<size_type> hash_reduce_by_row( | |
| auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); | ||
| auto const key_hasher = experimental::compaction_hash(row_hasher.device_hasher(has_nulls)); | ||
|
|
||
| auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); | ||
| auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal); | ||
|
|
||
| thrust::for_each( | ||
| rmm::exec_policy(stream), | ||
| thrust::make_counting_iterator(0), | ||
| thrust::make_counting_iterator(num_rows), | ||
| reduce_by_row_fn{ | ||
| map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); | ||
| auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); | ||
|
|
||
| auto const reduce_by_row = [&](auto const value_comp) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Along the lines of what @PointKernel was suggesting -- one alternative I considered was making this lambda actually allocate and return the output vector, rather than binding in `reduction_results` by reference. Edit: Initially wrote "IIFE" where I meant lambda. Fixed.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we did that, however, almost the entire body of |
||
| auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); | ||
| thrust::for_each( | ||
| rmm::exec_policy(stream), | ||
| thrust::make_counting_iterator(0), | ||
| thrust::make_counting_iterator(num_rows), | ||
| reduce_by_row_fn{ | ||
| map.get_device_view(), key_hasher, key_equal, keep, reduction_results.begin()}); | ||
| }; | ||
|
|
||
| if (nans_equal == nan_equality::ALL_EQUAL) { | ||
| using nan_equal_comparator = | ||
| cudf::experimental::row::equality::nan_equal_physical_equality_comparator; | ||
| reduce_by_row(nan_equal_comparator{}); | ||
| } else { | ||
| using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; | ||
| reduce_by_row(nan_unequal_comparator{}); | ||
| } | ||
PointKernel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| return reduction_results; | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.