diff --git a/CMakeLists.txt b/CMakeLists.txt index a00476b4e..2bf5f58e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,12 @@ target_include_directories(cuco INTERFACE target_link_libraries(cuco INTERFACE CCCL::CCCL CUDA::toolkit) target_compile_features(cuco INTERFACE cxx_std_17 cuda_std_17) +################################################################################################### +# - Optionally download RoaringFormatSpec test data ----------------------------------------------- + +option(CUCO_DOWNLOAD_ROARING_TESTDATA "Download RoaringFormatSpec test data" ON) +include(${CMAKE_CURRENT_LIST_DIR}/cmake/roaring_testdata.cmake) + ################################################################################################### # - optionally build tests ------------------------------------------------------------------------ diff --git a/README.md b/README.md index 4c04ac88f..ae00028e9 100644 --- a/README.md +++ b/README.md @@ -259,4 +259,11 @@ We plan to add many GPU-accelerated, concurrent data structures to `cuCollection `cuco::bloom_filter` implements a Blocked Bloom Filter for approximate set membership queries. #### Examples: -- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) \ No newline at end of file +- [Host-bulk APIs (Default fingerprinting policy)](https://github.com/NVIDIA/cuCollections/blob/dev/examples/bloom_filter/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJydVmtvGjkU_StXsx8WmuEVbVUJQiSapLtoK5IF2qpaVsjj8TBWBnvqBwRF-e977ZmBgZBqtVRqwL6Pc889vvZzoJnWXAod9P9-Dngc9HthkBGxsmTFgn5AbUyCMNDSKup-d94tBLyDG5nvFF-lBhq0CZfdy99CmHwd345HcHM_fbifjubj-0nb2Xr7z5wyoVkMVsRMgUkZjHJC8U-5E8JXphwQuGx3oeEMFkG5twiaAx9lJy2syQ6ENGA1wzBcQ8IzBuyJstwAF0DlOs84EZTBlpvUpyrjeDjwvQwiI0PQnqBHjr-SuiUQs4fuPqkxeb_T2W63beJht6VadbLCWHc-j2_uJrO7FkLfu30RGTILiv2wXGHh0Q5IjsgoiRBvRrYgFZCVYrhnpEO-VdxwsQpBy8RsiWI-Tsy1UTyy5oi8CifWXzdA-ohA4kYzGM8WAXwczcaz0Mf5Np7_cf9lDt9G0-loMh_fzeB-is2a3I5dq_DXJxhNvsOf48ltCAypw1TsKVeuCoTKHa0sLjicMXYEI5EFLJ0zyhNOoVIQrOSGKYFlQc7UmhdaQ5Cxj5PxNTfE-LVXxflUnYVYiF-4oJmNGVxRS2UnyqRcL7Hvhqk2ten1sY1JldWmQ6UVpu02X23FbIMplhtGjVTnTdgTo9YBW-YSm7Y7b6Wxuwyl1j7FwCU2hZG1X-bCoOK4aGwkj5sL8YyFgVukWLdxHIOw6-Uj22kntiH0ur92u90B7D-dTucKfmeCKWJYuQ3O_nwkkxduw0Pcd9Btvx-UkcbIrjKe64QrbSAlWeLjuWCy3PD0vpFAvErQKjMPXntoG5Xd0uhx6SvzOPCr22xVmygR8M0tszvynKmDBmaXF0I7tgCcBE5eaLL0JkOXfbB3neVk60553auYDDFLiM0MFA12mjwF5Kt3kuv365q7qnJdl_GeaxW-lKm1ift91KCBqys8kR9t9ojAPO-e47fxJFgOU7lyJK6Knru5WMDsY6yFgNoHw_tcTMRZmbtQZ79_pPMaatewRtU5P1v3LpWiG26rHbEVyrYZeo82ZnDfe80yDbFOK_nSWyHvdZdB3QAdvVj2thd1sRRWYh-mcDjaqgIcYBwafF7M5Tz3Ds6wlDOJ40aFIiwzNX_KWiRldu0scRBidxoF8BASkml2zN1ZR1F3PDoptRhlKX9Zpna107efqziFN1xane1KDeGs35eGjnN3EepU2iyGIh34q80oy1q51HixbBj44YHEzB-mw16dFTyq7i7Ur6gJD4VXjf0vcH2ZJiUG8Bbzt7W7RpjwqE6Gizui3N3W9QOhvWOuZEQyvN7wQomJIahzZamxGCushSmjsKeUR9yga8HrSd2fHqbXXTxjOZbl5oUskGArIkSOrHhfXtVWq8oNhmPTlOiUafcEif3MwnrP0yn2dIqSTvEGnUkmkS7HtgM7LI64X2xU8vIXWuNYbGcaVF8qT6yTQbMJnTJgIb9Cu0Xe5H_kPa2kvvSTvKJ5fkA6UeKLBb9XJPhl17dyPTmsnw48xVAVArr48wVfq-4NiK9CdXjUBmJDae_yve3htsxN8eINWhhoSC8ueh-gRRRNh3q9_NCFVgvvLYP_GczB4lZG1pF_Bmc8qsWklGa4uCkerriA9YrH4CWs9vHmONpH7oKXf_y_fwHeCexw)) + +### roaring_bitmap + +`cuco::experimental::roaring_bitmap` implements a Roaring bitmap following the [Roaring bitmap format specification](https://github.com/RoaringBitmap/RoaringFormatSpec). + +#### Examples: +- [Host-bulk APIs](https://github.com/NVIDIA/cuCollections/blob/dev/examples/roaring_bitmap/host_bulk_example.cu) (see [live example in godbolt](https://godbolt.org/clientstate/eJy9WA1v2zYT_iv3qsAgN7aVpB_ZnI_NjdPNWF97sN0VQ1MIlETbhGVRI6k4XpD__h5JfSbK2nV75wCxRR7vnjs-dzzqzpFUSsYT6Qw-3jkscgZHXScmySojK-oMnDCLiNN1JM9EqJ-959cJPIdLnu4FW60VuGEHjg-PX8Hk1_FoPITL6eyX6Wy4GE8nfS1qxN-xkCaSRpAlERWg1hSGKQnxK5_pwq9UaBxw3D8EVwtcO_nctdM5NVr2PIMt2UPCFWSSohomYcliCvQ2pKkClkDIt2nMSBJS2DG1NqZyPQYO_JYr4YEiKE9wRYpPy7okEFVC15-1UunA83a7XZ8Y2H0uVl5shaX3bnx5NZlf9RB6uex9EmNgQdDfMybQ8WAPJEVkIQkQb0x2wAWQlaA4p7hGvhNMsWTVBcmXakcENXoiJpVgQaYawStwov91AQwfSTBwwzmM59cOvBnOx_Ou0fNhvPhp-n4BH4az2XCyGF_NYTrDzZqMxnqr8OktDCe_wc_jyagLFEOHpuhtKrQXCJXpsNLIxnBOaQPGkltYMqUhW7IQCgLBit9QkaBbkFKxZZZqCDIyemK2ZYooM_bIOWPKu06uk2csCeMsonAWZiH3BCcCNfoBU1uS9sNsffFIJlMsZmrvKUGYkv11ml481BQRT6rIC_FfRJcXT06yRLVPqn1KfWugIaDWIpPKi-gNOuLf0FBx0V-3icR8hYSI2yezhGHsJInrKupymvlyLxXdNpYvkQ-UNMcYbxnUvElWjSFrydjxnlsi_2ASbI0K_CCLNz69JUgFimG304FgdAkjusVNxGAoilsp9dbnKdXcLSSn1oLcjDnfZKlRDMNfxrIqFuPEZnZuCXaYL5xgmuw4vDjuoSKwygyRkPQUXr-sDYObcqFMniExt0R1YCn4VqMx-j_OLKQ3RvqtEZkjcz-5Os8lJvoK6Z8FfawlXkO2eKrWdDDDUy4ZRm1f0hoLRLgBZv3X7hbOop8orzKBfNdzIRcCI45jMouRpjAhWxrvu9plDKQyQksex3ynU8hs-MCY6MFH66wucjxTIktkP2CJmXRtlDpf45AXxDzwXh2dnJDoO0-DiIgiXquxzmMo_x6ORyCKTc-Z9vplhcPS4x_E8fql12quU5L4BzykKCzqPM7Pgnzrt2SDOZLq4ge90eX7y6k_mn6YvJsOR_5sOpyNJz_6i6v5YjRcDM-nE31IBFhhqSoTxdRGzP40xqzDsoHlKEH-wM90v8DfmMUB57Flo4u4BwOb8Ug7TNVv8mzxNav8lKg1gr9DvUAytLWiCdXZ7G_oXsI5fPzkdqB3AbY4DQaN6nZWmASjADT3jRF9gGCHgBVTG0cETPoSMfo3xZIu1GYzrLUvjn110SkUAXgeXGLpQg9_zygmmcFTHDgVHe6Ql_cFJ6pMKVT8YwScXQ1H_73qb6NneqinxwozxoU8IG1OGeinhbR2wW0Rgw1G-_AUv87g6FB_9O-Dc_NQiwsYdf00k2s_ILjDm06p-75hBBUbpZW2M-zY7O-Dg82f6XyBTP6s3pOa3m-_TG-bTlsYnyBYWzxdozSgK5a4na41QZPI7RTK74HGukn6ejK-fvkXyNhaEQwV_x9MxBJUcfGRaVv6PkdL497naanFLC1vzfZmcWx3G5-_K56_Ysc_Z2vYtHV0-DdsPRA5vD3OtVWyrSKvGiJPImY2adkDoEwn7rFW8SdwD2-_zRfAAbC_mRlmS780M-4qjmAXHvpESiqUqztobQbPk4gmyl8SFC5rfBd7uPw3bBGQPpbK4oXxKVDk17a6B3f3hX39pR_0NybFFE2ZTNHPNjfzNtaMug-Pqm4uxOVggFQnYm9tYaq7_9FSfcxtjkrR5cJLsyKkQsDZGbrwlqCYuXtpORzQww_t6DGzDiMR59hzX0xQzNB96caPVNn7qGR_0PIoNdr0CHLEKKsad_vbTD7ysVOFZ4gdYKirjumWt9hrY-1pnoFl9AqWVNcH07unLElo1EKaYK_w3A6y5ZIKt0RTMz6j2Hsbr3BjeW5dz5k44x5FrqA4RQVeFpUfEqnOwjURzy_cAosgOz_lRsbMu9ZcX9cx3CGkaMNwrjqMuaRuDUleevMbRel43tuXEbD0xWpPBdsif0mMCBq3kKpfaY67doe_FPZpbeNtt1Q_FlyCCS2xX44jnSJ4BOkXDkg5ltT2rVPSJG-0Go1Xi_cE7B5q5kr8tjfx_AZRJ0CzTOhW8KICYSuE4Z2OvuZyzdQbvLP08jtLzSOywrX5rcSi1_IPr-PWhHyyBnUrFMV03cvi7qSjpw1Zy-LJAOp1ps_FBX4lc17GQY_zpfvIaB1HDgzrQ6070LWPqf3dveVknvcNMzhhsl-3QluitZoe-hlbRngpNk190cvrPt4fjWdlhas346Ap5UdMIO7WVaelmzILQ_1Opvk5t21-e59UKj_AMtd-lSuKdaH9m69S-Fe12cOqqa39GucUFLFVHJHbKp5bGOT121Y0DBOJ0zUxIzmEB4W8tqGFxPdwCAM4wsln-nCsjJVHRvttrnW_8usassCwESsHwzJg3kg2bn7XTtUW5J9r5wuug52HCx8dVLl31p8kYkukqtN19PtRrJOiet_rJDdheHT8KjvCaQsLJ50eqjsPDw6OTqBHRLg-l1v_5BB6PSysCv8p3RxEvZhsA_OGOGZBTWcYhjEO3tiXujigmbZx7rvFPNboxjxWK-f- \ No newline at end of file diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 34e63fd28..9940c82b9 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -28,7 +28,7 @@ function(ConfigureBench BENCH_NAME) target_include_directories(${BENCH_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}") target_compile_options(${BENCH_NAME} PRIVATE --compiler-options=-Wall --compiler-options=-Wextra - --compiler-options=-Werror -Wno-deprecated-gpu-targets --expt-extended-lambda) + --compiler-options=-Werror -Wno-deprecated-gpu-targets --expt-extended-lambda -lineinfo) # Add GCC-specific warning suppression only for GCC if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") target_compile_options(${BENCH_NAME} PRIVATE -Xcompiler -Wno-subobject-linkage) @@ -104,3 +104,8 @@ ConfigureBench(HYPERLOGLOG_BENCH ConfigureBench(BLOOM_FILTER_BENCH bloom_filter/add_bench.cu bloom_filter/contains_bench.cu) + +################################################################################################### +# - roaring_bitmap benchmarks --------------------------------------------------------------------- +ConfigureBench(ROARING_BITMAP_BENCH + roaring_bitmap/contains_bench.cu) \ No newline at end of file diff --git a/benchmarks/roaring_bitmap/contains_bench.cu b/benchmarks/roaring_bitmap/contains_bench.cu new file mode 100644 index 000000000..20cb27a27 --- /dev/null +++ b/benchmarks/roaring_bitmap/contains_bench.cu @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include + +using namespace cuco::benchmark; // defaults +using namespace cuco::utility; // key_generator, distribution + +template +void roaring_bitmap_contains(nvbench::state& state, nvbench::type_list) +{ + auto const num_items = state.get_int64("NumInputs"); + auto const bitmap_file = state.get_string_or_default("BitmapFile", {}); + + std::ifstream file(bitmap_file, std::ios::binary); + if (!file.is_open()) { state.skip("Bitmap file not found"); } + + // Get file size + auto const file_size = std::filesystem::file_size(bitmap_file); + + thrust::universal_host_pinned_vector buffer(file_size); + + file.read(reinterpret_cast(thrust::raw_pointer_cast(buffer.data())), file_size); + file.close(); + + cuco::experimental::roaring_bitmap roaring_bitmap(thrust::raw_pointer_cast(buffer.data())); + + thrust::device_vector items(num_items); + + key_generator gen{}; + gen.generate(distribution::unique{}, items.begin(), items.end()); + + thrust::device_vector contained(items.size(), false); + + state.add_element_count(items.size()); + state.add_global_memory_reads(items.size(), "InputSize"); + + auto& summ = state.add_summary("BitmapSizeMB"); + summ.set_string("hint", "BitmapSize"); + summ.set_string("short_name", "BitmapSizeMB"); + summ.set_string("description", "Bitmap size in MB"); + summ.set_float64("value", static_cast(file_size) / (1024 * 1024)); + + state.exec([&](nvbench::launch& launch) { + roaring_bitmap.contains_async( + items.begin(), items.end(), contained.begin(), {launch.get_stream()}); + }); +} + +NVBENCH_BENCH_TYPES(roaring_bitmap_contains, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("roaring_bitmap_contains") + .add_int64_power_of_two_axis("NumInputs", {32}) +// Default benchmark is only available if the Roaring bitmap testdata has been downloaded +#ifdef CUCO_ROARING_DATA_DIR + .add_string_axis("BitmapFile", {std::string(CUCO_ROARING_DATA_DIR) + "/bitmapwithruns.bin"}) +#endif + .set_max_noise(cuco::benchmark::defaults::MAX_NOISE); + +NVBENCH_BENCH_TYPES(roaring_bitmap_contains, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("roaring_bitmap_contains") + .add_int64_power_of_two_axis("NumInputs", {31}) +// Default benchmark is only available if the Roaring bitmap testdata has been downloaded +#ifdef CUCO_ROARING_DATA_DIR + .add_string_axis("BitmapFile", {std::string(CUCO_ROARING_DATA_DIR) + "/portable_bitmap64.bin"}) +#endif + .set_max_noise(cuco::benchmark::defaults::MAX_NOISE); \ No newline at end of file diff --git a/cmake/roaring_testdata.cmake b/cmake/roaring_testdata.cmake new file mode 100644 index 000000000..168519866 --- /dev/null +++ b/cmake/roaring_testdata.cmake @@ -0,0 +1,41 @@ +# ============================================================================= +# Copyright (c) 2025, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +# Only act if enabled +if(NOT CUCO_DOWNLOAD_ROARING_TESTDATA) + return() +endif() + +set(CUCO_ROARING_DATA_DIR "${CMAKE_BINARY_DIR}/data/roaring_bitmap") + +file(MAKE_DIRECTORY "${CUCO_ROARING_DATA_DIR}") + +set(ROARING_FORMATSPEC_BASE "https://raw.githubusercontent.com/RoaringBitmap/RoaringFormatSpec/5177ad9") + +rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithoutruns.bin" + "${CUCO_ROARING_DATA_DIR}/bitmapwithoutruns.bin" + "d719ae2e0150a362ef7cf51c361527585891f01460b1a92bcfb6a7257282a442") + +rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata/bitmapwithruns.bin" + "${CUCO_ROARING_DATA_DIR}/bitmapwithruns.bin" + "1f1909bfdd354fa2f0694fe88b8076833ca5383ad9fc3f68f2709c84a2ab70e3") + +rapids_cmake_download_with_retry("${ROARING_FORMATSPEC_BASE}/testdata64/portable_bitmap64.bin" + "${CUCO_ROARING_DATA_DIR}/portable_bitmap64.bin" + "b5a553a759167f5f9ccb3fa21552d943b4c73235635b753376f4faf62067d178") + +message(STATUS "Roaring Bitmap test data downloaded to: ${CUCO_ROARING_DATA_DIR}") + +# Define macro only when data is available +add_compile_definitions(CUCO_ROARING_DATA_DIR="${CUCO_ROARING_DATA_DIR}") \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 68e35931f..3318c911e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -52,3 +52,4 @@ ConfigureExample(STATIC_MULTIMAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/ ConfigureExample(HYPERLOGLOG_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/hyperloglog/host_bulk_example.cu") ConfigureExample(HYPERLOGLOG_DEVICE_REF_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/hyperloglog/device_ref_example.cu") ConfigureExample(BLOOM_FILTER_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/bloom_filter/host_bulk_example.cu") +ConfigureExample(ROARING_BITMAP_HOST_BULK_EXAMPLE "${CMAKE_CURRENT_SOURCE_DIR}/roaring_bitmap/host_bulk_example.cu") diff --git a/examples/roaring_bitmap/host_bulk_example.cu b/examples/roaring_bitmap/host_bulk_example.cu new file mode 100644 index 000000000..e70e6db3a --- /dev/null +++ b/examples/roaring_bitmap/host_bulk_example.cu @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/** + * @file host_bulk_example.cu + * @brief Demonstrates usage of the roaring_bitmap "bulk" lookup host APIs. + * + * In this example we load two 32-bit bitmaps and one 64-bit bitmap (portable format) from the + * [RoaringBitmapFormatSpec](https://github.com/RoaringBitmap/RoaringFormatSpec) repository and + * check if the bulk lookup API returns the correct results. Namely, we test the following files: + * - [bitmapwithoutruns.bin + * (32-bit)](https://github.com/RoaringBitmap/RoaringFormatSpec/blob/5177ad9/testdata/bitmapwithoutruns.bin) + * - [bitmapwithruns.bin + * (32-bit)](https://github.com/RoaringBitmap/RoaringFormatSpec/blob/5177ad9/testdata/bitmapwithruns.bin) + * - [portable_bitmap64.bin + * (64-bit)](https://github.com/RoaringBitmap/RoaringFormatSpec/blob/5177ad9/testdata64/portable_bitmap64.bin) + * + * @note This example requires the cmake option -DCUCO_DOWNLOAD_ROARING_TESTDATA=ON to be set. + * + */ + +template +bool check(std::string const& bitmap_file_path) +{ + auto generate_keys = []() -> thrust::device_vector { + if constexpr (cuda::std::is_same_v) { + // Create query keys for the bitmapwith{out}runs.bin files: + // https://github.com/RoaringBitmap/RoaringFormatSpec/blob/5177ad9/testdata/README.md#test-data + std::vector keys; + for (cuda::std::uint32_t k = 0; k < 100000; k += 1000) { + keys.push_back(k); + } + for (int k = 100000; k < 200000; ++k) { + keys.push_back(3 * k); + } + for (int k = 700000; k < 800000; ++k) { + keys.push_back(k); + } + return thrust::device_vector(keys.begin(), keys.end()); + } else if constexpr (cuda::std::is_same_v) { + // Create query keys for the portable_bitmap64.bin file: + // https://github.com/RoaringBitmap/RoaringFormatSpec/blob/5177ad9/testdata64/README.md#portable_bitmap64bin + std::vector keys; + for (cuda::std::uint64_t k = 0x00000ull; k < 0x09000ull; ++k) { + keys.push_back(k); + } + for (cuda::std::uint64_t k = 0x0A000ull; k < 0x10000ull; ++k) { + keys.push_back(k); + } + keys.push_back(0x20000ull); + keys.push_back(0x20005ull); + for (cuda::std::uint64_t i = 0; i < 0x10000ull; i += 2ull) { + keys.push_back(0x80000ull + i); + } + return thrust::device_vector(keys.begin(), keys.end()); + } else { + static_assert(cuco::dependent_false, "KeyType must be uint32_t or uint64_t"); + return {}; + } + }; + + // Open file + std::ifstream file(bitmap_file_path, std::ios::binary); + if (!file.is_open()) { + std::cerr << "Failed to open " << bitmap_file_path << std::endl; + return false; + } + + // Get file size + auto file_size = std::filesystem::file_size(bitmap_file_path); + + // Allocate host memory for the bitmap file + thrust::universal_host_pinned_vector buffer(file_size); + + // Read file into memory + file.read(reinterpret_cast(thrust::raw_pointer_cast(buffer.data())), file_size); + file.close(); + + // Create roaring bitmap from the file + cuco::experimental::roaring_bitmap roaring_bitmap( + thrust::raw_pointer_cast(buffer.data())); + + // Generate query keys (all should be contained in the bitmap) + auto keys = generate_keys(); + + // Create a vector to store the results + thrust::device_vector contained(keys.size(), false); + + // Bulk-lookup query keys against the bitmap + roaring_bitmap.contains(keys.begin(), keys.end(), contained.begin()); + + // Check if all the keys are contained in the bitmap + bool all_contained = thrust::all_of(contained.begin(), contained.end(), ::cuda::std::identity{}); + return all_contained; +} + +int main() +{ +#ifdef CUCO_ROARING_DATA_DIR + std::string const data_dir = CUCO_ROARING_DATA_DIR; + bool success = check(data_dir + "/bitmapwithoutruns.bin"); + success &= check(data_dir + "/bitmapwithruns.bin"); + success &= check(data_dir + "/portable_bitmap64.bin"); + + std::cout << "success: " << std::boolalpha << success << std::endl; + + return success ? 0 : 1; +#else + std::cerr << "This example requires CUCO_ROARING_DATA_DIR to be defined (build with cmake option " + "-DCUCO_DOWNLOAD_ROARING_TESTDATA=ON)" + << std::endl; + return 1; +#endif +} \ No newline at end of file diff --git a/include/cuco/detail/roaring_bitmap/roaring_bitmap.inl b/include/cuco/detail/roaring_bitmap/roaring_bitmap.inl new file mode 100644 index 000000000..ff8dc3d13 --- /dev/null +++ b/include/cuco/detail/roaring_bitmap/roaring_bitmap.inl @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cuco::experimental { + +template +roaring_bitmap::roaring_bitmap(cuda::std::byte const* bitmap, + Allocator const& alloc, + cuda::stream_ref stream) + : storage_{bitmap, alloc, stream} +{ +} + +template +template +void roaring_bitmap::contains(InputIt first, + InputIt last, + OutputIt output, + cuda::stream_ref stream) const +{ + ref_type{storage_.ref()}.contains(first, last, output, stream); +} + +template +template +void roaring_bitmap::contains_async(InputIt first, + InputIt last, + OutputIt output, + cuda::stream_ref stream) const noexcept +{ + ref_type{storage_.ref()}.contains_async(first, last, output, stream); +} + +template +cuda::std::size_t roaring_bitmap::size() const noexcept +{ + return ref_type{storage_.ref()}.size(); +} + +template +bool roaring_bitmap::empty() const noexcept +{ + return ref_type{storage_.ref()}.empty(); +} + +template +cuda::std::byte const* roaring_bitmap::data() const noexcept +{ + return ref_type{storage_.ref()}.data(); +} + +template +cuda::std::size_t roaring_bitmap::size_bytes() const noexcept +{ + return ref_type{storage_.ref()}.size_bytes(); +} + +template +typename roaring_bitmap::allocator_type roaring_bitmap::allocator() + const noexcept +{ + return storage_.allocator(); +} + +template +typename roaring_bitmap::ref_type roaring_bitmap::ref() const noexcept +{ + return ref_type{storage_.ref()}; +} +} // namespace cuco::experimental \ No newline at end of file diff --git a/include/cuco/detail/roaring_bitmap/roaring_bitmap_impl.cuh b/include/cuco/detail/roaring_bitmap/roaring_bitmap_impl.cuh new file mode 100644 index 000000000..7276dfae8 --- /dev/null +++ b/include/cuco/detail/roaring_bitmap/roaring_bitmap_impl.cuh @@ -0,0 +1,375 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cuco::experimental::detail { + +// primary template +template +class roaring_bitmap_impl { + static_assert(cuco::dependent_false, "T must be either uint32_t or uint64_t"); +}; + +template <> +class roaring_bitmap_impl { + public: + using storage_ref_type = roaring_bitmap_storage_ref; + + static constexpr cuda::std::uint32_t binary_search_threshold = 8; // TODO determine optimal value + + __host__ __device__ roaring_bitmap_impl(storage_ref_type const& storage_ref) + : storage_ref_{storage_ref}, + offsets_aligned_{(reinterpret_cast( + storage_ref_.data() + storage_ref_.metadata().container_offsets)) % + sizeof(cuda::std::uint32_t) == + 0}, + aligned_16_{(reinterpret_cast(storage_ref_.data() + + storage_ref_.metadata().key_cards)) % + sizeof(cuda::std::uint16_t) == + 0} // if base address of key_cards is aligned, then all containers are aligned + { + } + + template + __host__ void contains(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const + { + this->contains_async(first, last, contained, stream); + stream.wait(); + } + + template + __host__ void contains_async(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const noexcept + { + if (this->empty()) { + cub::DeviceTransform::Transform( + thrust::constant_iterator(false), + contained, + cuda::std::distance(first, last), + cuda::proclaim_return_type([] __device__(auto /* dummy */) { return false; }), + stream.get()); + } else { + cub::DeviceTransform::Transform( + first, + contained, + cuda::std::distance(first, last), + cuda::proclaim_return_type( + [*this] __device__(auto key) { return this->contains(key); }), + stream.get()); + } + } + + __device__ bool contains(cuda::std::uint32_t value) const + { + if (storage_ref_.metadata().num_keys == 0) { return false; } + + if (aligned_16_) { + return this->dispatch_contains(value); + } else { + return this->dispatch_contains(value); + } + } + + template + __device__ bool dispatch_contains(cuda::std::uint32_t value) const + { + cuda::std::uint16_t const upper = value >> 16; + cuda::std::uint16_t const lower = value & 0xFFFF; + cuda::std::uint16_t key; + + if (storage_ref_.metadata().num_containers < binary_search_threshold) { +// linear search +#pragma unroll + for (cuda::std::uint32_t i = 0; i < storage_ref_.metadata().num_containers; i++) { + cuda::std::byte const* key_ptr = + storage_ref_.key_cards() + (i * 2) * sizeof(cuda::std::uint16_t); + if constexpr (Aligned) { + key = aligned_load(key_ptr); + } else { + key = misaligned_load(key_ptr); + } + if (key == upper) { return this->contains_container(lower, i); } + if (key > upper) { return false; } + } + } else { + // binary search + cuda::std::uint32_t left = 0; + cuda::std::uint32_t right = storage_ref_.metadata().num_containers; + while (left < right) { + cuda::std::uint32_t mid = left + (right - left) / 2; + cuda::std::byte const* key_ptr = + storage_ref_.key_cards() + (mid * 2) * sizeof(cuda::std::uint16_t); + if constexpr (Aligned) { + key = aligned_load(key_ptr); + } else { + key = misaligned_load(key_ptr); + } + + if (key == upper) { + return this->contains_container(lower, mid); + } else if (key < upper) { + left = mid + 1; + } else { + right = mid; + } + } + } + return false; + } + + [[nodiscard]] __host__ __device__ cuda::std::size_t size() const noexcept + { + return storage_ref_.metadata().num_keys; + } + + [[nodiscard]] __host__ __device__ bool empty() const noexcept { return this->size() == 0; } + + [[nodiscard]] __host__ __device__ cuda::std::byte const* data() const noexcept + { + return storage_ref_.data(); + } + + [[nodiscard]] __host__ __device__ cuda::std::size_t size_bytes() const noexcept + { + return storage_ref_.metadata().size_bytes; + } + + template + __device__ bool contains_container(cuda::std::uint16_t lower, cuda::std::uint32_t index) const + { + cuda::std::uint32_t offset; + cuda::std::byte const* offset_ptr = + storage_ref_.container_offsets() + index * sizeof(cuda::std::uint32_t); + if (offsets_aligned_) { + offset = aligned_load(offset_ptr); + } else { + offset = misaligned_load(offset_ptr); + } + cuda::std::byte const* container = storage_ref_.data() + offset; + if (storage_ref_.metadata().has_run and check_bit(storage_ref_.run_container_bitmap(), index)) { + return this->contains_run_container(container, lower); + } else { + cuda::std::uint32_t card; + cuda::std::byte const* card_ptr = + storage_ref_.key_cards() + (index * 2 + 1) * sizeof(cuda::std::uint16_t); + if constexpr (Aligned) { + card = 1u + aligned_load(card_ptr); + } else { + card = 1u + misaligned_load(card_ptr); + } + if (card <= storage_ref_type::metadata_type::max_array_container_card) { + return this->contains_array_container(container, lower, card); + } else { + return this->contains_bitset_container(container, lower); + } + } + } + + template + __device__ bool contains_array_container(cuda::std::byte const* container, + cuda::std::uint16_t lower, + cuda::std::uint32_t card) const + { + cuda::std::uint16_t elem; + // Use linear search for small arrays, binary search for larger ones + if (card < binary_search_threshold) { + for (cuda::std::uint32_t i = 0; i < card; i++) { + cuda::std::byte const* elem_ptr = container + i * sizeof(cuda::std::uint16_t); + if constexpr (Aligned) { + elem = aligned_load(elem_ptr); + } else { + elem = misaligned_load(elem_ptr); + } + if (elem == lower) { return true; } + } + return false; + } else { + cuda::std::uint32_t left = 0; + cuda::std::uint32_t right = card; + + while (left < right) { + cuda::std::uint32_t mid = left + (right - left) / 2; + cuda::std::byte const* elem_ptr = container + mid * sizeof(cuda::std::uint16_t); + if constexpr (Aligned) { + elem = aligned_load(elem_ptr); + } else { + elem = misaligned_load(elem_ptr); + } + if (elem == lower) { + return true; + } else if (elem < lower) { + left = mid + 1; + } else { + right = mid; + } + } + return false; + } + } + + __device__ bool contains_bitset_container(cuda::std::byte const* container, + cuda::std::uint16_t lower) const + { + return check_bit(container, lower); + } + + template + __device__ bool contains_run_container(cuda::std::byte const* container, + cuda::std::uint16_t lower) const + { + // TODO implement binary search + cuda::std::uint16_t num_runs; + if constexpr (Aligned) { + num_runs = aligned_load(container); + } else { + num_runs = misaligned_load(container); + } + + cuda::std::uint16_t start; + cuda::std::uint32_t end; + + for (cuda::std::uint32_t i = 0; i < num_runs; i++) { + // the first 16 bits of the run container denotes the number of runs + // followed by the sequence of runs as (start, end) U16 pairs + cuda::std::byte const* start_ptr = container + (i * 2 + 1) * sizeof(cuda::std::uint16_t); + // TODO load start+end in one instruction + if constexpr (Aligned) { + start = aligned_load(start_ptr); + end = static_cast(start) + + aligned_load(start_ptr + sizeof(cuda::std::uint16_t)); + } else { + start = misaligned_load(start_ptr); + end = static_cast(start) + + misaligned_load(start_ptr + sizeof(cuda::std::uint16_t)); + } + if (start <= lower && end >= lower) { return true; } + if (start > lower) { break; } + } + return false; + } + + storage_ref_type storage_ref_; + bool offsets_aligned_; + bool aligned_16_; +}; + +template <> +class roaring_bitmap_impl { + public: + using bucket_type = roaring_bitmap_impl; + using storage_ref_type = roaring_bitmap_storage_ref; + + __host__ __device__ roaring_bitmap_impl(storage_ref_type const& storage_ref) + : storage_ref_{storage_ref} + { + } + + template + __host__ void contains(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const + { + this->contains_async(first, last, contained, stream); + stream.wait(); + } + + template + __host__ void contains_async(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const noexcept + { + if (this->empty()) { + cub::DeviceTransform::Transform( + thrust::constant_iterator(false), + contained, + cuda::std::distance(first, last), + cuda::proclaim_return_type([] __device__(auto /* dummy */) { return false; }), + stream.get()); + } else { + cub::DeviceTransform::Transform( + first, + contained, + cuda::std::distance(first, last), + cuda::proclaim_return_type( + [*this] __device__(auto key) { return this->contains(key); }), + stream.get()); + } + } + + __device__ bool contains(cuda::std::uint64_t value) const + { + cuda::std::uint32_t bucket_key = value >> 32; + cuda::std::uint32_t bucket_value = value & 0xFFFFFFFF; + + // binary search in storage_ref_.buckets() + cuda::std::uint32_t left = 0; + cuda::std::uint32_t right = storage_ref_.metadata().num_buckets; + while (left < right) { + cuda::std::uint32_t mid = left + (right - left) / 2; + if (storage_ref_.buckets()[mid].first == bucket_key) { + return bucket_type{storage_ref_.buckets()[mid].second}.contains( + bucket_value); // TODO is constructing the ref in-place a bad idea? + } else if (storage_ref_.buckets()[mid].first < bucket_key) { + left = mid + 1; + } else { + right = mid; + } + } + return false; + } + + [[nodiscard]] __host__ __device__ cuda::std::size_t size() const noexcept + { + return storage_ref_.metadata().num_keys; + } + + [[nodiscard]] __host__ __device__ bool empty() const noexcept { return this->size() == 0; } + + [[nodiscard]] __host__ __device__ cuda::std::byte const* data() const noexcept + { + return storage_ref_.data(); + } + + [[nodiscard]] __host__ __device__ cuda::std::size_t size_bytes() const noexcept + { + return storage_ref_.metadata().size_bytes; + } + + storage_ref_type storage_ref_; +}; + +} // namespace cuco::experimental::detail \ No newline at end of file diff --git a/include/cuco/detail/roaring_bitmap/roaring_bitmap_ref.inl b/include/cuco/detail/roaring_bitmap/roaring_bitmap_ref.inl new file mode 100644 index 000000000..01738ac7f --- /dev/null +++ b/include/cuco/detail/roaring_bitmap/roaring_bitmap_ref.inl @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +namespace cuco::experimental { + +template +__host__ __device__ roaring_bitmap_ref::roaring_bitmap_ref(storage_ref_type const& storage_ref) + : impl_{storage_ref} +{ +} + +template +template > */> +__device__ roaring_bitmap_ref::roaring_bitmap_ref(cuda::std::byte const* bitmap) : impl_{bitmap} +{ +} + +template +template +__host__ void roaring_bitmap_ref::contains(InputIt first, + InputIt last, + OutputIt output, + cuda::stream_ref stream) const +{ + impl_.contains(first, last, output, stream); +} + +template +template +__host__ void roaring_bitmap_ref::contains_async(InputIt first, + InputIt last, + OutputIt output, + cuda::stream_ref stream) const noexcept +{ + impl_.contains_async(first, last, output, stream); +} + +template +__device__ bool roaring_bitmap_ref::contains(T value) const +{ + return impl_.contains(value); +} + +template +__host__ __device__ cuda::std::size_t roaring_bitmap_ref::size() const noexcept +{ + return impl_.size(); +} + +template +__host__ __device__ bool roaring_bitmap_ref::empty() const noexcept +{ + return impl_.empty(); +} + +template +__host__ __device__ cuda::std::byte const* roaring_bitmap_ref::data() const noexcept +{ + return impl_.data(); +} + +template +__host__ __device__ cuda::std::size_t roaring_bitmap_ref::size_bytes() const noexcept +{ + return impl_.size_bytes(); +} + +} // namespace cuco::experimental \ No newline at end of file diff --git a/include/cuco/detail/roaring_bitmap/roaring_bitmap_storage.cuh b/include/cuco/detail/roaring_bitmap/roaring_bitmap_storage.cuh new file mode 100644 index 000000000..4c33f5ee4 --- /dev/null +++ b/include/cuco/detail/roaring_bitmap/roaring_bitmap_storage.cuh @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace cuco::experimental::detail { + +template +struct roaring_bitmap_storage_ref { + static_assert(cuco::dependent_false, "T must be either uint32_t or uint64_t"); +}; + +template <> +class roaring_bitmap_storage_ref { + public: + using metadata_type = roaring_bitmap_metadata; + __host__ __device__ roaring_bitmap_storage_ref(cuda::std::byte const* bitmap, + metadata_type const& metadata) + : metadata_{metadata}, + data_{bitmap}, + run_container_bitmap_{bitmap + metadata.run_container_bitmap}, + key_cards_{bitmap + metadata.key_cards}, + container_offsets_{bitmap + metadata.container_offsets} + { + assert(metadata.valid); + } + + __device__ roaring_bitmap_storage_ref(cuda::std::byte const* bitmap) + : roaring_bitmap_storage_ref{bitmap, metadata_type{bitmap}} + { + } + + __host__ __device__ metadata_type const& metadata() const noexcept { return metadata_; } + + __host__ __device__ cuda::std::byte const* data() const noexcept { return data_; } + + __host__ __device__ cuda::std::size_t size_bytes() const noexcept { return metadata_.size_bytes; } + + __host__ __device__ cuda::std::byte const* run_container_bitmap() const noexcept + { + return run_container_bitmap_; + } + + __host__ __device__ cuda::std::byte const* key_cards() const noexcept { return key_cards_; } + + __host__ __device__ cuda::std::byte const* container_offsets() const noexcept + { + return container_offsets_; + } + + private: + metadata_type metadata_; + cuda::std::byte const* data_; + cuda::std::byte const* run_container_bitmap_; + cuda::std::byte const* key_cards_; + cuda::std::byte const* container_offsets_; +}; + +template <> +class roaring_bitmap_storage_ref { + public: + using metadata_type = roaring_bitmap_metadata; + + __host__ __device__ roaring_bitmap_storage_ref( + cuda::std::byte const* bitmap, + metadata_type const& metadata, + cuda::std::pair>* buckets) + : metadata_{metadata}, data_{bitmap}, buckets_{buckets} + { + } + + __host__ __device__ metadata_type const& metadata() const noexcept { return metadata_; } + + __host__ __device__ cuda::std::byte const* data() const noexcept { return data_; } + + __host__ __device__ cuda::std::size_t size_bytes() const noexcept { return metadata_.size_bytes; } + + __host__ __device__ + cuda::std::pair>* + buckets() const noexcept + { + return buckets_; + } + + private: + metadata_type metadata_; + cuda::std::byte const* data_; + cuda::std::pair>* buckets_; +}; + +template +struct roaring_bitmap_storage { + static_assert(cuco::dependent_false, "T must be either uint32_t or uint64_t"); +}; + +template +class roaring_bitmap_storage { + public: + using allocator_type = + typename std::allocator_traits::template rebind_alloc; + using ref_type = roaring_bitmap_storage_ref; + + roaring_bitmap_storage(roaring_bitmap_storage const& other) = default; + roaring_bitmap_storage(roaring_bitmap_storage&& other) = default; + roaring_bitmap_storage& operator=(roaring_bitmap_storage const& other) = default; + roaring_bitmap_storage& operator=(roaring_bitmap_storage&& other) = default; + + ~roaring_bitmap_storage() = default; + + roaring_bitmap_storage(cuda::std::byte const* bitmap, + Allocator const& alloc, + cuda::stream_ref stream) + : allocator_{alloc}, + metadata_{bitmap}, + data_{allocator_.allocate(metadata_.size_bytes), + cuco::detail::custom_deleter{metadata_.size_bytes, + allocator_}}, + ref_{data_.get(), metadata_} + { + CUCO_CUDA_TRY(cudaMemcpyAsync( + data_.get(), bitmap, metadata_.size_bytes, cudaMemcpyHostToDevice, stream.get())); + } + + ref_type ref() const noexcept { return ref_; } + + private: + allocator_type allocator_; + typename ref_type::metadata_type metadata_; + std::unique_ptr> + data_; + ref_type ref_; +}; + +template +class roaring_bitmap_storage { + public: + using allocator_type = + typename std::allocator_traits::template rebind_alloc; + using ref_type = roaring_bitmap_storage_ref; + using bucket_ref_type = roaring_bitmap_storage_ref; + using bucket_allocator_type = typename std::allocator_traits::template rebind_alloc< + cuda::std::pair>; + + roaring_bitmap_storage(roaring_bitmap_storage const& other) = default; + roaring_bitmap_storage(roaring_bitmap_storage&& other) = default; + roaring_bitmap_storage& operator=(roaring_bitmap_storage const& other) = default; + roaring_bitmap_storage& operator=(roaring_bitmap_storage&& other) = default; + + ~roaring_bitmap_storage() = default; + + roaring_bitmap_storage(cuda::std::byte const* bitmap, + Allocator const& alloc, + cuda::stream_ref stream) + : allocator_{alloc}, + bucket_allocator_{alloc}, + bucket_metadata_{}, + buckets_h_{}, + metadata_{ + [bitmap](std::vector& bucket_metadata) { + return typename ref_type::metadata_type{bitmap, bucket_metadata}; + }(bucket_metadata_)}, + data_{allocator_.allocate(metadata_.size_bytes), + cuco::detail::custom_deleter{metadata_.size_bytes, + allocator_}}, + buckets_{bucket_allocator_.allocate(metadata_.num_buckets), + cuco::detail::custom_deleter{ + metadata_.num_buckets, bucket_allocator_}}, + ref_{data_.get(), metadata_, buckets_.get()} + { + assert(metadata_.valid); + buckets_h_.reserve(bucket_metadata_.size()); + for (auto const& meta : bucket_metadata_) { + buckets_h_.emplace_back(meta.key, + bucket_ref_type{data_.get() + meta.byte_offset, meta.metadata}); + } + CUCO_CUDA_TRY(cudaMemcpyAsync( + data_.get(), bitmap, metadata_.size_bytes, cudaMemcpyHostToDevice, stream.get())); + CUCO_CUDA_TRY(cudaMemcpyAsync( + buckets_.get(), + buckets_h_.data(), + metadata_.num_buckets * sizeof(cuda::std::pair), + cudaMemcpyHostToDevice, + stream.get())); + } + + ref_type ref() const noexcept { return ref_; } + + private: + allocator_type allocator_; + bucket_allocator_type bucket_allocator_; + std::vector bucket_metadata_; + std::vector> buckets_h_; + typename ref_type::metadata_type metadata_; + std::unique_ptr> + data_; + std::unique_ptr, + cuco::detail::custom_deleter> + buckets_; + ref_type ref_; +}; + +} // namespace cuco::experimental::detail \ No newline at end of file diff --git a/include/cuco/detail/roaring_bitmap/util.cuh b/include/cuco/detail/roaring_bitmap/util.cuh new file mode 100644 index 000000000..1807b471d --- /dev/null +++ b/include/cuco/detail/roaring_bitmap/util.cuh @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include +#include + +#include +#include + +namespace cuco::experimental::detail { + +template +__host__ __device__ __forceinline__ T aligned_load(cuda::std::byte const* ptr) +{ + return *reinterpret_cast(cuda::std::assume_aligned(ptr)); +} + +template +__host__ __device__ __forceinline__ T misaligned_load(cuda::std::byte const* ptr) +{ + T value; + cuda::std::memcpy(&value, ptr, sizeof(T)); + return value; +} + +__host__ __device__ __forceinline__ bool check_bit(cuda::std::byte const* bitmap, + cuda::std::uint32_t index) +{ + // check if the bit at index is set + return static_cast(bitmap[index / 8]) & + (cuda::std::uint8_t(1) << (index % 8)); +} + +template +struct roaring_bitmap_metadata { + static_assert(cuco::dependent_false, "T must be either uint32_t or uint64_t"); +}; + +template <> +struct roaring_bitmap_metadata { + static constexpr cuda::std::uint32_t max_array_container_card = 4096; + + cuda::std::size_t size_bytes = 0; + cuda::std::size_t num_keys = 0; + cuda::std::uint32_t run_container_bitmap = 0; + cuda::std::uint32_t key_cards = 0; + cuda::std::uint32_t container_offsets = 0; + cuda::std::int32_t num_containers = 0; + bool has_run = false; + bool valid = false; + + __host__ __device__ roaring_bitmap_metadata(cuda::std::byte const* bitmap) + { + constexpr cuda::std::uint32_t serial_cookie_no_runcontainer = 12346; + constexpr cuda::std::uint32_t serial_cookie = 12347; + // constexpr cuda::std::uint32_t frozen_cookie = 13766; // not implemented + constexpr cuda::std::int32_t no_offset_threshold = 4; + constexpr cuda::std::int32_t max_containers = 1 << 16; + constexpr cuda::std::uint32_t cookie_mask = 0xFFFF; + constexpr cuda::std::uint32_t cookie_shift = 16; + constexpr cuda::std::uint32_t bitset_container_bytes = 8192; + + cuda::std::byte const* buf = bitmap; + + cuda::std::uint32_t cookie; + cuda::std::memcpy(&cookie, buf, sizeof(cuda::std::uint32_t)); + buf += sizeof(cuda::std::uint32_t); + if ((cookie & cookie_mask) != serial_cookie && cookie != serial_cookie_no_runcontainer) { + valid = false; + NV_IF_TARGET( + NV_IS_HOST, + CUCO_FAIL( + "Invalid bitmap format: cookie type invalid or not supported");) // TODO device error + // handling + return; + } + + if ((cookie & cookie_mask) == serial_cookie) + // upper 16 bits of cookie are the number of containers - 1 + num_containers = (cookie >> cookie_shift) + 1; + else { + // following 4 bytes are the number of containers + cuda::std::memcpy(&num_containers, buf, sizeof(cuda::std::uint32_t)); + buf += sizeof(cuda::std::uint32_t); + } + if (num_containers < 0 or num_containers > max_containers) { + valid = false; + NV_IF_TARGET( + NV_IS_HOST, + CUCO_FAIL( + "Invalid bitmap format: num_containers out of range");) // TODO device error handling + return; + } + + has_run = (cookie & cookie_mask) == serial_cookie; + if (has_run) { + cuda::std::size_t s = (num_containers + 7) / 8; // ceil bytes to store run container bitmap + run_container_bitmap = cuda::std::distance(bitmap, buf); + buf += s; + } + + key_cards = cuda::std::distance(bitmap, buf); + // if the current address is aligned to 2 bytes, then all containers are aligned to at least 2 + // bytes + bool const aligned_16 = (reinterpret_cast(bitmap + key_cards) % + sizeof(cuda::std::uint16_t)) == 0; + buf += num_containers * 2 * sizeof(cuda::std::uint16_t); + + if ((!has_run) || (num_containers >= no_offset_threshold)) { + container_offsets = cuda::std::distance(bitmap, buf); + buf += num_containers * sizeof(cuda::std::uint32_t); + } else { + valid = false; + NV_IF_TARGET( + NV_IS_HOST, + CUCO_FAIL("Invalid bitmap format: not implemented");) // TODO device error handling + return; + } + + cuda::std::uint32_t card = 0; + for (cuda::std::int32_t i = 0; i < num_containers; i++) { + cuda::std::byte const* card_ptr = + bitmap + key_cards + (i * 2 + 1) * sizeof(cuda::std::uint16_t); + if (aligned_16) { + card = 1u + aligned_load(card_ptr); + } else { + card = 1u + misaligned_load(card_ptr); + } + num_keys += card; + } + + // find end of roaring bitmap (re-use card from last container) + cuda::std::byte const* end = + bitmap + misaligned_load( + bitmap + container_offsets + (num_containers - 1) * sizeof(cuda::std::uint32_t)); + if (has_run and check_bit(bitmap + run_container_bitmap, num_containers - 1)) { + cuda::std::uint16_t const num_runs = misaligned_load(end); + end += sizeof(cuda::std::uint16_t) + num_runs * 2 * sizeof(cuda::std::uint16_t); + } else { + if (card <= max_array_container_card) { + end += card * sizeof(cuda::std::uint16_t); + } else { + end += bitset_container_bytes; // fixed size bitset container + } + } + + size_bytes = static_cast(cuda::std::distance(bitmap, end)); + valid = true; + } +}; + +template <> +struct roaring_bitmap_metadata { + cuda::std::size_t num_buckets = 0; + cuda::std::size_t size_bytes = 0; + cuda::std::size_t num_keys = 0; + bool valid = false; + + struct bucket_metadata { + cuda::std::size_t byte_offset; + cuda::std::uint32_t key; + roaring_bitmap_metadata metadata; + + bucket_metadata(cuda::std::size_t offset, + cuda::std::uint32_t k, + roaring_bitmap_metadata const& meta) + : byte_offset{offset}, key{k}, metadata{meta} + { + } + }; + + __host__ roaring_bitmap_metadata(cuda::std::byte const* bitmap, + std::vector& bucket_metadata) + { + cuda::std::size_t byte_offset = 0; + cuda::std::byte const* bitmap_ptr = bitmap; + cuda::std::memcpy(&num_buckets, bitmap_ptr, sizeof(cuda::std::uint64_t)); + byte_offset += sizeof(cuda::std::uint64_t); // skip num_buckets + + bucket_metadata.clear(); + bucket_metadata.reserve(num_buckets); + + for (cuda::std::size_t i = 0; i < num_buckets; ++i) { + cuda::std::uint32_t bucket_key; + cuda::std::memcpy(&bucket_key, bitmap_ptr + byte_offset, sizeof(cuda::std::uint32_t)); + byte_offset += sizeof(cuda::std::uint32_t); // skip bucket key + roaring_bitmap_metadata bucket_meta{bitmap_ptr + byte_offset}; + if (!bucket_meta.valid) { + valid = false; + return; + } + bucket_metadata.emplace_back(byte_offset, bucket_key, bucket_meta); + num_keys += bucket_meta.num_keys; + byte_offset += bucket_meta.size_bytes; // skip bucket + } + size_bytes = byte_offset; + valid = true; + } + + __host__ __device__ roaring_bitmap_metadata(cuda::std::byte const* bitmap) + { + cuda::std::size_t byte_offset = 0; + cuda::std::byte const* bitmap_ptr = bitmap; + cuda::std::memcpy(&num_buckets, bitmap_ptr, sizeof(cuda::std::uint64_t)); + byte_offset += sizeof(cuda::std::uint64_t); // skip num_buckets + + for (cuda::std::size_t i = 0; i < num_buckets; ++i) { + byte_offset += sizeof(cuda::std::uint32_t); // skip bucket key + roaring_bitmap_metadata bucket_meta{bitmap_ptr + byte_offset}; + if (!bucket_meta.valid) { + valid = false; + return; + } + num_keys += bucket_meta.num_keys; + byte_offset += bucket_meta.size_bytes; // skip bucket + } + size_bytes = byte_offset; + valid = true; + } +}; +} // namespace cuco::experimental::detail \ No newline at end of file diff --git a/include/cuco/roaring_bitmap.cuh b/include/cuco/roaring_bitmap.cuh new file mode 100644 index 000000000..e7c083bf3 --- /dev/null +++ b/include/cuco/roaring_bitmap.cuh @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace cuco::experimental { + +/** + * @brief GPU-accelerated container that owns a serialized Roaring bitmap. + * + * The `roaring_bitmap` provides host-side bulk membership queries over a bitmap stored in the + * [Roaring bitmap format specification](https://github.com/RoaringBitmap/RoaringFormatSpec). + * The serialized bytes are copied to device-accessible storage upon construction, and queries are + * executed on the GPU. + * + * In addition to bulk host APIs such as `contains`/`contains_async`, this container exposes a + * non-owning reference object via `ref()` that can be used for device-side per-thread queries. + * + * @tparam T Key type. Must be `cuda::std::uint32_t` or `cuda::std::uint64_t`. + * @tparam Allocator Allocator type used to manage device-accessible storage for the serialized + * bytes. + */ +template > +class roaring_bitmap { + public: + using value_type = T; ///< Key type + using storage_type = detail::roaring_bitmap_storage; ///< Storage implementation + using allocator_type = typename storage_type::allocator_type; ///< Allocator type + using ref_type = roaring_bitmap_ref; ///< Non-owning reference type + + /** + * @brief Constructs a `roaring_bitmap` by copying the serialized bytes to device-accessible + * storage. + * + * @param bitmap Pointer to the beginning of the serialized bitmap in host memory + * @param alloc Allocator used to allocate device-accessible storage + * @param stream CUDA stream used for device memory operations during construction + */ + roaring_bitmap(cuda::std::byte const* bitmap, + Allocator const& alloc = {}, + cuda::stream_ref stream = {}); + + roaring_bitmap(roaring_bitmap const& other) = default; ///< Copy constructor + roaring_bitmap(roaring_bitmap&& other) = default; ///< Move constructor + roaring_bitmap& operator=(roaring_bitmap const& other) = default; ///< Copy assignment + roaring_bitmap& operator=(roaring_bitmap&& other) = default; ///< Move assignment + + ~roaring_bitmap() = default; ///< Destructor + + /** + * @brief Bulk membership query for keys in `[first, last)`. + * + * @note This function synchronizes the given stream. For asynchronous execution use + * `contains_async`. + * + * @tparam InputIt Device-accessible random access input iterator of keys convertible to `T` + * @tparam OutputIt Device-accessible random access output iterator whose `value_type` is + * constructible from `bool` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param contained Output iterator where results are written; `true` iff the corresponding key + * is present in the bitmap + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + void contains(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const; + + /** + * @brief Asynchronously performs a bulk membership query for keys in `[first, last)`. + * + * @tparam InputIt Device-accessible random access input iterator of keys convertible to `T` + * @tparam OutputIt Device-accessible random access output iterator to `bool` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param contained Output iterator where results are written; `true` iff the corresponding key + * is present in the bitmap + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + void contains_async(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const noexcept; + + /** + * @brief Number of keys stored in the bitmap. + * + * @return Count of keys in the bitmap + */ + [[nodiscard]] cuda::std::size_t size() const noexcept; + + /** + * @brief Checks whether the bitmap contains no keys. + * + * @return `true` iff `size() == 0` + */ + [[nodiscard]] bool empty() const noexcept; + + /** + * @brief Returns a pointer to the beginning of the serialized bitmap bytes in device-accessible + * storage. + * + * @return Pointer to the serialized storage + */ + [[nodiscard]] cuda::std::byte const* data() const noexcept; + + /** + * @brief Size in bytes of the serialized bitmap storage. + * + * @return Number of bytes occupied by the serialized bitmap + */ + [[nodiscard]] cuda::std::size_t size_bytes() const noexcept; + + /** + * @brief Returns the allocator used to manage device-accessible storage. + * + * @return Allocator instance + */ + [[nodiscard]] allocator_type allocator() const noexcept; + + /** + * @brief Returns a non-owning reference to the underlying bitmap suitable for device-side use. + * + * The returned reference type provides device functions such as `contains(T)` for per-thread + * membership testing. + * + * @return Non-owning reference to the underlying bitmap + */ + [[nodiscard]] ref_type ref() const noexcept; + + private: + storage_type storage_; ///< Storage type +}; + +} // namespace cuco::experimental + +#include \ No newline at end of file diff --git a/include/cuco/roaring_bitmap_ref.cuh b/include/cuco/roaring_bitmap_ref.cuh new file mode 100644 index 000000000..071640a3b --- /dev/null +++ b/include/cuco/roaring_bitmap_ref.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace cuco::experimental { + +/** + * @brief Non-owning reference to a Roaring bitmap stored in its serialized format. + * + * A `roaring_bitmap_ref` provides device and host APIs to query membership against a bitmap that + * is laid out according to the [Roaring bitmap format + * specification](https://github.com/RoaringBitmap/RoaringFormatSpec). The object does not own the + * underlying storage; it simply provides algorithms over the referenced bytes. + * + * @note The reference reads directly from the serialized representation without deserializing. + * It supports 32-bit and 64-bit key types. For 32-bit bitmaps the layout follows the + * "Standard 32-bit Roaring Bitmap" format; for 64-bit bitmaps, the "portable" format is + * supported. + * + * @tparam T Key type stored in the bitmap. Must be `cuda::std::uint32_t` or `cuda::std::uint64_t`. + */ +template +class roaring_bitmap_ref { + using impl_type = detail::roaring_bitmap_impl; + + public: + using value_type = T; ///< Key type stored in the bitmap + using storage_ref_type = typename impl_type::storage_ref_type; ///< Implementation storage ref + + /** + * @brief Constructs a non-owning reference from an implementation-specific storage reference. + * + * @param storage_ref Reference to the underlying serialized bitmap storage + */ + __host__ __device__ roaring_bitmap_ref(storage_ref_type const& storage_ref); + + /** + * @brief Constructs a device-side reference from a raw pointer to a 32-bit Roaring bitmap. + * + * @note This constructor is only available when `T == cuda::std::uint32_t` and can be used in + * device code to create a lightweight view over device-resident serialized bytes. + * + * @param bitmap Pointer to the beginning of the serialized bitmap in device memory + */ + template >> + __device__ roaring_bitmap_ref(cuda::std::byte const* bitmap); + + /** + * @brief Bulk membership query for keys in `[first, last)`. + * + * @note This function synchronizes the given stream. For asynchronous execution use + * `contains_async`. + * + * @tparam InputIt Device-accessible random access input iterator of keys convertible to `T` + * @tparam OutputIt Device-accessible random access output iterator to `bool` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param contained Output iterator where results are written; `true` iff the corresponding key + * is present in the bitmap + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + __host__ void contains(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const; + + /** + * @brief Asynchronously performs a bulk membership query for keys in `[first, last)`. + * + * @tparam InputIt Device-accessible random access input iterator of keys convertible to `T` + * @tparam OutputIt Device-accessible random access output iterator to `bool` + * + * @param first Beginning of the sequence of keys + * @param last End of the sequence of keys + * @param contained Output iterator where results are written; `true` iff the corresponding key + * is present in the bitmap + * @param stream CUDA stream used for device memory operations and kernel launches + */ + template + __host__ void contains_async(InputIt first, + InputIt last, + OutputIt contained, + cuda::stream_ref stream = {}) const noexcept; + + /** + * @brief Device-side membership query for a single key. + * + * @param value Key to test for membership + * + * @return `true` iff `value` is contained in the bitmap + */ + __device__ bool contains(T value) const; + + /** + * @brief Number of keys stored in the bitmap. + * + * @return Count of keys in the bitmap + */ + [[nodiscard]] __host__ __device__ cuda::std::size_t size() const noexcept; + + /** + * @brief Checks whether the bitmap contains no keys. + * + * @return `true` iff `size() == 0` + */ + [[nodiscard]] __host__ __device__ bool empty() const noexcept; + + /** + * @brief Returns a pointer to the beginning of the serialized bitmap bytes. + * + * @return Pointer to the serialized storage + */ + [[nodiscard]] __host__ __device__ cuda::std::byte const* data() const noexcept; + + /** + * @brief Size in bytes of the serialized bitmap storage. + * + * @return Number of bytes occupied by the serialized bitmap + */ + [[nodiscard]] __host__ __device__ cuda::std::size_t size_bytes() const noexcept; + + private: + impl_type impl_; +}; + +} // namespace cuco::experimental + +#include \ No newline at end of file diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 21828b360..23258d445 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -152,3 +152,8 @@ ConfigureTest(BLOOM_FILTER_TEST bloom_filter/unique_sequence_test.cu bloom_filter/arrow_policy_test.cu bloom_filter/variable_cg_test.cu) + +################################################################################################### +# - roaring_bitmap --------------------------------------------------------------------------------- +ConfigureTest(ROARING_BITMAP_TEST + roaring_bitmap/contains_test.cu) diff --git a/tests/roaring_bitmap/contains_test.cu b/tests/roaring_bitmap/contains_test.cu new file mode 100644 index 000000000..42e0db3c8 --- /dev/null +++ b/tests/roaring_bitmap/contains_test.cu @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +namespace { +template +bool check(std::string const& bitmap_file_path) +{ + auto generate_keys = []() -> thrust::device_vector { + if constexpr (cuda::std::is_same_v) { + std::vector keys; + for (cuda::std::uint32_t k = 0; k < 100000; k += 1000) { + keys.push_back(k); + } + for (int k = 100000; k < 200000; ++k) { + keys.push_back(3 * k); + } + for (int k = 700000; k < 800000; ++k) { + keys.push_back(k); + } + return thrust::device_vector(keys.begin(), keys.end()); + } else if constexpr (cuda::std::is_same_v) { + std::vector keys; + for (cuda::std::uint64_t k = 0x00000ull; k < 0x09000ull; ++k) { + keys.push_back(k); + } + for (cuda::std::uint64_t k = 0x0A000ull; k < 0x10000ull; ++k) { + keys.push_back(k); + } + keys.push_back(0x20000ull); + keys.push_back(0x20005ull); + for (cuda::std::uint64_t i = 0; i < 0x10000ull; i += 2ull) { + keys.push_back(0x80000ull + i); + } + return thrust::device_vector(keys.begin(), keys.end()); + } else { + static_assert(cuco::dependent_false, "KeyType must be uint32_t or uint64_t"); + return {}; + } + }; + + std::ifstream file(bitmap_file_path, std::ios::binary); + if (!file.is_open()) { return false; } + + auto file_size = std::filesystem::file_size(bitmap_file_path); + + thrust::universal_host_pinned_vector buffer(file_size); + + file.read(reinterpret_cast(thrust::raw_pointer_cast(buffer.data())), file_size); + file.close(); + + cuco::experimental::roaring_bitmap roaring_bitmap( + thrust::raw_pointer_cast(buffer.data())); + + auto keys = generate_keys(); + thrust::device_vector contained(keys.size(), false); + + roaring_bitmap.contains(keys.begin(), keys.end(), contained.begin()); + + bool const all_contained = + thrust::all_of(contained.begin(), contained.end(), ::cuda::std::identity{}); + return all_contained; +} +} // namespace + +TEST_CASE("roaring_bitmap bulk contains from RoaringFormatSpec testdata", "[roaring_bitmap]") +{ +#ifndef CUCO_ROARING_DATA_DIR + SKIP( + "CUCO_ROARING_DATA_DIR is not defined. Configure with -DCUCO_DOWNLOAD_ROARING_TESTDATA=ON to " + "run this test."); +#else + std::string const data_dir = CUCO_ROARING_DATA_DIR; + + SECTION("32-bit: bitmapwithoutruns.bin") + { + std::string const path = data_dir + "/bitmapwithoutruns.bin"; + if (!std::ifstream(path).good()) { + std::string const msg = std::string("Missing file: ") + path; + SKIP(msg.c_str()); + } + REQUIRE(check(path)); + } + + SECTION("32-bit: bitmapwithruns.bin") + { + std::string const path = data_dir + "/bitmapwithruns.bin"; + if (!std::ifstream(path).good()) { + std::string const msg = std::string("Missing file: ") + path; + SKIP(msg.c_str()); + } + REQUIRE(check(path)); + } + + SECTION("64-bit: portable_bitmap64.bin") + { + std::string const path = data_dir + "/portable_bitmap64.bin"; + if (!std::ifstream(path).good()) { + std::string const msg = std::string("Missing file: ") + path; + SKIP(msg.c_str()); + } + REQUIRE(check(path)); + } +#endif +} \ No newline at end of file