-
Notifications
You must be signed in to change notification settings - Fork 1.5k
feat(cudf): Add cuDF based OrderBy operator #12735
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
cf55f95
2c03cbf
e846132
5ee96b3
d8feb54
46f0a94
47913ae
5effbf2
0e43fc1
9dc09e1
e0ccfbd
d00db4f
5accb50
5fe2575
c9b2c1a
5a75b9e
3f4ca09
6f7d72e
2d679a4
757bce9
436d721
5538888
71624fa
6ccdb57
ab399f2
327f718
5754b3c
538398e
7570d56
e633371
86738ed
7b18528
7f723db
bee2494
5575205
01f1aaf
4e0476d
652825f
51abf30
13a4a87
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,95 @@ | ||
| # Copyright (c) Facebook, Inc. and its affiliates. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| include_guard(GLOBAL) | ||
|
|
||
| # 3.30.4 is the minimum version required by cudf | ||
| cmake_minimum_required(VERSION 3.30.4) | ||
|
|
||
| set(VELOX_rapids_cmake_VERSION 25.04) | ||
| set(VELOX_rapids_cmake_BUILD_SHA256_CHECKSUM | ||
| 458c14eaff9000067b32d65c8c914f4521090ede7690e16eb57035ce731386db) | ||
| set(VELOX_rapids_cmake_SOURCE_URL | ||
| "https://github.com/rapidsai/rapids-cmake/archive/7828fc8ff2e9f4fa86099f3c844505c2f47ac672.tar.gz" | ||
| ) | ||
| velox_resolve_dependency_url(rapids_cmake) | ||
|
|
||
| set(VELOX_rmm_VERSION 25.04) | ||
| set(VELOX_rmm_BUILD_SHA256_CHECKSUM | ||
| 294905094213a2d1fd8e024500359ff871bc52f913a3fbaca3514727c49f62de) | ||
| set(VELOX_rmm_SOURCE_URL | ||
| "https://github.com/rapidsai/rmm/archive/d8b7dacdeda302d2e37313c02d14ef5e1d1e98ea.tar.gz" | ||
| ) | ||
| velox_resolve_dependency_url(rmm) | ||
|
|
||
| set(VELOX_kvikio_VERSION 25.04) | ||
| set(VELOX_kvikio_BUILD_SHA256_CHECKSUM | ||
| 4a0b15295d0a397433930bf9a309e4ad2361b25dc7a7b3e6a35d0c9419d0cb62) | ||
| set(VELOX_kvikio_SOURCE_URL | ||
| "https://github.com/rapidsai/kvikio/archive/5c710f37236bda76e447e929e17b1efbc6c632c3.tar.gz" | ||
| ) | ||
| velox_resolve_dependency_url(kvikio) | ||
|
|
||
| set(VELOX_cudf_VERSION 25.04) | ||
| set(VELOX_cudf_BUILD_SHA256_CHECKSUM | ||
| e5a1900dfaf23dab2c5808afa17a2d04fa867d2892ecec1cb37908f3b73715c2) | ||
| set(VELOX_cudf_SOURCE_URL | ||
| "https://github.com/rapidsai/cudf/archive/4c1c99011da2c23856244e05adda78ba66697105.tar.gz" | ||
| ) | ||
| velox_resolve_dependency_url(cudf) | ||
|
|
||
| # Use block so we don't leak variables | ||
| block(SCOPE_FOR VARIABLES) | ||
| # Setup libcudf build to not have testing components | ||
| set(BUILD_TESTS OFF) | ||
| set(CUDF_BUILD_TESTUTIL OFF) | ||
| set(BUILD_SHARED_LIBS ON) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we support the static cudf library? In Gluten, we always link the static library; otherwise, we would need to install cudf in Gluten. |
||
|
|
||
| FetchContent_Declare( | ||
| rapids-cmake | ||
| URL ${VELOX_rapids_cmake_SOURCE_URL} | ||
| URL_HASH ${VELOX_rapids_cmake_BUILD_SHA256_CHECKSUM} | ||
| UPDATE_DISCONNECTED 1) | ||
|
|
||
| FetchContent_Declare( | ||
| rmm | ||
| URL ${VELOX_rmm_SOURCE_URL} | ||
| URL_HASH ${VELOX_rmm_BUILD_SHA256_CHECKSUM} | ||
| UPDATE_DISCONNECTED 1) | ||
|
|
||
| FetchContent_Declare( | ||
| kvikio | ||
| URL ${VELOX_kvikio_SOURCE_URL} | ||
| URL_HASH ${VELOX_kvikio_BUILD_SHA256_CHECKSUM} | ||
| SOURCE_SUBDIR cpp | ||
| UPDATE_DISCONNECTED 1) | ||
|
|
||
| FetchContent_Declare( | ||
| cudf | ||
| URL ${VELOX_cudf_SOURCE_URL} | ||
| URL_HASH ${VELOX_cudf_BUILD_SHA256_CHECKSUM} | ||
| SOURCE_SUBDIR cpp | ||
| UPDATE_DISCONNECTED 1) | ||
|
|
||
| FetchContent_MakeAvailable(cudf) | ||
|
|
||
| # cudf treats all warnings as errors and therefore fails to compile with Velox's | ||
| # expanded set of warnings. We selectively disable problematic warnings just for | ||
|
assignUser marked this conversation as resolved.
|
||
| # cudf | ||
| target_compile_options( | ||
| cudf PRIVATE -Wno-non-virtual-dtor -Wno-missing-field-initializers | ||
| -Wno-deprecated-copy) | ||
|
|
||
| unset(BUILD_SHARED_LIBS) | ||
| endblock() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -238,14 +238,16 @@ find_package(OpenSSL REQUIRED) | |
|
|
||
| if(VELOX_ENABLE_CCACHE | ||
| AND NOT CMAKE_C_COMPILER_LAUNCHER | ||
| AND NOT CMAKE_CXX_COMPILER_LAUNCHER) | ||
| AND NOT CMAKE_CXX_COMPILER_LAUNCHER | ||
| AND NOT CMAKE_CUDA_COMPILER_LAUNCHER) | ||
|
|
||
| find_program(CCACHE_FOUND ccache) | ||
|
|
||
| if(CCACHE_FOUND) | ||
| message(STATUS "Using ccache: ${CCACHE_FOUND}") | ||
| set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND}) | ||
| set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND}) | ||
| set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_FOUND}) | ||
| # keep comments as they might matter to the compiler | ||
| set(ENV{CCACHE_COMMENTS} "1") | ||
| endif() | ||
|
|
@@ -384,7 +386,7 @@ if(ENABLE_ALL_WARNINGS) | |
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ${KNOWN_WARNINGS}") | ||
| endif() | ||
|
|
||
| if(${VELOX_ENABLE_GPU}) | ||
| if(VELOX_ENABLE_GPU) | ||
|
assignUser marked this conversation as resolved.
|
||
| enable_language(CUDA) | ||
| # Determine CUDA_ARCHITECTURES automatically. | ||
| cmake_policy(SET CMP0104 NEW) | ||
|
|
@@ -396,6 +398,19 @@ if(${VELOX_ENABLE_GPU}) | |
| add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-G>") | ||
| endif() | ||
| find_package(CUDAToolkit REQUIRED) | ||
| if(VELOX_ENABLE_CUDF) | ||
| # Reject any requested CUDA architecture older than SM 70 when cuDF is enabled. | ||
| foreach(arch ${CMAKE_CUDA_ARCHITECTURES}) | ||
| # Entries may carry a -real/-virtual suffix (e.g. 60-real). A plain numeric | ||
| # comparison evaluates to false for such strings and would let them through, | ||
| # so compare only the leading digits. Non-numeric entries such as "native" | ||
| # produce no match and are not checked here. | ||
| string(REGEX MATCH "^[0-9]+" arch_number "${arch}") | ||
| if(arch_number LESS 70) | ||
| message( | ||
| FATAL_ERROR | ||
| "CUDA architecture ${arch} is below 70. CUDF requires Volta (SM 70) or newer GPUs." | ||
| ) | ||
| endif() | ||
| endforeach() | ||
| # Do not leak the helper variable into the rest of the directory scope. | ||
| unset(arch_number) | ||
| set(VELOX_ENABLE_ARROW ON) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. cuDF itself does not need Arrow (cuDF uses nanoarrow), but the Velox-cuDF interop requires Arrow functionality in Velox. |
||
| velox_set_source(cudf) | ||
| velox_resolve_dependency(cudf) | ||
| endif() | ||
| endif() | ||
|
|
||
| # Set after the test of the CUDA compiler. Otherwise, the test fails with | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,7 +68,7 @@ function install_build_prerequisites { | |
| dnf_install ninja-build cmake ccache gcc-toolset-12 git wget which | ||
| dnf_install autoconf automake python3-devel pip libtool | ||
|
|
||
| pip install cmake==3.28.3 | ||
| pip install cmake==3.30.4 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. cuDF and its dependencies require CMake 3.30.4. That CMake version shipped with a fix for finding some CUDA Toolkit components that cuDF and its dependencies use.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a |
||
|
|
||
| if [[ ${USE_CLANG} != "false" ]]; then | ||
| install_clang15 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| BasedOnStyle: InheritParentConfig | ||
| IncludeBlocks: Regroup | ||
| IncludeCategories: | ||
| - Regex: '^"velox/experimental/' # velox/experimental includes | ||
| Priority: 0 | ||
| - Regex: '^"' # quoted includes | ||
| Priority: 1 | ||
| - Regex: '^<(benchmarks|tests)/' # benchmark includes | ||
| Priority: 2 | ||
| - Regex: '^<cudf_test/' # cuDF test utility includes | ||
| Priority: 3 | ||
| - Regex: '^<cudf/' # cuDF includes | ||
| Priority: 4 | ||
| - Regex: '^<(nvtext|cudf_kafka)' # other libcudf includes | ||
| Priority: 5 | ||
| - Regex: '^<(cugraph|cuml|cuspatial|raft|kvikio)' # Other RAPIDS includes | ||
| Priority: 6 | ||
| - Regex: '^<rmm/' # RMM includes | ||
| Priority: 7 | ||
| - Regex: '^<(thrust|cub|cuda)/' # CCCL includes | ||
| Priority: 8 | ||
| - Regex: '^<(cooperative_groups|cuco|cuda.h|cuda_runtime|device_types|math_constants|nvtx3)' # CUDA includes | ||
| Priority: 8 | ||
| - Regex: '^<.*\..*' # other system includes (e.g. with a '.') | ||
| Priority: 9 | ||
| - Regex: '^<[^.]+' # STL includes (no '.') | ||
| Priority: 10 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| --- | ||
| Checks: > | ||
| readability-identifier-naming, | ||
| modernize-use-nullptr, | ||
| modernize-use-using | ||
|
|
||
| HeaderFilterRegex: '.*' | ||
|
|
||
| WarningsAsErrors: '' | ||
|
|
||
| CheckOptions: | ||
| # Naming conventions as explicitly stated in CODING_STYLE.md | ||
| - key: readability-identifier-naming.ClassCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.StructCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.EnumCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.TypeAliasCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.TypeTemplateParameterCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.FunctionCase | ||
| value: camelBack | ||
| - key: readability-identifier-naming.VariableCase | ||
| value: camelBack | ||
| - key: readability-identifier-naming.ParameterCase | ||
| value: camelBack | ||
| - key: readability-identifier-naming.PrivateMemberCase | ||
| value: camelBack | ||
| - key: readability-identifier-naming.PrivateMemberSuffix | ||
| value: _ | ||
| - key: readability-identifier-naming.ProtectedMemberCase | ||
| value: camelBack | ||
| - key: readability-identifier-naming.ProtectedMemberSuffix | ||
| value: _ | ||
| - key: readability-identifier-naming.MacroDefinitionCase | ||
| value: UPPER_CASE | ||
| - key: readability-identifier-naming.NamespaceCase | ||
| value: lower_case | ||
| - key: readability-identifier-naming.StaticConstantPrefix | ||
| value: k | ||
| - key: readability-identifier-naming.EnumConstantCase | ||
| value: CamelCase | ||
| - key: readability-identifier-naming.EnumConstantPrefix | ||
| value: k | ||
|
|
||
| # Use nullptr instead of NULL or 0 | ||
| - key: modernize-use-nullptr.NullMacros | ||
| value: 'NULL' | ||
|
|
||
| # Prefer 'using' type aliases over typedef | ||
| - key: modernize-use-using.IgnoreUsingStdAllocator | ||
| value: 1 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| # Copyright (c) Facebook, Inc. and its affiliates. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| add_subdirectory(exec) | ||
|
|
||
| if(VELOX_BUILD_TESTING) | ||
| add_subdirectory(tests) | ||
| endif() |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| # Copyright (c) Facebook, Inc. and its affiliates. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| add_library( | ||
|
assignUser marked this conversation as resolved.
|
||
| velox_cudf_exec | ||
| CudfConversion.cpp | ||
| CudfOrderBy.cpp | ||
| ToCudf.cpp | ||
| Utilities.cpp | ||
| VeloxCudfInterop.cpp) | ||
|
|
||
| # Link the cuDF execution library against cuDF, Arrow and the Velox libraries | ||
| # listed below. The explicit PUBLIC keyword replaces the legacy keyword-less | ||
| # signature while keeping the same transitive propagation to consumers. | ||
| target_link_libraries( | ||
| velox_cudf_exec | ||
| PUBLIC cudf::cudf | ||
| arrow | ||
| velox_arrow_bridge | ||
| velox_exception | ||
| velox_common_base | ||
| velox_exec) | ||
|
assignUser marked this conversation as resolved.
|
||
|
|
||
| target_compile_options(velox_cudf_exec PRIVATE -Wno-missing-field-initializers) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We are pinning this to specific commits of cuDF and its dependencies to avoid breakage from any final changes in the 25.04 release. Once the RAPIDS 25.04 release is out (currently targeting April 9-10), we can remove a lot of this logic for rapids-cmake, rmm, and kvikio -- and just pin cuDF to the stable release.