Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
cf55f95
Add minimal code that demonstrates cudf integration
devavret Mar 11, 2025
2c03cbf
Remove all other operators from ToCudf
devavret Mar 11, 2025
e846132
Make it compile
devavret Mar 11, 2025
5ee96b3
remove unused interop code
devavret Mar 11, 2025
d8feb54
Merge branch 'main-rapids' into cmake-upstreaming
devavret Mar 12, 2025
46f0a94
Pin rmm and kvikio
devavret Mar 12, 2025
47913ae
Remove manually adding fmt
devavret Mar 12, 2025
5effbf2
Remove debug prints
devavret Mar 12, 2025
0e43fc1
Re-enable some warnings
devavret Mar 12, 2025
9dc09e1
update cmake on centos
devavret Mar 13, 2025
e0ccfbd
Add our team to codeowners
devavret Mar 13, 2025
d00db4f
Check off some todos
devavret Mar 13, 2025
5accb50
remove commented tests
devavret Mar 13, 2025
5fe2575
Ignore known warnings just for cudf_exec target
devavret Mar 17, 2025
c9b2c1a
Misc review fixes:
devavret Mar 17, 2025
5a75b9e
Misc review changes requested by @bdice
devavret Mar 19, 2025
3f4ca09
Remove only cudf adapter
devavret Mar 19, 2025
6f7d72e
Add clang format to our subdir
devavret Mar 19, 2025
2d679a4
Fix style
devavret Mar 20, 2025
757bce9
Fix naming
devavret Mar 21, 2025
436d721
Fix more style
devavret Mar 21, 2025
5538888
Error out when cuda architecture is less than 70
devavret Mar 21, 2025
71624fa
Misc. review changes
devavret Mar 25, 2025
6ccdb57
Misc review changes
devavret Mar 25, 2025
ab399f2
Prevent merging vectors whose total size exceeds vector_size_t max
devavret Mar 25, 2025
327f718
Misc review changes
devavret Mar 25, 2025
5754b3c
replace env variable with gflags and CudfOptions
karthikeyann Mar 26, 2025
538398e
replace gpu batch size env variable with a QueryConfig entry
karthikeyann Mar 26, 2025
7570d56
Add back optional debug printing of plans
devavret Mar 26, 2025
e633371
Misc review changes
devavret Mar 26, 2025
86738ed
Add clang tidy
devavret Mar 31, 2025
7b18528
remove accidental flags added to all of velox
devavret Mar 31, 2025
7f723db
cmake min required
devavret Apr 7, 2025
bee2494
Make sure last operator from task produces velox RowVector
devavret Apr 7, 2025
5575205
Cudf driver adapter without storing plan nodes
devavret Apr 8, 2025
01f1aaf
re-fix conversion to RowVector in sink
devavret Apr 8, 2025
4e0476d
Merge branch 'main' into cmake-upstreaming
bdice Apr 9, 2025
652825f
remove fixDictionaryIndices
karthikeyann Apr 9, 2025
51abf30
Merge branch 'main-meta' into cmake-upstreaming
devavret Apr 11, 2025
13a4a87
Remove codeowners for now
devavret Apr 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions CMake/resolve_dependency_modules/cudf.cmake
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are pinning this to specific commits of cuDF and its dependencies to avoid breakage from any final changes in the 25.04 release. Once the RAPIDS 25.04 release is out (currently targeting April 9-10), we can remove a lot of this logic for rapids-cmake, rmm, and kvikio -- and just pin cuDF to the stable release.

Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Process this module at most once per configure run.
include_guard(GLOBAL)

# 3.30.4 is the minimum version required by cudf
cmake_minimum_required(VERSION 3.30.4)

# cuDF and its dependencies (rapids-cmake, rmm, kvikio) are pinned to specific
# commit archives, each with a SHA256 checksum, to avoid breakage from any final
# changes ahead of the RAPIDS 25.04 release. Once that release is out, the
# rapids-cmake/rmm/kvikio pins can likely be dropped and cuDF pinned to the
# stable release instead.
#
# NOTE(review): velox_resolve_dependency_url() is defined elsewhere in the Velox
# CMake modules; presumably it finalizes the VELOX_<name>_SOURCE_URL and
# VELOX_<name>_BUILD_SHA256_CHECKSUM variables consumed by the
# FetchContent_Declare() calls further down -- confirm.

# rapids-cmake
set(VELOX_rapids_cmake_VERSION 25.04)
set(VELOX_rapids_cmake_BUILD_SHA256_CHECKSUM
458c14eaff9000067b32d65c8c914f4521090ede7690e16eb57035ce731386db)
set(VELOX_rapids_cmake_SOURCE_URL
"https://github.com/rapidsai/rapids-cmake/archive/7828fc8ff2e9f4fa86099f3c844505c2f47ac672.tar.gz"
)
velox_resolve_dependency_url(rapids_cmake)

# rmm
set(VELOX_rmm_VERSION 25.04)
set(VELOX_rmm_BUILD_SHA256_CHECKSUM
294905094213a2d1fd8e024500359ff871bc52f913a3fbaca3514727c49f62de)
set(VELOX_rmm_SOURCE_URL
"https://github.com/rapidsai/rmm/archive/d8b7dacdeda302d2e37313c02d14ef5e1d1e98ea.tar.gz"
)
velox_resolve_dependency_url(rmm)

# kvikio
set(VELOX_kvikio_VERSION 25.04)
set(VELOX_kvikio_BUILD_SHA256_CHECKSUM
4a0b15295d0a397433930bf9a309e4ad2361b25dc7a7b3e6a35d0c9419d0cb62)
set(VELOX_kvikio_SOURCE_URL
"https://github.com/rapidsai/kvikio/archive/5c710f37236bda76e447e929e17b1efbc6c632c3.tar.gz"
)
velox_resolve_dependency_url(kvikio)

# cudf
set(VELOX_cudf_VERSION 25.04)
set(VELOX_cudf_BUILD_SHA256_CHECKSUM
e5a1900dfaf23dab2c5808afa17a2d04fa867d2892ecec1cb37908f3b73715c2)
set(VELOX_cudf_SOURCE_URL
"https://github.com/rapidsai/cudf/archive/4c1c99011da2c23856244e05adda78ba66697105.tar.gz"
)
velox_resolve_dependency_url(cudf)

# Use block so we don't leak variables
block(SCOPE_FOR VARIABLES)
# Setup libcudf build to not have testing components
set(BUILD_TESTS OFF)
set(CUDF_BUILD_TESTUTIL OFF)
set(BUILD_SHARED_LIBS ON)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we support the static cudf library? In Gluten, we always link the static library, otherwise, we need to install cudf in Gluten


# Register the pinned source archives with FetchContent. Only cudf is made
# available directly (see FetchContent_MakeAvailable below); the other three are
# declared but not populated here.
# NOTE(review): presumably cudf's own build requests rapids-cmake, rmm and
# kvikio through FetchContent under these names, so that these earlier
# declarations (and therefore the pinned commits) take precedence -- confirm.
FetchContent_Declare(
rapids-cmake
URL ${VELOX_rapids_cmake_SOURCE_URL}
URL_HASH ${VELOX_rapids_cmake_BUILD_SHA256_CHECKSUM}
UPDATE_DISCONNECTED 1)

FetchContent_Declare(
rmm
URL ${VELOX_rmm_SOURCE_URL}
URL_HASH ${VELOX_rmm_BUILD_SHA256_CHECKSUM}
UPDATE_DISCONNECTED 1)

# SOURCE_SUBDIR cpp: use the CMakeLists.txt in the archive's cpp/ directory
# rather than the one at the archive root.
FetchContent_Declare(
kvikio
URL ${VELOX_kvikio_SOURCE_URL}
URL_HASH ${VELOX_kvikio_BUILD_SHA256_CHECKSUM}
SOURCE_SUBDIR cpp
UPDATE_DISCONNECTED 1)

# SOURCE_SUBDIR cpp: same as for kvikio above.
FetchContent_Declare(
cudf
URL ${VELOX_cudf_SOURCE_URL}
URL_HASH ${VELOX_cudf_BUILD_SHA256_CHECKSUM}
SOURCE_SUBDIR cpp
UPDATE_DISCONNECTED 1)

# Populate cudf and add its cpp/ directory to this build.
FetchContent_MakeAvailable(cudf)

# cudf sets all warnings as errors, and therefore fails to compile with velox
# expanded set of warnings. We selectively disable problematic warnings just for
Comment thread
assignUser marked this conversation as resolved.
# cudf
# PRIVATE: the flags apply only when compiling the cudf target's own sources and
# are not propagated to targets that link against it.
target_compile_options(
cudf PRIVATE -Wno-non-virtual-dtor -Wno-missing-field-initializers
-Wno-deprecated-copy)

# NOTE(review): BUILD_SHARED_LIBS was set inside the enclosing
# block(SCOPE_FOR VARIABLES), whose variable scope ends at the endblock() just
# below, so this explicit unset looks redundant -- confirm before removing.
unset(BUILD_SHARED_LIBS)
endblock()
19 changes: 17 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,16 @@ find_package(OpenSSL REQUIRED)

if(VELOX_ENABLE_CCACHE
AND NOT CMAKE_C_COMPILER_LAUNCHER
AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
AND NOT CMAKE_CXX_COMPILER_LAUNCHER
AND NOT CMAKE_CUDA_COMPILER_LAUNCHER)

find_program(CCACHE_FOUND ccache)

if(CCACHE_FOUND)
message(STATUS "Using ccache: ${CCACHE_FOUND}")
set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_FOUND})
# keep comments as they might matter to the compiler
set(ENV{CCACHE_COMMENTS} "1")
endif()
Expand Down Expand Up @@ -384,7 +386,7 @@ if(ENABLE_ALL_WARNINGS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra ${KNOWN_WARNINGS}")
endif()

if(${VELOX_ENABLE_GPU})
if(VELOX_ENABLE_GPU)
Comment thread
assignUser marked this conversation as resolved.
enable_language(CUDA)
# Determine CUDA_ARCHITECTURES automatically.
cmake_policy(SET CMP0104 NEW)
Expand All @@ -396,6 +398,19 @@ if(${VELOX_ENABLE_GPU})
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:-G>")
endif()
find_package(CUDAToolkit REQUIRED)
if(VELOX_ENABLE_CUDF)
# cuDF only supports Volta (SM 70) and newer, so reject any older entry in
# CMAKE_CUDA_ARCHITECTURES at configure time.
#
# Entries may carry a suffix (e.g. "60-real", "75-virtual", "90a"). Such strings
# do not parse as numbers, so a direct if(... LESS 70) is always false for them
# and an unsupported architecture would slip through. Compare only the leading
# digits instead. Purely symbolic values ("native", "all", "all-major") have no
# leading digits and are not checked here.
foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES)
  string(REGEX MATCH "^[0-9]+" velox_cuda_arch_number "${arch}")
  if(NOT "${velox_cuda_arch_number}" STREQUAL ""
     AND velox_cuda_arch_number LESS 70)
    message(
      FATAL_ERROR
        "CUDA architecture ${arch} is below 70. CUDF requires Volta (SM 70) or newer GPUs."
    )
  endif()
endforeach()
# Do not leave the helper variable behind in the directory scope.
unset(velox_cuda_arch_number)

# cuDF itself does not need Arrow (it uses nanoarrow), but the Velox-cuDF
# interop requires Arrow functionality in Velox, so force it on.
set(VELOX_ENABLE_ARROW ON)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cuDF itself does not need Arrow (cuDF uses nanoarrow), but the Velox-cuDF interop requires Arrow functionality in Velox.

velox_set_source(cudf)
velox_resolve_dependency(cudf)
endif()
endif()

# Set after the test of the CUDA compiler. Otherwise, the test fails with
Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ cmake: #: Use CMake to create a Makefile build system
${EXTRA_CMAKE_FLAGS}

cmake-gpu:
$(MAKE) EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON" cmake
$(MAKE) EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON" cmake

build: #: Build the software based in BUILD_DIR and BUILD_TYPE variables
cmake --build $(BUILD_BASE_DIR)/$(BUILD_DIR) -j $(NUM_THREADS)
Expand All @@ -125,11 +125,11 @@ minimal: #: Minimal build
$(MAKE) build BUILD_DIR=release

gpu: #: Build with GPU support
$(MAKE) cmake BUILD_DIR=release BUILD_TYPE=release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON"
$(MAKE) cmake BUILD_DIR=release BUILD_TYPE=release EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON"
$(MAKE) build BUILD_DIR=release

gpu_debug: #: Build with debugging symbols and GPU support
$(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON"
$(MAKE) cmake BUILD_DIR=debug BUILD_TYPE=debug EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DVELOX_ENABLE_GPU=ON -DVELOX_ENABLE_CUDF=ON"
$(MAKE) build BUILD_DIR=debug

dwio: #: Minimal build with dwio enabled.
Expand Down
2 changes: 1 addition & 1 deletion scripts/setup-centos9.sh
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ function install_build_prerequisites {
dnf_install ninja-build cmake ccache gcc-toolset-12 git wget which
dnf_install autoconf automake python3-devel pip libtool

pip install cmake==3.28.3
pip install cmake==3.30.4
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cuDF and its dependencies require CMake 3.30.4. That CMake version shipped with a fix for finding some CUDA Toolkit components that cuDF and its dependencies use.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a cmake_minimum_required to the top of cudf.cmake with a comment?


if [[ ${USE_CLANG} != "false" ]]; then
install_clang15
Expand Down
3 changes: 3 additions & 0 deletions velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ if(${VELOX_ENABLE_DUCKDB})
endif()

if(${VELOX_ENABLE_GPU})
# The cuDF integration is opt-in on top of the GPU build. The option is tested
# by name instead of being expanded with ${...}: if() dereferences the name
# itself, whereas an expanded value would be dereferenced a second time if it
# happened to name another variable. This also matches the
# if(VELOX_ENABLE_CUDF) check in the top-level CMakeLists.txt.
if(VELOX_ENABLE_CUDF)
  add_subdirectory(experimental/cudf)
endif()
add_subdirectory(experimental/gpu)
add_subdirectory(experimental/wave)
add_subdirectory(external/jitify)
Expand Down
27 changes: 27 additions & 0 deletions velox/experimental/cudf/.clang-format
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# clang-format overrides for velox/experimental/cudf. Everything not set here is
# inherited from the parent directory's configuration (InheritParentConfig).
# This file only changes how #include lines are grouped: includes are regrouped
# into blocks by the first matching category below, ordered by Priority.
BasedOnStyle: InheritParentConfig
IncludeBlocks: Regroup
IncludeCategories:
- Regex: '^"velox/experimental/' # velox/experimental includes
Priority: 0
- Regex: '^"' # quoted includes
Priority: 1
- Regex: '^<(benchmarks|tests)/' # benchmark and test includes
Priority: 2
- Regex: '^<cudf_test/' # cudf_test includes
Priority: 3
- Regex: '^<cudf/' # cuDF includes
Priority: 4
- Regex: '^<(nvtext|cudf_kafka)' # other libcudf includes
Priority: 5
- Regex: '^<(cugraph|cuml|cuspatial|raft|kvikio)' # Other RAPIDS includes
Priority: 6
- Regex: '^<rmm/' # RMM includes
Priority: 7
- Regex: '^<(thrust|cub|cuda)/' # CCCL includes
Priority: 8
- Regex: '^<(cooperative_groups|cuco|cuda.h|cuda_runtime|device_types|math_constants|nvtx3)' # CUDA includes (shares priority 8 with the CCCL entry above)
Priority: 8
- Regex: '^<.*\..*' # other system includes (e.g. with a '.')
Priority: 9
- Regex: '^<[^.]+' # STL includes (no '.')
Priority: 10
54 changes: 54 additions & 0 deletions velox/experimental/cudf/.clang-tidy
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# clang-tidy configuration for velox/experimental/cudf. Enables exactly three
# checks: identifier naming, nullptr usage, and typedef -> using.
Checks: >
readability-identifier-naming,
modernize-use-nullptr,
modernize-use-using

# Report diagnostics from every header, not only from the main source file.
HeaderFilterRegex: '.*'

# No warnings are promoted to errors.
WarningsAsErrors: ''

CheckOptions:
# Naming conventions as explicitly stated in CODING_STYLE.md
- key: readability-identifier-naming.ClassCase
value: CamelCase
- key: readability-identifier-naming.StructCase
value: CamelCase
- key: readability-identifier-naming.EnumCase
value: CamelCase
- key: readability-identifier-naming.TypeAliasCase
value: CamelCase
- key: readability-identifier-naming.TypeTemplateParameterCase
value: CamelCase
- key: readability-identifier-naming.FunctionCase
value: camelBack
- key: readability-identifier-naming.VariableCase
value: camelBack
- key: readability-identifier-naming.ParameterCase
value: camelBack
# Private and protected members: camelBack with a trailing underscore.
- key: readability-identifier-naming.PrivateMemberCase
value: camelBack
- key: readability-identifier-naming.PrivateMemberSuffix
value: _
- key: readability-identifier-naming.ProtectedMemberCase
value: camelBack
- key: readability-identifier-naming.ProtectedMemberSuffix
value: _
- key: readability-identifier-naming.MacroDefinitionCase
value: UPPER_CASE
- key: readability-identifier-naming.NamespaceCase
value: lower_case
# Static constants and enum constants carry a 'k' prefix.
- key: readability-identifier-naming.StaticConstantPrefix
value: k
- key: readability-identifier-naming.EnumConstantCase
value: CamelCase
- key: readability-identifier-naming.EnumConstantPrefix
value: k

# Use nullptr instead of NULL or 0
- key: modernize-use-nullptr.NullMacros
value: 'NULL'

# Option for modernize-use-using (the check that replaces typedef with using).
# NOTE(review): IgnoreUsingStdAllocator does not appear to be one of that
# check's documented options -- confirm it has any effect.
- key: modernize-use-using.IgnoreUsingStdAllocator
value: 1
19 changes: 19 additions & 0 deletions velox/experimental/cudf/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The exec/ subdirectory is always part of the cuDF build.
add_subdirectory(exec)

# The tests/ subdirectory is only added when VELOX_BUILD_TESTING is enabled.
if(VELOX_BUILD_TESTING)
add_subdirectory(tests)
endif()
32 changes: 32 additions & 0 deletions velox/experimental/cudf/exec/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Defines the velox_cudf_exec library from the sources listed below. No
# STATIC/SHARED keyword is given, so the library type follows BUILD_SHARED_LIBS.
add_library(
Comment thread
assignUser marked this conversation as resolved.
velox_cudf_exec
CudfConversion.cpp
CudfOrderBy.cpp
ToCudf.cpp
Utilities.cpp
VeloxCudfInterop.cpp)

# Link dependencies of velox_cudf_exec. The visibility keyword is spelled out
# instead of using the legacy keyword-less signature; PUBLIC keeps the
# dependencies transitive for consumers, which is what the keyword-less form
# did by default.
# NOTE(review): entries that are not needed by this target's public headers
# could be narrowed to PRIVATE -- confirm against the headers before doing so.
target_link_libraries(
  velox_cudf_exec
  PUBLIC cudf::cudf
         arrow
         velox_arrow_bridge
         velox_exception
         velox_common_base
         velox_exec)
Comment thread
assignUser marked this conversation as resolved.

# Silence -Wmissing-field-initializers for this target's own sources only;
# PRIVATE keeps the flag from propagating to targets that link against it.
target_compile_options(
  velox_cudf_exec
  PRIVATE -Wno-missing-field-initializers)
Loading
Loading