-
Notifications
You must be signed in to change notification settings - Fork 205
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adds ceil_div #1825
Merged
Merged
Adds ceil_div #1825
Changes from all commits
Commits
Show all changes
12 commits
Select commit
Hold shift + click to select a range
ec45117
Add ceil_div
gonzalobg 75d0081
Fix style and formatting issues
miscco a810b52
Actually use the correct implementation
miscco 087666e
Actually return the original type to suppport 128 bit types
miscco 4416d2e
Fix documentation
miscco 17129fa
We do not round to multiples
miscco d17d727
Try to avoid modulus when calculating the remainder
miscco a0d4eb4
No byte for you
miscco 3602029
And no includes
miscco bd1599c
No constexpr for you sir
miscco 5c123b9
Missing the morning coffee
miscco 9f94e47
Fix incorrect define
miscco File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,4 @@ Extended API | |
extended_api/functional | ||
extended_api/streams | ||
extended_api/memory_resource | ||
extended_api/math |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
.. _libcudacxx-extended-api-math: | ||
|
||
Math | ||
===== | ||
|
||
.. code:: cuda | ||
|
||
template <typename T> | ||
[[nodiscard]] __host__ __device__ constexpr T ceil_div(T a, T b) noexcept; | ||
|
||
ceil_div | ||
--------- | ||
|
||
- _Requires_: `is_integral_v<T>` is true. | ||
- _Preconditions_: `a >= 0` is true and `b > 0` is true. | ||
- _Returns_: divides `a` by `b`. If `a` is not a multiple of `b` rounds the result up to the next integer value. | ||
|
||
.. note:: | ||
|
||
The function is only constexpr from C++14 onwards | ||
|
||
**Example**: This API is very useful for determining the _number of thread blocks_ required to process a fixed amount of work, given a fixed number of threads per block: | ||
|
||
.. code:: cuda | ||
|
||
#include <vector> | ||
#include <cuda/cmath> | ||
|
||
__global__ void vscale(int n, float s, float *x) { | ||
int i = blockIdx.x * blockDim.x + threadIdx.x; | ||
if (i < n) x[i] *= s; | ||
} | ||
|
||
int main() { | ||
const int n = 100000; | ||
const float s = 2.f; | ||
std::vector<float> x(n, 1.f); | ||
|
||
// Given a fixed number of threads per block... | ||
constexpr int threads_per_block = 256; | ||
|
||
// ...dividing some "n" by "threads_per_block" may lead to a remainder, | ||
// requiring the kernel to be launched with an extra thread block to handle it. | ||
const int thread_blocks = cuda::ceil_div(n, threads_per_block); | ||
|
||
vscale<<<thread_blocks, threads_per_block>>>(n, s, x.data()); | ||
cudaDeviceSynchronize(); | ||
|
||
return 0; | ||
} | ||
|
||
`See it on Godbolt TODO` | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef _CUDA_CMATH | ||
#define _CUDA_CMATH | ||
|
||
#include <cuda/std/detail/__config> | ||
|
||
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC) | ||
# pragma GCC system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG) | ||
# pragma clang system_header | ||
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC) | ||
# pragma system_header | ||
#endif // no system header | ||
|
||
#include <cuda/std/__type_traits/enable_if.h> | ||
#include <cuda/std/__type_traits/is_integral.h> | ||
#include <cuda/std/cmath> | ||
#include <cuda/std/detail/libcxx/include/__debug> | ||
|
||
_LIBCUDACXX_BEGIN_NAMESPACE_CUDA | ||
|
||
template <class _Tp, _CUDA_VSTD::__enable_if_t<_CCCL_TRAIT(_CUDA_VSTD::is_integral, _Tp), int> = 0> | ||
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Tp ceil_div(const _Tp __a, const _Tp __b) noexcept | ||
{ | ||
_LIBCUDACXX_DEBUG_ASSERT(__a >= _Tp(0), "cuda::ceil_div: a must be non negative"); | ||
_LIBCUDACXX_DEBUG_ASSERT(__b > _Tp(0), "cuda::ceil_div: b must be positive"); | ||
const _Tp __res = static_cast<_Tp>(__a / __b); | ||
return static_cast<_Tp>(__res + (__res * __b != __a)); | ||
} | ||
|
||
_LIBCUDACXX_END_NAMESPACE_CUDA | ||
|
||
#endif // _CUDA_CMATH |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of the libcu++ Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include <cuda/cmath> | ||
#include <cuda/std/cassert> | ||
#include <cuda/std/cstddef> | ||
#include <cuda/std/limits> | ||
#include <cuda/std/utility> | ||
|
||
#include "test_macros.h" | ||
|
||
#if !defined(TEST_COMPILER_NVRTC) | ||
# include <cstdint> | ||
#endif // !TEST_COMPILER_NVRTC | ||
|
||
template <class T> | ||
__host__ __device__ TEST_CONSTEXPR_CXX14 void test() | ||
{ | ||
constexpr T maxv = cuda::std::numeric_limits<T>::max(); | ||
|
||
assert(cuda::ceil_div(T(0), T(1)) == T(0)); | ||
assert(cuda::ceil_div(T(1), T(1)) == T(1)); | ||
assert(cuda::ceil_div(T(126), T(64)) == T(2)); | ||
|
||
// ensure that we are resilient against overflow | ||
assert(cuda::ceil_div(maxv, T(1)) == maxv); | ||
assert(cuda::ceil_div(maxv, maxv) == T(1)); | ||
} | ||
|
||
__host__ __device__ TEST_CONSTEXPR_CXX14 bool test() | ||
{ | ||
// Builtin integer types: | ||
test<char>(); | ||
test<signed char>(); | ||
test<unsigned char>(); | ||
|
||
test<short>(); | ||
test<unsigned short>(); | ||
|
||
test<int>(); | ||
test<unsigned int>(); | ||
|
||
test<long>(); | ||
test<unsigned long>(); | ||
|
||
test<long long>(); | ||
test<unsigned long long>(); | ||
|
||
#if !defined(TEST_COMPILER_NVRTC) | ||
// cstdint types: | ||
test<std::size_t>(); | ||
test<std::ptrdiff_t>(); | ||
test<std::intptr_t>(); | ||
test<std::uintptr_t>(); | ||
|
||
test<std::int8_t>(); | ||
test<std::int16_t>(); | ||
test<std::int32_t>(); | ||
test<std::int64_t>(); | ||
|
||
test<std::uint8_t>(); | ||
test<std::uint16_t>(); | ||
test<std::uint32_t>(); | ||
test<std::uint64_t>(); | ||
#endif // !TEST_COMPILER_NVRTC | ||
|
||
#if !defined(TEST_HAS_NO_INT128_T) | ||
test<__int128_t>(); | ||
test<__uint128_t>(); | ||
#endif // !TEST_HAS_NO_INT128_T | ||
|
||
return true; | ||
} | ||
|
||
int main(int arg, char** argv) | ||
{ | ||
test(); | ||
#if TEST_STD_VER >= 2014 | ||
static_assert(test(), ""); | ||
#endif // TEST_STD_VER >= 2014 | ||
return 0; | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Remark: unadressed TODO. I guess we want to fix this when
ceil_div
becomes available on godbolt with the next CCCL release? But we should maybe create an issue.