-
Notifications
You must be signed in to change notification settings - Fork 190
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add
cuda::device::barrier_expect_tx
(#498)
- Loading branch information
1 parent
b269438
commit a3faeb1
Showing
11 changed files
with
271 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
libcudacxx/.upstream-tests/test/cuda/barrier/expect_tx_cta.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// UNSUPPORTED: libcpp-has-no-threads | ||
// UNSUPPORTED: pre-sm-90 | ||
|
||
// <cuda/barrier> | ||
|
||
#include "arrive_tx.h" | ||
|
||
// Entry point for the 256-thread variant of the expect_tx test declared in
// "arrive_tx.h".
int main(int, char**)
{
  NV_DISPATCH_TARGET(
    NV_IS_HOST, (
      // concurrent_agents_launch needs this to know how many threads to
      // launch. It must stay a plain int literal, because the nvrtc tests
      // use grep to figure out the thread count.
      cuda_thread_count = 256;
    ),
    NV_IS_DEVICE, (
      // Instantiate the test with split_arrive_and_expect enabled.
      test</* split_arrive_and_expect = */ true>();
    )
  );

  return 0;
}
39 changes: 39 additions & 0 deletions
39
libcudacxx/.upstream-tests/test/cuda/barrier/expect_tx_device.runfail.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// UNSUPPORTED: libcpp-has-no-threads | ||
// UNSUPPORTED: pre-sm-90 | ||
|
||
// <cuda/barrier> | ||
|
||
#include <cuda/barrier> | ||
#include "test_macros.h" | ||
|
||
// Suppress warning about barrier in shared memory | ||
TEST_NV_DIAG_SUPPRESS(static_var_with_dynamic_init) | ||
|
||
__device__ uint64_t bar_storage; | ||
|
||
// Negative (.runfail) test: calls cuda::device::barrier_expect_tx on a barrier
// that is placed in __device__ (global) storage instead of shared memory, so
// the run is expected to fail.
int main(int, char**){
  NV_IF_TARGET(
    NV_IS_DEVICE, (
      // Take the ADDRESS of the __device__ storage. Casting the value held in
      // bar_storage would reinterpret an integer as a pointer, and the test
      // would no longer exercise "barrier lives in device memory".
      cuda::barrier<cuda::thread_scope_block> *bar_ptr;
      bar_ptr = reinterpret_cast<cuda::barrier<cuda::thread_scope_block> *>(&bar_storage);

      // A single thread initializes the barrier; the block-wide sync keeps
      // the other threads from using it before initialization has finished.
      if (threadIdx.x == 0) {
        init(bar_ptr, blockDim.x);
      }
      __syncthreads();

      // Should fail because the barrier is in device memory.
      cuda::device::barrier_expect_tx(*bar_ptr, 1);
  ));
  return 0;
}
34 changes: 34 additions & 0 deletions
34
libcudacxx/.upstream-tests/test/cuda/barrier/expect_tx_thread.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// UNSUPPORTED: libcpp-has-no-threads | ||
// UNSUPPORTED: pre-sm-90 | ||
|
||
// <cuda/barrier> | ||
|
||
#include "arrive_tx.h" | ||
|
||
// Entry point for the 2-thread variant of the expect_tx test declared in
// "arrive_tx.h".
int main(int, char**)
{
  NV_DISPATCH_TARGET(
    NV_IS_HOST, (
      // concurrent_agents_launch needs this to know how many threads to
      // launch. It must stay a plain int literal, because the nvrtc tests
      // use grep to figure out the thread count.
      cuda_thread_count = 2;
    ),
    NV_IS_DEVICE, (
      // Instantiate the test with split_arrive_and_expect enabled.
      test</* split_arrive_and_expect = */ true>();
    )
  );

  return 0;
}
34 changes: 34 additions & 0 deletions
34
libcudacxx/.upstream-tests/test/cuda/barrier/expect_tx_warp.pass.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of libcu++, the C++ Standard Library for your entire system, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// UNSUPPORTED: libcpp-has-no-threads | ||
// UNSUPPORTED: pre-sm-90 | ||
|
||
// <cuda/barrier> | ||
|
||
#include "arrive_tx.h" | ||
|
||
// Entry point for the 32-thread variant of the expect_tx test declared in
// "arrive_tx.h".
int main(int, char**)
{
  NV_DISPATCH_TARGET(
    NV_IS_HOST, (
      // concurrent_agents_launch needs this to know how many threads to
      // launch. It must stay a plain int literal, because the nvrtc tests
      // use grep to figure out the thread count.
      cuda_thread_count = 32;
    ),
    NV_IS_DEVICE, (
      // Instantiate the test with split_arrive_and_expect enabled.
      test</* split_arrive_and_expect = */ true>();
    )
  );

  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...dacxx/docs/extended_api/synchronization_primitives/barrier/barrier_expect_tx.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
--- | ||
grand_parent: Extended API | ||
parent: Barriers | ||
--- | ||
|
||
# `cuda::device::barrier_expect_tx` | ||
|
||
Defined in header `<cuda/barrier>`: | ||
|
||
```cuda | ||
__device__ | ||
void cuda::device::barrier_expect_tx( | ||
cuda::barrier<cuda::thread_scope_block>& bar, | ||
ptrdiff_t transaction_count_update); | ||
``` | ||
|
||
Updates the expected transaction count of a barrier in shared memory. | ||
|
||
## Preconditions | ||
|
||
* `__isShared(&bar) == true` | ||
* `0 <= transaction_count_update && transaction_count_update <= (1 << 20) - 1` | ||
|
||
## Effects | ||
|
||
* This function increments the expected transaction count by `transaction_count_update`. | ||
* This function executes atomically. | ||
|
||
## Notes | ||
|
||
This function can only be used under CUDA Compute Capability 9.0 (Hopper) or | ||
higher. | ||
|
||
## Example | ||
|
||
```cuda | ||
#include <cuda/barrier> | ||
#include <cuda/std/utility> // cuda::std::move | ||
// Reject compilation for architectures older than Compute Capability 9.0.
#if defined(__CUDA_MINIMUM_ARCH__) && __CUDA_MINIMUM_ARCH__ < 900
static_assert(false, "Insufficient CUDA Compute Capability: cuda::device::barrier_expect_tx is not available.");
#endif // __CUDA_MINIMUM_ARCH__

// Source buffer in global memory, 16-byte aligned to match the aligned copy below.
__device__ alignas(16) int gmem_x[2048];

__global__ void example_kernel() {
  using barrier_t = cuda::barrier<cuda::thread_scope_block>;
  __shared__ alignas(16) int smem_x[1024];
  __shared__ barrier_t bar;

  // One thread initializes the barrier with an expected arrival count of
  // blockDim.x, i.e. one arrival per thread of the block.
  if (threadIdx.x == 0) {
    init(&bar, blockDim.x);
  }
  // No thread may touch the barrier before its initialization is complete.
  __syncthreads();

  if (threadIdx.x == 0) {
    // Start the asynchronous copy of sizeof(smem_x) bytes from gmem_x into
    // smem_x, passing bar as the barrier associated with the copy.
    cuda::device::memcpy_async_tx(smem_x, gmem_x, cuda::aligned_size_t<16>(sizeof(smem_x)), bar);
    // Increment the barrier's expected transaction count by the number of
    // bytes being copied.
    cuda::device::barrier_expect_tx(bar, sizeof(smem_x));
  }

  // Every thread arrives at the barrier and then waits on it.
  auto token = bar.arrive(1);
  bar.wait(cuda::std::move(token));

  // smem_x contains the contents of gmem_x[0], ..., gmem_x[1023]
  smem_x[threadIdx.x] += 1;
}
``` | ||
|
||
[See it on Godbolt](https://godbolt.org/z/9Yj89P76z){: .btn } | ||
|
||
|
||
[`cuda::thread_scope`]: ./memory_model.md | ||
[Tracking asynchronous operations by the mbarrier object]: https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#tracking-asynchronous-operations-by-the-mbarrier-object | ||
[thread.barrier.class paragraph 12]: https://eel.is/c++draft/thread.barrier.class#12 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters