Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions cpp/include/rmm/device_scalar.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand Down Expand Up @@ -156,9 +156,6 @@ class device_scalar {
/**
* @brief Sets the value of the `device_scalar` to the value of `v`.
*
* This specialization for fundamental types is optimized to use `cudaMemsetAsync` when
* `v` is zero.
*
* @note If the stream specified to this function is different from the stream specified
* to the constructor, then appropriate dependencies must be inserted between the streams
* (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling
Expand All @@ -168,8 +165,7 @@ class device_scalar {
* referenced by `v` should not be destroyed or modified until `stream` has been
* synchronized. Otherwise, behavior is undefined.
*
* @note This function incurs a host to device memcpy or device memset and should be used
* carefully.
* @note This function incurs a host to device memcpy and should be used carefully.
*
* Example:
* \code{cpp}
Expand Down
17 changes: 0 additions & 17 deletions cpp/include/rmm/device_uvector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,6 @@ class device_uvector {
/**
* @brief Performs an asynchronous copy of `v` to the specified element in device memory.
*
* This specialization for fundamental types is optimized to use `cudaMemsetAsync` when
* `host_value` is zero.
*
* This function does not synchronize `stream` before returning. Therefore, the object
* referenced by `v` should not be destroyed or modified until `stream` has been synchronized.
* Otherwise, behavior is undefined.
Expand Down Expand Up @@ -212,20 +209,6 @@ class device_uvector {
{
RMM_EXPECTS(
element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range);

if constexpr (std::is_same_v<value_type, bool>) {
RMM_CUDA_TRY(
cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value()));
return;
}

if constexpr (std::is_fundamental_v<value_type>) {
if (value == value_type{0}) {
set_element_to_zero_async(element_index, stream);
return;
}
}

RMM_CUDA_TRY(cudaMemcpyAsync(
element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value()));
}
Expand Down
23 changes: 22 additions & 1 deletion cpp/tests/device_uvector_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -14,6 +14,7 @@
#include <gtest/gtest.h>
#include <gtest/internal/gtest-type-util.h>

#include <cmath>
#include <cstdint>
#include <iterator>
#include <utility>
Expand Down Expand Up @@ -238,6 +239,26 @@ TYPED_TEST(TypedUVectorTest, SetElementZeroAsync)
}
}

// Writes -0.0f into a one-element device_uvector<float> via set_element_async and checks
// that the value read back with element() still carries its sign bit.
TEST(NegativeZeroTest, PreservesFloatNegativeZero)
{
  // A default-constructed stream view is used for every call in this test.
  rmm::cuda_stream_view const stream{};
  rmm::device_uvector<float> buffer(1, stream);

  float const negative_zero = -0.0f;
  buffer.set_element_async(0, negative_zero, stream);

  float const read_back = buffer.element(0, stream);

  // -0.0f == 0.0f compares true, so the sign has to be inspected explicitly.
  EXPECT_TRUE(std::signbit(read_back)) << "sign bit of -0.0f was lost";
  EXPECT_EQ(read_back, 0.0f);
}

// Writes -0.0 into a one-element device_uvector<double> via set_element_async and checks
// that the value read back with element() still carries its sign bit.
TEST(NegativeZeroTest, PreservesDoubleNegativeZero)
{
  // A default-constructed stream view is used for every call in this test.
  rmm::cuda_stream_view const stream{};
  rmm::device_uvector<double> buffer(1, stream);

  double const negative_zero = -0.0;
  buffer.set_element_async(0, negative_zero, stream);

  double const read_back = buffer.element(0, stream);

  // -0.0 == 0.0 compares true, so the sign has to be inspected explicitly.
  EXPECT_TRUE(std::signbit(read_back)) << "sign bit of -0.0 was lost";
  EXPECT_EQ(read_back, 0.0);
}

TYPED_TEST(TypedUVectorTest, FrontBackElement)
{
auto const size{12345};
Expand Down
Loading