diff --git a/cpp/include/rmm/device_scalar.hpp b/cpp/include/rmm/device_scalar.hpp index b6a88a714..0e62505f0 100644 --- a/cpp/include/rmm/device_scalar.hpp +++ b/cpp/include/rmm/device_scalar.hpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2019-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2019-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -156,9 +156,6 @@ class device_scalar { /** * @brief Sets the value of the `device_scalar` to the value of `v`. * - * This specialization for fundamental types is optimized to use `cudaMemsetAsync` when - * `v` is zero. - * * @note If the stream specified to this function is different from the stream specified * to the constructor, then appropriate dependencies must be inserted between the streams * (e.g. using `cudaStreamWaitEvent()` or `cudaStreamSynchronize()`) before and after calling @@ -168,8 +165,7 @@ class device_scalar { * referenced by `v` should not be destroyed or modified until `stream` has been * synchronized. Otherwise, behavior is undefined. * - * @note This function incurs a host to device memcpy or device memset and should be used - * carefully. + * @note This function incurs a host to device memcpy and should be used carefully. * * Example: * \code{cpp} diff --git a/cpp/include/rmm/device_uvector.hpp b/cpp/include/rmm/device_uvector.hpp index b798cb04b..f2d1bfd93 100644 --- a/cpp/include/rmm/device_uvector.hpp +++ b/cpp/include/rmm/device_uvector.hpp @@ -175,9 +175,6 @@ class device_uvector { /** * @brief Performs an asynchronous copy of `v` to the specified element in device memory. * - * This specialization for fundamental types is optimized to use `cudaMemsetAsync` when - * `host_value` is zero. - * * This function does not synchronize stream `s` before returning. Therefore, the object * referenced by `v` should not be destroyed or modified until `stream` has been synchronized. * Otherwise, behavior is undefined. @@ -212,20 +209,6 @@ class device_uvector { { RMM_EXPECTS( element_index < size(), "Attempt to access out of bounds element.", rmm::out_of_range); - - if constexpr (std::is_same_v) { - RMM_CUDA_TRY( - cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value())); - return; - } - - if constexpr (std::is_fundamental_v) { - if (value == value_type{0}) { - set_element_to_zero_async(element_index, stream); - return; - } - } - RMM_CUDA_TRY(cudaMemcpyAsync( element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value())); } diff --git a/cpp/tests/device_uvector_tests.cpp b/cpp/tests/device_uvector_tests.cpp index 3b032ed13..552dc7b7c 100644 --- a/cpp/tests/device_uvector_tests.cpp +++ b/cpp/tests/device_uvector_tests.cpp @@ -1,6 +1,6 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2020-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2020-2026, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -238,6 +239,26 @@ TYPED_TEST(TypedUVectorTest, SetElementZeroAsync) } } +TEST(NegativeZeroTest, PreservesFloatNegativeZero) +{ + rmm::device_uvector vec(1, rmm::cuda_stream_view{}); + float const neg_zero = -0.0f; + vec.set_element_async(0, neg_zero, rmm::cuda_stream_view{}); + float const result = vec.element(0, rmm::cuda_stream_view{}); + EXPECT_TRUE(std::signbit(result)) << "sign bit of -0.0f was lost"; + EXPECT_EQ(result, 0.0f); +} + +TEST(NegativeZeroTest, PreservesDoubleNegativeZero) +{ + rmm::device_uvector vec(1, rmm::cuda_stream_view{}); + double const neg_zero = -0.0; + vec.set_element_async(0, neg_zero, rmm::cuda_stream_view{}); + double const result = vec.element(0, rmm::cuda_stream_view{}); + EXPECT_TRUE(std::signbit(result)) << "sign bit of -0.0 was lost"; + EXPECT_EQ(result, 0.0); +} + TYPED_TEST(TypedUVectorTest, FrontBackElement) { auto const size{12345};