diff --git a/projects/rocrand/.jenkins/common.groovy b/projects/rocrand/.jenkins/common.groovy deleted file mode 100644 index d4cad14e141..00000000000 --- a/projects/rocrand/.jenkins/common.groovy +++ /dev/null @@ -1,124 +0,0 @@ -// This file is for internal AMD use. -// If you are interested in running your own Jenkins, please raise a github issue for assistance. - -def runCompileCommand(platform, project, jobName, boolean debug=false, boolean staticLibrary=false, boolean codeCoverage=false) -{ - project.paths.construct_build_prefix() - - project.paths.build_command = './install -c' - String buildTypeArg = debug ? '-DCMAKE_BUILD_TYPE=Debug' : '-DCMAKE_BUILD_TYPE=Release' - String buildTypeDir = debug ? 'debug' : 'release' - String buildStatic = staticLibrary ? '-DBUILD_SHARED_LIBS=OFF' : '-DBUILD_SHARED_LIBS=ON' - String codeCovFlag = codeCoverage ? '-DCODE_COVERAGE=ON' : '' - String cmake = platform.jenkinsLabel.contains('centos') ? 'cmake3' : 'cmake' - String cmakePrefixPath = '-DCMAKE_PREFIX_PATH="/opt/rocm;/opt/rocm/llvm;/opt/rocm/bin"' - //Set CI node's gfx arch as target if PR, otherwise use default targets of the library - String amdgpuTargets = env.BRANCH_NAME.startsWith('PR-') ? '-DAMDGPU_TARGETS=\$gfx_arch' : '' - - def command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix} - mkdir -p build/${buildTypeDir} && cd build/${buildTypeDir} - # gfxTargetParser reads gfxarch and adds target features such as xnack - ${auxiliary.gfxTargetParser()} - ${cmake} --toolchain=toolchain-linux.cmake ${cmakePrefixPath} ${buildTypeArg} ${buildStatic} ${amdgpuTargets} ${codeCovFlag} -DBUILD_TEST=ON -DBUILD_BENCHMARK=ON ../.. - make -j\$(nproc) - """ - - platform.runCommand(this, command) -} - -def runTestCommand (platform, project, boolean rocmExamples=false) -{ - String sudo = auxiliary.sudo(platform.jenkinsLabel) - // String centos = platform.jenkinsLabel.contains('centos') ? '3' : '' - // Disable xorwow test for now as it is a known failure with gfx90a. - // def testCommand = "ctest${centos} --output-on-failure" - def testCommand = "ctest --output-on-failure" - - def command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix}/build/release - make -j4 - ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib/ ${testCommand} - """ - - platform.runCommand(this, command) - //ROCM Examples - if (rocmExamples){ - String buildString = "" - if (platform.os.contains("ubuntu")){ - buildString += "sudo dpkg -i *.deb" - } - else { - buildString += "sudo rpm -i *.rpm" - } - testCommand = """#!/usr/bin/env bash - set -ex - cd ${project.paths.project_build_prefix}/build/release/package - ${buildString} - cd ../../.. - testDirs=("Libraries/rocRAND") - git clone https://github.com/ROCm/rocm-examples.git - rocm_examples_dir=\$(readlink -f rocm-examples) - for testDir in \${testDirs[@]}; do - cd \${rocm_examples_dir}/\${testDir} - cmake -S . -B build - cmake --build build - cd ./build - ctest --output-on-failure - done - """ - platform.runCommand(this, testCommand, "ROCM Examples") - - } -} - -def runPackageCommand(platform, project) -{ - def packageHelper = platform.makePackage(platform.jenkinsLabel,"${project.paths.project_build_prefix}/build/release") - - platform.runCommand(this, packageHelper[0]) - platform.archiveArtifacts(this, packageHelper[1]) -} - -def runCodeCovTestCommand(platform, project, jobName) -{ - withCredentials([string(credentialsId: 'mathlibs-codecov-token-rocrand', variable: 'CODECOV_TOKEN')]) - { - String prflag = env.CHANGE_ID ? "--pr \"${env.CHANGE_ID}\"" : '' - - String objectFlags = "-object ./library/librocrand.so" - - String profdataFile = "./rocRand.profdata" - String reportFile = "./code_cov_rocRand.report" - String coverageFile = "./code_cov_rocRand.txt" - String coverageFilter = "(.*googletest-src.*)|(.*/yaml-cpp-src/.*)|(.*hip/include.*)|(.*/include/llvm/.*)|(.*test/unit.*)|(.*/spdlog/.*)|(.*/msgpack-src/.*)" - - def command = """#!/usr/bin/env bash - set -ex - cd ${project.paths.project_build_prefix}/build/release - #Remove any preexisting prof files. - rm -rf ./test/*.profraw - - #The `%m` creates a different prof file for each object file. - LLVM_PROFILE_FILE=./rocRand_%m.profraw ctest --output-on-failure - - #this combines them back together. - /opt/rocm/llvm/bin/llvm-profdata merge -sparse ./test/*.profraw -o ${profdataFile} - - #For some reason, with the -object flag, we can't just specify the source directory, so we have to filter out the files we don't want. - /opt/rocm/llvm/bin/llvm-cov report ${objectFlags} -instr-profile=${profdataFile} -ignore-filename-regex="${coverageFilter}" > ${reportFile} - cat ${reportFile} - /opt/rocm/llvm/bin/llvm-cov show -Xdemangler=/opt/rocm/llvm/bin/llvm-cxxfilt ${objectFlags} -instr-profile=${profdataFile} -ignore-filename-regex="${coverageFilter}" > ${coverageFile} - - #Upload report to codecov - curl -Os https://uploader.codecov.io/latest/linux/codecov - chmod +x codecov - ./codecov -t ${CODECOV_TOKEN} ${prflag} --flags "${platform.gpu}" --sha \$(git rev-parse HEAD) --name "CI: ${jobName}" --file ${coverageFile} -v - """ - platform.runCommand(this, command) - } -} - -return this diff --git a/projects/rocrand/.jenkins/precheckin.groovy b/projects/rocrand/.jenkins/precheckin.groovy deleted file mode 100644 index 02a84826007..00000000000 --- a/projects/rocrand/.jenkins/precheckin.groovy +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env groovy -@Library('rocJenkins@pong') _ -import com.amd.project.* -import com.amd.docker.* -import java.nio.file.Path; - -def runCI = -{ - nodeDetails, jobName-> - - def prj = new rocProject('rocRAND', 'PreCheckin') - - def nodes = new dockerNodes(nodeDetails, jobName, prj) - - def commonGroovy - - boolean formatCheck = false - - def compileCommand = - { - platform, project-> - - commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" - commonGroovy.runCompileCommand(platform, project, jobName) - } - - - def testCommand = - { - platform, project-> - - commonGroovy.runTestCommand(platform, project, true) - } - - def packageCommand = - { - platform, project-> - - commonGroovy.runPackageCommand(platform, project) - } - - buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) -} - -ci: { - String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) - - def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] - propertyList = auxiliary.appendPropertyList(propertyList) - - def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],centos8:['gfx906'],sles15sp1:['gfx908']])] - jobNameList = auxiliary.appendJobNameList(jobNameList) - - propertyList.each - { - jobName, property-> - if (urlJobName == jobName) - properties(auxiliary.addCommonProperties(property)) - } - - jobNameList.each - { - jobName, nodeDetails-> - if (urlJobName == jobName) - stage(jobName) { - runCI(nodeDetails, jobName) - } - } - - // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 - if(!jobNameList.keySet().contains(urlJobName)) - { - properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) - stage(urlJobName) { - runCI([ubuntu16:['gfx906']], urlJobName) - } - } -} \ No newline at end of file diff --git a/projects/rocrand/.jenkins/static.groovy b/projects/rocrand/.jenkins/static.groovy deleted file mode 100644 index cee8d3def84..00000000000 --- a/projects/rocrand/.jenkins/static.groovy +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env groovy -@Library('rocJenkins@pong') _ -import com.amd.project.* -import com.amd.docker.* -import java.nio.file.Path; - -def runCI = -{ - nodeDetails, jobName-> - - def prj = new rocProject('rocRAND', 'static') - - def nodes = new dockerNodes(nodeDetails, jobName, prj) - - def commonGroovy - - boolean formatCheck = false - - def compileCommand = - { - platform, project-> - - commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" - commonGroovy.runCompileCommand(platform, project, jobName, debug=false, staticLibrary=true) - } - - def testCommand = - { - platform, project-> - - commonGroovy.runTestCommand(platform, project) - } - - def packageCommand = - { - platform, project-> - - commonGroovy.runPackageCommand(platform, project) - } - - buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) -} - -ci: { - String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) - - def propertyList = ["compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])]] - propertyList = auxiliary.appendPropertyList(propertyList) - - def jobNameList = ["compute-rocm-dkms-no-npi-hipclang":([ubuntu18:['gfx900'],centos7:['gfx906'],centos8:['gfx906'],sles15sp1:['gfx908']])] - jobNameList = auxiliary.appendJobNameList(jobNameList) - - propertyList.each - { - jobName, property-> - if (urlJobName == jobName) - properties(auxiliary.addCommonProperties(property)) - } - - jobNameList.each - { - jobName, nodeDetails-> - if (urlJobName == jobName) - stage(jobName) { - runCI(nodeDetails, jobName) - } - } - - // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 - if(!jobNameList.keySet().contains(urlJobName)) - { - properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) - stage(urlJobName) { - runCI([ubuntu16:['gfx906']], urlJobName) - } - } -} diff --git a/projects/rocrand/.jenkins/staticanalysis.groovy b/projects/rocrand/.jenkins/staticanalysis.groovy deleted file mode 100644 index c41e309f893..00000000000 --- a/projects/rocrand/.jenkins/staticanalysis.groovy +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env groovy -// This shared library is available at https://github.com/ROCmSoftwarePlatform/rocJENKINS/ -@Library('rocJenkins@pong') _ - -// This is file for internal AMD use. -// If you are interested in running your own Jenkins, please raise a github issue for assistance. - -import com.amd.project.* -import com.amd.docker.* -import java.nio.file.Path - -def runCompileCommand(platform, project, jobName, boolean debug=false) -{ - project.paths.construct_build_prefix() -} - -def runCI = -{ - nodeDetails, jobName-> - - def prj = new rocProject('rocRAND', 'StaticAnalysis') - - // Define test architectures, optional rocm version argument is available - def nodes = new dockerNodes(nodeDetails, jobName, prj) - - boolean formatCheck = false - boolean staticAnalysis = true - - def compileCommand = - { - platform, project-> - - runCompileCommand(platform, project, jobName, false) - } - - buildProject(prj , formatCheck, nodes.dockerArray, compileCommand, null, null, staticAnalysis) -} - -ci: { - String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) - - properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * 6')])])) - - def jobNameList = ["main":([ubuntu22:['any']])] - jobNameList = auxiliary.appendJobNameList(jobNameList, 'rocBLAS') - - jobNameList.each - { - jobName, nodeDetails-> - if (urlJobName == jobName) - stage(jobName) { - runCI(nodeDetails, jobName) - } - } -} diff --git a/projects/rocrand/test/internal/test_normal_distribution.cpp b/projects/rocrand/test/internal/test_normal_distribution.cpp index b1226fd6460..ff7404cf2fa 100644 --- a/projects/rocrand/test/internal/test_normal_distribution.cpp +++ b/projects/rocrand/test/internal/test_normal_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -24,7 +24,6 @@ #include #include - using namespace rocrand_impl::host; TEST(normal_distribution_tests, float_test) diff --git a/projects/rocrand/test/internal/test_rocrand_config_dispatch.cpp b/projects/rocrand/test/internal/test_rocrand_config_dispatch.cpp index 06bef1c4dd9..1d1b45cdada 100644 --- a/projects/rocrand/test/internal/test_rocrand_config_dispatch.cpp +++ b/projects/rocrand/test/internal/test_rocrand_config_dispatch.cpp @@ -23,7 +23,8 @@ #include "test_common.hpp" #include -__global__ void write_target_arch(rocrand_impl::host::target_arch* dest_arch) +__global__ +void write_target_arch(rocrand_impl::host::target_arch* dest_arch) { constexpr auto arch = rocrand_impl::host::get_device_arch(); *dest_arch = arch; @@ -61,8 +62,8 @@ template::template device_config(true) .threads> -__global__ __launch_bounds__(BlockSize) void write_config(unsigned int* block_size, - unsigned int* grid_size) +__global__ __launch_bounds__(BlockSize) +void write_config(unsigned int* block_size, unsigned int* grid_size) { if(blockIdx.x == 0 && threadIdx.x == 0 && BlockSize == blockDim.x) { @@ -174,8 +175,8 @@ TEST(rocrand_config_dispatch_tests, device_id_from_stream) } template -__global__ void least_common_grid_size_kernel(unsigned int* least_common_grid_size, - rocrand_ordering order) +__global__ +void least_common_grid_size_kernel(unsigned int* least_common_grid_size, rocrand_ordering order) { *least_common_grid_size = rocrand_impl::host::get_least_common_grid_size( rocrand_impl::host::is_ordering_dynamic(order)); @@ -226,7 +227,8 @@ TEST(rocrand_config_dispatch_tests, default_config_provider) } template -__global__ void config_selector_kernel(unsigned int* output) +__global__ +void config_selector_kernel(unsigned int* output) { if(threadIdx.x == 0 && blockIdx.x == 0) { @@ -241,14 +243,16 @@ namespace rocrand_impl::host template<> struct generator_config_selector { - __host__ __device__ static constexpr unsigned int get_threads(const target_arch arch) + __host__ __device__ + static constexpr unsigned int get_threads(const target_arch arch) { if(arch == target_arch::gfx906) return 64; return generator_config_defaults::threads; } - __host__ __device__ static constexpr unsigned int get_blocks(const target_arch /*arch*/) + __host__ __device__ + static constexpr unsigned int get_blocks(const target_arch /*arch*/) { return generator_config_defaults::blocks; } @@ -263,8 +267,8 @@ TEST(rocrand_config_dispatch_tests, config_selection) HIP_CHECK(hipMallocHelper(&d_output, size * sizeof(*d_output))); using config_provider_t = rocrand_impl::host::default_config_provider; - config_provider_t config_provider{}; - rocrand_impl::host::generator_config config{}; + config_provider_t config_provider{}; + rocrand_impl::host::generator_config config{}; static constexpr hipStream_t default_stream = 0; static constexpr rocrand_ordering ordering = ROCRAND_ORDERING_PSEUDO_DYNAMIC; diff --git a/projects/rocrand/test/internal/test_rocrand_discrete.cpp b/projects/rocrand/test/internal/test_rocrand_discrete.cpp new file mode 100644 index 00000000000..835e2cbfd45 --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_discrete.cpp @@ -0,0 +1,944 @@ +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include + +#include +#include +#include + +#define HIP_CHECK(cmd) \ + do \ + { \ + auto error = (cmd); \ + if(error != hipSuccess) \ + { \ + std::cerr << "Encountered HIP error (" << hipGetErrorString(error) << ") at line " \ + << __LINE__ << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +#define ROCRAND_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +struct GlobalSizes +{ + static constexpr size_t items_per_thread = 256; + static constexpr size_t block_size = 32; + static constexpr size_t items_per_block = items_per_thread * block_size; + static constexpr size_t grid_size = 1234; + static constexpr size_t size = grid_size * items_per_block; +}; + +using DiscreteDataType = ::testing::Types; + +template +class InternalDiscreteDistributionTests : public ::testing::Test{ + public: + using T = DT; +}; + +TYPED_TEST_SUITE(InternalDiscreteDistributionTests, DiscreteDataType); + +template +__global__ void internal_discrete_kernel(T * device_input, unsigned int * device_output, rocrand_discrete_distribution_st &dis, const DiscreteFunc & f){ + const size_t items_per_block = GlobalSizes::items_per_thread * GlobalSizes::block_size; + const size_t offset = (items_per_block * blockIdx.x) + (GlobalSizes::items_per_thread * threadIdx.x); + + for(size_t i = 0; i < GlobalSizes::items_per_thread; i++){ + device_output[offset + i] = f(device_input[offset + i], dis); + } +} + +template +void run_internal_discrete_tests(const DiscreteFunc & f){ + std::vector> all_distributions = { + {10, 10, 10, 10}, + {1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1}, + {1234, 1677, 1519, 1032, 561, 254, 98, 33, 10, 2}, + {1, 2, 8, 4, 3, 2, 1} + }; + + std::random_device rd; + std::mt19937 gen(rd()); + + T * host_input = new T[GlobalSizes::size]; + unsigned int * host_output = new unsigned int[GlobalSizes::size]; + + // Check for different types of data input and generate the input data + if constexpr (std::is_same_v){ + std::uniform_real_distribution dis(0, 1); + for(size_t i = 0; i < GlobalSizes::size; i++) host_input[i] = dis(gen); + } + else if constexpr(std::is_same_v || std::is_same_v){ + std::uniform_int_distribution dis(0, std::numeric_limits::max()); + for(size_t i = 0; i < GlobalSizes::size; i++) host_input[i] = dis(gen); + } + else{ + std::uniform_int_distribution dis(0, std::numeric_limits::max()); + for(size_t i = 0; i < GlobalSizes::size; i++) host_input[i] = dis(gen); + } + + T * device_input; + unsigned int * device_output; + + HIP_CHECK(hipMalloc(&device_input, sizeof(T) * GlobalSizes::size)); + HIP_CHECK(hipMalloc(&device_output, sizeof(unsigned int) * GlobalSizes::size)); + + HIP_CHECK(hipMemcpy(device_input, host_input, sizeof(T) * GlobalSizes::size, hipMemcpyHostToDevice)); + + // Generate different discrete distributions and check them against expected + for(std::vector distribution : all_distributions){ + // Getting expected Results + double sum = std::accumulate(distribution.begin(), distribution.end(), 0); + std::vector expected_prob(distribution.size()); + for(size_t i = 0; i < distribution.size(); i++) + expected_prob[i] = distribution[i] / sum; + + // Creating the discrete distribution + rocrand_discrete_distribution discrete_distribution; + ROCRAND_CHECK(rocrand_create_discrete_distribution(expected_prob.data(), expected_prob.size(), 0, &discrete_distribution)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(internal_discrete_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_input, device_output, *discrete_distribution, f + ); + + HIP_CHECK(hipMemcpy(host_output, device_output, sizeof(unsigned int) * GlobalSizes::size, hipMemcpyDeviceToHost)); + + std::vector histogram(distribution.size()); + + // Calculating the actual results + for(size_t i = 0; i < GlobalSizes::size; i++) + histogram[host_output[i]]++; + + std::vector actual_prob(distribution.size()); + for(size_t i = 0; i < actual_prob.size(); i++) + actual_prob[i] = histogram[i] / static_cast(GlobalSizes::size); + + // If the original probability is bigger than 5% then expected should be within 1% difference. + // Otherwise it should be within 0.01 + for(size_t i = 0; i < expected_prob.size(); i++){ + double eps = expected_prob[i] > 0.05 ? expected_prob[i] * 0.01 : 0.01; + ASSERT_NEAR(expected_prob[i], actual_prob[i], eps); + } + + ROCRAND_CHECK(rocrand_destroy_discrete_distribution(discrete_distribution)); + } + + delete [] host_input; + delete [] host_output; + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); +} + +TYPED_TEST(InternalDiscreteDistributionTests, InternalDiscreteAliasTest){ + using T = typename TestFixture::T; + run_internal_discrete_tests( + [=] __device__(T val, rocrand_discrete_distribution_st & dis) + {return rocrand_device::detail::discrete_alias(val, dis);} + ); +} + +TYPED_TEST(InternalDiscreteDistributionTests, InternalDiscreteCDFTest){ + using T = typename TestFixture::T; + run_internal_discrete_tests( + [=] __device__(T val, rocrand_discrete_distribution_st & dis) + {return rocrand_device::detail::discrete_cdf(val, dis);} + ); +} + +template +__global__ void block_wide_external_discrete_kernel( + RocRandPrngType * states, + unsigned int * device_output, + rocrand_discrete_distribution_st & dis, + size_t items_per_thread, + size_t block_size +){ + const size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + for(size_t i = 0; i < items_per_thread; i++){ + __shared__ RocRandPrngType state; + if(threadIdx.x == 0) + state = states[blockIdx.x]; + __syncthreads(); + + device_output[offset + i] = rocrand_discrete(&state, &dis); + + if(threadIdx.x == 0) + states[blockIdx.x] = state; + __syncthreads(); + } +} + +template +__global__ void external_discrete_kernel( + RocRandPrngType * states, + unsigned int * device_output, + rocrand_discrete_distribution_st & dis, + size_t items_per_thread, + size_t block_size +){ + const size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + for(size_t i = 0; i < items_per_thread; i++){ + auto local_state = states[offset + i]; + device_output[offset + i] = rocrand_discrete(&local_state, &dis); + states[offset + i] = local_state; + } +} + +template +void run_external_discrete_tests( + PrngState & device_states, + size_t items_per_thread = GlobalSizes::items_per_thread, + size_t block_size = GlobalSizes::block_size, + size_t grid_size = GlobalSizes::grid_size, + size_t size = GlobalSizes::size +){ + + std::vector> all_distributions = { + {10, 10, 10, 10}, + {1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1}, + {1234, 1677, 1519, 1032, 561, 254, 98, 33, 10, 2}, + {1, 2, 8, 4, 3, 2, 1} + }; + + unsigned int * host_output = new unsigned int[size]; + unsigned int * device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(unsigned int) * size)); + + for(std::vector distribution : all_distributions){ + + // Getting expected Results + double sum = std::accumulate(distribution.begin(), distribution.end(), 0); + std::vector expected_prob(distribution.size()); + for(size_t i = 0; i < distribution.size(); i++) + expected_prob[i] = distribution[i] / sum; + + // Creating the discrete distribution + rocrand_discrete_distribution discrete_distribution; + ROCRAND_CHECK(rocrand_create_discrete_distribution(expected_prob.data(), expected_prob.size(), 0, &discrete_distribution)); + + if constexpr(block_wide){ + hipLaunchKernelGGL( + HIP_KERNEL_NAME(block_wide_external_discrete_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_states, device_output, *discrete_distribution, items_per_thread, block_size + ); + } + else{ + hipLaunchKernelGGL( + HIP_KERNEL_NAME(external_discrete_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_states, device_output, *discrete_distribution, items_per_thread, block_size + ); + } + + HIP_CHECK(hipMemcpy(host_output, device_output, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + std::vector histogram(distribution.size()); + + // Calculating the actual results + for(size_t i = 0; i < size; i++) + histogram[host_output[i]]++; + + std::vector actual_prob(distribution.size()); + for(size_t i = 0; i < actual_prob.size(); i++) + actual_prob[i] = histogram[i] / static_cast(size); + + // If the original probability is bigger than 5% then expected should be within 1% difference. + // Otherwise it should be within 0.01 + for(size_t i = 0; i < expected_prob.size(); i++){ + double eps = expected_prob[i] > 0.05 ? expected_prob[i] * 0.01 : 0.01; + ASSERT_NEAR(expected_prob[i], actual_prob[i], eps); + } + + ROCRAND_CHECK(rocrand_destroy_discrete_distribution(discrete_distribution)); + } + + delete [] host_output; + HIP_CHECK(hipFree(device_output)); +} + +template +__global__ void init_rocrand_states_kernel(RocRandPrngType * states, const InitFunc & f){ + constexpr size_t items_per_block = GlobalSizes::items_per_thread * GlobalSizes::block_size; + const size_t offset = (items_per_block * blockIdx.x) + (GlobalSizes::items_per_thread * threadIdx.x); + + for(size_t i = 0; i < GlobalSizes::items_per_thread; i++) + f(i, offset, &states[offset + i]); +} + +TEST(ExternalDiscreteDistributionTests, Philox4x32_10Test){ + // Initialize the prng state + rocrand_state_philox4x32_10 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_philox4x32_10) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_philox4x32_10 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Mrg31k3pTest){ + // Initialize the prng state + rocrand_state_mrg31k3p * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_mrg31k3p) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_mrg31k3p * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Mrg32k3aTest){ + // Initialize the prng state + rocrand_state_mrg32k3a * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_mrg32k3a) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_mrg32k3a * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, XorwowTest){ + // Initialize the prng state + rocrand_state_xorwow * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_xorwow) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_xorwow * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Mtgp32Test){ + constexpr size_t items_per_thread = 1024; + constexpr size_t block_size = 256; + constexpr size_t grid_size = 12; + constexpr size_t items_per_block = block_size * items_per_thread; + + constexpr size_t test_size = items_per_block * grid_size; + rocrand_state_mtgp32 * states; + + HIP_CHECK(hipMalloc(&states, sizeof(rocrand_state_mtgp32) * grid_size)); + rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, grid_size, 123456); + HIP_CHECK(hipDeviceSynchronize()); + + run_external_discrete_tests( + states, + items_per_thread, + block_size, + grid_size, + test_size + ); + HIP_CHECK(hipFree(states)); +} + +TEST(ExternalDiscreteDistributionTests, Lfsr113Test){ + // Initialize the prng state + rocrand_state_lfsr113 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_lfsr113) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_lfsr113 * state){ + rocrand_init( + { + (123456 ^ index), + (123456 ^ index) << 1, + (123456 ^ index) << 2, + (123456 ^ index) << 3 + }, + offset + index, + 0, + state + ); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Sobol32Test){ + // Initialize the prng state + rocrand_state_sobol32 * host_states = new rocrand_state_sobol32[GlobalSizes::size]; + const unsigned int* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + // 640000 is the size of directions. This is to prevent overflow stuff + for(size_t i = 0; i < GlobalSizes::size; i++) + rocrand_init(directions, i % 640000, host_states + i); + + rocrand_state_sobol32 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_sobol32) * GlobalSizes::size)); + HIP_CHECK(hipMemcpy(device_states, host_states, sizeof(rocrand_state_sobol32) * GlobalSizes::size, hipMemcpyHostToDevice)); + + run_external_discrete_tests(device_states); + + delete [] host_states; + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, ScrambledSobol32Test){ + // Initialize the prng state + rocrand_state_scrambled_sobol32 * host_states = new rocrand_state_scrambled_sobol32[GlobalSizes::size]; + const unsigned int* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + // 640000 is the size of directions. This is to prevent overflow stuff + for(size_t i = 0; i < GlobalSizes::size; i++) + rocrand_init(directions, 123456 ^ i, i % 640000, host_states + i); + + rocrand_state_scrambled_sobol32 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_scrambled_sobol32) * GlobalSizes::size)); + HIP_CHECK(hipMemcpy(device_states, host_states, sizeof(rocrand_state_scrambled_sobol32) * GlobalSizes::size, hipMemcpyHostToDevice)); + + run_external_discrete_tests(device_states); + + delete [] host_states; + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Sobol64Test){ + // Initialize the prng state + rocrand_state_sobol64 * host_states = new rocrand_state_sobol64[GlobalSizes::size]; + const unsigned long long* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + + // 1280000 is the size of directions. This is to prevent overflow stuff + for(size_t i = 0; i < GlobalSizes::size; i++) + rocrand_init(directions, i % 1280000, host_states + i); + + rocrand_state_sobol64 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_sobol64) * GlobalSizes::size)); + HIP_CHECK(hipMemcpy(device_states, host_states, sizeof(rocrand_state_sobol64) * GlobalSizes::size, hipMemcpyHostToDevice)); + + run_external_discrete_tests(device_states); + + delete [] host_states; + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, ScrambledSobol64Test){ + // Initialize the prng state + rocrand_state_scrambled_sobol64 * host_states = new rocrand_state_scrambled_sobol64[GlobalSizes::size]; + const unsigned long long* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + + // 1280000 is the size of directions. This is to prevent overflow stuff + for(size_t i = 0; i < GlobalSizes::size; i++) + rocrand_init(directions, 123456 ^ i, i % 1280000, host_states + i); + + rocrand_state_scrambled_sobol64 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_scrambled_sobol64) * GlobalSizes::size)); + HIP_CHECK(hipMemcpy(device_states, host_states, sizeof(rocrand_state_scrambled_sobol64) * GlobalSizes::size, hipMemcpyHostToDevice)); + + run_external_discrete_tests(device_states); + + delete [] host_states; + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Threefry2x32_20Test){ + // Initialize the prng state + rocrand_state_threefry2x32_20 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_threefry2x32_20) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_threefry2x32_20 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Threefry2x64_20Test){ + // Initialize the prng state + rocrand_state_threefry2x64_20 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_threefry2x64_20) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_threefry2x64_20 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Threefry4x32_20Test){ + // Initialize the prng state + rocrand_state_threefry4x32_20 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_threefry4x32_20) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_threefry4x32_20 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +TEST(ExternalDiscreteDistributionTests, Threefry4x64_20Test){ + // Initialize the prng state + rocrand_state_threefry4x64_20 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_threefry4x64_20) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_threefry4x64_20 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + run_external_discrete_tests(device_states); + + HIP_CHECK(hipFree(device_states)); +} + +template +__global__ void uint4_kernel(rocrand_state_philox4x32_10 * states, uint4 * device_output, rocrand_discrete_distribution_st & dis){ + const size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + for(size_t i = 0; i < items_per_thread; i++){ + auto local_state = states[offset + i]; + device_output[offset + i] = rocrand_discrete4(&local_state, &dis); + states[offset + i] = local_state; + } +} + +TEST(ExternalDiscreteDistributionTests, Philox4x32_10WithUIN4OutputTest) +{ + // Initialize the prng state + rocrand_state_philox4x32_10 * device_states; + HIP_CHECK(hipMalloc(&device_states, sizeof(rocrand_state_philox4x32_10) * GlobalSizes::size)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_rocrand_states_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, + [] __device__ (size_t index, size_t offset, rocrand_state_philox4x32_10 * state){ + rocrand_init((123456 ^ index), offset + index, 0, state); + } + ); + + std::vector> all_distributions = { + {10, 10, 10, 10}, + {1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1}, + {1234, 1677, 1519, 1032, 561, 254, 98, 33, 10, 2}, + {1, 2, 8, 4, 3, 2, 1} + }; + + uint4 * host_output = new uint4[GlobalSizes::size]; + uint4 * device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(uint4) * GlobalSizes::size)); + + for(std::vector distribution : all_distributions){ + + // Getting expected Results + double sum = std::accumulate(distribution.begin(), distribution.end(), 0); + std::vector expected_prob(distribution.size()); + for(size_t i = 0; i < distribution.size(); i++) + expected_prob[i] = distribution[i] / sum; + + // Creating the discrete distribution + rocrand_discrete_distribution discrete_distribution; + ROCRAND_CHECK(rocrand_create_discrete_distribution(expected_prob.data(), expected_prob.size(), 0, &discrete_distribution)); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(uint4_kernel), + dim3(GlobalSizes::grid_size), dim3(GlobalSizes::block_size), 0, 0, + device_states, device_output, * discrete_distribution + ); + + HIP_CHECK(hipMemcpy(host_output, device_output, sizeof(uint4) * GlobalSizes::size, hipMemcpyDeviceToHost)); + std::vector histogram(distribution.size()); + + // Calculating the actual results + for(size_t i = 0; i < GlobalSizes::size; i++){ + histogram[host_output[i].w]++; + histogram[host_output[i].x]++; + histogram[host_output[i].y]++; + histogram[host_output[i].z]++; + } + + std::vector actual_prob(distribution.size()); + for(size_t i = 0; i < actual_prob.size(); i++) + actual_prob[i] = histogram[i] / static_cast(GlobalSizes::size * 4); + + // If the original probability is bigger than 5% then expected should be within 1% difference. + // Otherwise it should be within 0.01 + for(size_t i = 0; i < expected_prob.size(); i++){ + double eps = expected_prob[i] > 0.05 ? expected_prob[i] * 0.01 : 0.01; + ASSERT_NEAR(expected_prob[i], actual_prob[i], eps); + } + + ROCRAND_CHECK(rocrand_destroy_discrete_distribution(discrete_distribution)); + } + + delete [] host_output; + HIP_CHECK(hipFree(device_output)); +} + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +#include "rng/distribution/discrete.hpp" + +template +void run_internal_host_test(const DiscreteFunc& df) +{ + constexpr size_t test_size = 100000; + + std::vector> all_distributions = { + {10, 10, 10, 10}, + {1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1}, + {1234, 1677, 1519, 1032, 561, 254, 98, 33, 10, 2}, + {1, 2, 8, 4, 3, 2, 1} + }; + + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector input(test_size); + std::vector histogram(test_size, 0); + + // Check for different types of data input and generate the input data + if constexpr(std::is_same_v) + { + std::uniform_real_distribution dis(0, 1); + for(size_t i = 0; i < test_size; i++) + input[i] = dis(gen); + } + else if constexpr(std::is_same_v || std::is_same_v) + { + std::uniform_int_distribution dis(0, std::numeric_limits::max()); + for(size_t i = 0; i < test_size; i++) + input[i] = dis(gen); + } + else + { + std::uniform_int_distribution dis(0, std::numeric_limits::max()); + for(size_t i = 0; i < test_size; i++) + input[i] = dis(gen); + } + + for(std::vector distribution : all_distributions) + { + // Getting expected Results + double sum = std::accumulate(distribution.begin(), distribution.end(), 0); + std::vector expected_prob(distribution.size()); + for(size_t i = 0; i < distribution.size(); i++) + expected_prob[i] = distribution[i] / sum; + + // Creating the discrete distribution + rocrand_discrete_distribution_st discrete_dis; + + using namespace rocrand_impl::host; + + rocrand_status rocrand_err + = discrete_distribution_factory::create(distribution, + distribution.size(), + 0, + discrete_dis); + + ROCRAND_CHECK(rocrand_err); + std::vector histogram(distribution.size()); + for(size_t i = 0; i < test_size; i++) + { + histogram[df(input[i], discrete_dis)]++; + } + + std::vector actual_prob(distribution.size()); + for(size_t i = 0; i < actual_prob.size(); i++) + actual_prob[i] = histogram[i] / static_cast(test_size); + + // If the original probability is bigger than 5% then expected should be within 1% difference. + // Otherwise it should be within 0.01 + for(size_t i = 0; i < expected_prob.size(); i++) + { + double eps = expected_prob[i] > 0.05 ? expected_prob[i] * 0.05 : 0.01; + ASSERT_NEAR(expected_prob[i], actual_prob[i], eps); + } + } +} + +template +struct InternalHostParams +{ + using T = InType; + static constexpr bool uda = UseDiscreteAlias; +}; + +using InternalDiscreteHostParams + = ::testing::Types, + InternalHostParams, + InternalHostParams, + InternalHostParams, + InternalHostParams, + InternalHostParams, + InternalHostParams, + InternalHostParams>; + +template +class InternalDiscreteHostTest : public ::testing::Test +{ +public: + using input_type = typename InternalHostParams::T; + static constexpr bool use_discrete_alias = InternalHostParams::uda; +}; + +TYPED_TEST_SUITE(InternalDiscreteHostTest, InternalDiscreteHostParams); + +TYPED_TEST(InternalDiscreteHostTest, discrete_host_internal) +{ + using input_type = typename TestFixture::input_type; + static constexpr bool use_discrete_alias = TestFixture::use_discrete_alias; + + if constexpr(use_discrete_alias) + { + run_internal_host_test( + [=](const input_type& x, rocrand_discrete_distribution_st& dis) + { return rocrand_device::detail::discrete_alias(x, dis); }); + } + else + { + run_internal_host_test( + [=](const input_type& x, rocrand_discrete_distribution_st& dis) + { return rocrand_device::detail::discrete_cdf(x, dis); }); + } +} + +template +inline void GetRocrandState(RocrandPRNGType* host_state) +{ + + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state); + } + else + { + rocrand_init(123456, 654321, 0, host_state); + } +} + +template +void run_host_test(const DiscreteFunc& df) +{ + constexpr size_t test_size = 100000; + + std::vector> all_distributions = { + {10, 10, 10, 10}, + {1, 2, 3, 4, 5, 6, 5, 4, 3, 2, 1}, + {1234, 1677, 1519, 1032, 561, 254, 98, 33, 10, 2}, + {1, 2, 8, 4, 3, 2, 1} + }; + + RocrandPRNGType generator; + GetRocrandState(&generator); + + for(std::vector distribution : all_distributions) + { + // Getting expected Results + double sum = std::accumulate(distribution.begin(), distribution.end(), 0); + std::vector expected_prob(distribution.size()); + for(size_t i = 0; i < distribution.size(); i++) + expected_prob[i] = distribution[i] / sum; + + // Creating the discrete distribution + rocrand_discrete_distribution_st discrete_dis; + + using namespace rocrand_impl::host; + + rocrand_status rocrand_err + = discrete_distribution_factory::create(distribution, + distribution.size(), + 0, + discrete_dis); + + ROCRAND_CHECK(rocrand_err); + std::vector histogram(distribution.size()); + for(size_t i = 0; i < test_size; i++) + { + histogram[df(&generator, &discrete_dis)]++; + } + + std::vector actual_prob(distribution.size()); + for(size_t i = 0; i < actual_prob.size(); i++) + actual_prob[i] = histogram[i] / static_cast(test_size); + + // If the original probability is bigger than 5% then expected should be within 1% difference. + // Otherwise it should be within 0.01 + for(size_t i = 0; i < expected_prob.size(); i++) + { + double eps = expected_prob[i] > 0.05 ? expected_prob[i] * 0.05 : 0.01; + ASSERT_NEAR(expected_prob[i], actual_prob[i], eps); + } + } +} + +using DiscreteHostParams = ::testing::Types; + +template +class DiscreteHostTest : public ::testing::Test +{ +public: + using rocrand_prng_type = T; +}; + +TYPED_TEST_SUITE(DiscreteHostTest, DiscreteHostParams); + +TYPED_TEST(DiscreteHostTest, discrete_host) +{ + using rocrand_prng_type = typename TestFixture::rocrand_prng_type; + + run_host_test( + [=](rocrand_prng_type* x, rocrand_discrete_distribution_st* dis) + { return rocrand_discrete(x, dis); }); +} diff --git a/projects/rocrand/test/internal/test_rocrand_log_normal.cpp b/projects/rocrand/test/internal/test_rocrand_log_normal.cpp new file mode 100644 index 00000000000..94161cc9a02 --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_log_normal.cpp @@ -0,0 +1,576 @@ +// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include + +#undef ROCRAND_DETAIL_BM_NOT_IN_STATE +#include + +#define HIP_CHECK(cmd) \ + do \ + { \ + auto error = (cmd); \ + if(error != hipSuccess) \ + { \ + std::cerr << "Encountered HIP error (" << hipGetErrorString(error) << ") at line " \ + << __LINE__ << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +#define ROCRAND_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(status); \ + } \ + } \ + while(0) + +// If x is small then get withing 0.001 otherwise 5% +#define GET_EPS(x) x < 0.01 ? 0.001 : x * 0.05 + +struct GlobalSizes +{ + static constexpr size_t items_per_thread = 10000; + static constexpr size_t block_size = 8; + static constexpr size_t items_per_block = items_per_thread * block_size; + static constexpr size_t grid_size = 8; + static constexpr size_t size = grid_size * items_per_block; +}; + +template +inline void GetRocrandDeviceState(RocrandPRNGType* device_state) +{ + + RocrandPRNGType* host_state + = new RocrandPRNGType[GlobalSizes::block_size * GlobalSizes::grid_size]; + + for(size_t i = 0; i < GlobalSizes::block_size * GlobalSizes::grid_size; i++) + { + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456 ^ i, host_state + i); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456 ^ i, 654321 ^ i, host_state + i); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456 ^ i, host_state + i); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456 ^ i, 654321 ^ i, host_state + i); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state + i); + } + else + { + rocrand_init(123456 ^ i, 654321 ^ i, 0, host_state + i); + } + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemcpy(device_state + i, + host_state + i, + sizeof(RocrandPRNGType), + hipMemcpyHostToDevice)); + } + + delete[] host_state; +} + +template +ReturnType get_actual_mean(const size_t test_size, + const size_t out_size, + StartIt begin, + EndIt end, + const ReadMeanFunc& rmf) +{ + ReturnType actual_mean = std::accumulate(begin, + end, + (ReturnType)0, + [=](ReturnType acc, OutputType x) + { return acc + static_cast(rmf(x)); }) + / static_cast(test_size * out_size); + return actual_mean; +} + +template +ReturnType get_actual_std_dev(const size_t test_size, + const size_t out_size, + StartIt begin, + EndIt end, + ReturnType actual_mean, + const ReadStdFunc& rsf) +{ + ReturnType actual_std_dev + = std::accumulate(begin, + end, + (ReturnType)0, + [=](ReturnType acc, OutputType x) + { return acc + static_cast(rsf(x, actual_mean)); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size * out_size - 1)); + return actual_std_dev; +} + +template +struct ParamsHolder +{ + using out_type = OutputType; + using rng = RocrandPRNGType; + static constexpr size_t out_size = OutSize; +}; + +using logNormalParams = ::testing::Types, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder, + ParamsHolder>; + +template +class LogNormalTest : public ::testing::Test +{ +public: + using out_type = typename ParamsHolder::out_type; + using prng_type = typename ParamsHolder::rng; + static constexpr size_t out_size = ParamsHolder::out_size; +}; + +TYPED_TEST_SUITE(LogNormalTest, logNormalParams); + +template +void __global__ log_normal_kernel(OutputType* device_output, + const InputType mean, + const InputType std_dev, + RocrandPRNGType* states, + const LogNormalFunc& lnf) +{ + const size_t offset = (GlobalSizes::items_per_block * blockIdx.x) + + (GlobalSizes::items_per_thread * threadIdx.x); + const size_t state_offset = (GlobalSizes::block_size * blockIdx.x) + threadIdx.x; + + auto state = states + state_offset; + for(size_t i = 0; i < GlobalSizes::items_per_thread; i++) + device_output[offset + i] = lnf(state, mean, std_dev); + states[state_offset] = *state; +} + +template +void run_device_test(const LogNormalFunc& lnf, const ReadMeanFunc& rmf, ReadStdFunc& rsf) +{ + constexpr T input_mean = 0.5; + constexpr T input_std_dev = 1.0; + + const T dev2 = std::powf(input_std_dev, 2); + + const T expected_mean = std::exp(input_mean + dev2 / 2); + const T expected_std_dev = std::sqrt((std::exp(dev2) - 1) * std::exp(2 * input_mean + dev2)); + + RocrandPRNGType* generators; + HIP_CHECK( + hipMalloc(&generators, + sizeof(RocrandPRNGType) * GlobalSizes::block_size * GlobalSizes::grid_size)); + + GetRocrandDeviceState(generators); + + OutputType* device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(OutputType) * GlobalSizes::size)); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(log_normal_kernel), + dim3(GlobalSizes::grid_size), + dim3(GlobalSizes::block_size), + 0, + 0, + device_output, + input_mean, + input_std_dev, + generators, + lnf); + + std::vector host_output(GlobalSizes::size); + + HIP_CHECK(hipMemcpy(host_output.data(), + device_output, + sizeof(OutputType) * GlobalSizes::size, + hipMemcpyDeviceToHost)); + + T actual_mean = get_actual_mean(GlobalSizes::size, + OutSize, + host_output.begin(), + host_output.end(), + rmf); + T actual_std_dev = get_actual_std_dev(GlobalSizes::size, + OutSize, + host_output.begin(), + host_output.end(), + actual_mean, + rsf); + + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); + + HIP_CHECK(hipFree(generators)); + HIP_CHECK(hipFree(device_output)); +} + +TYPED_TEST(LogNormalTest, log_normal_device_test) +{ + using out_type = typename TestFixture::out_type; + using rocrand_state = typename TestFixture::prng_type; + constexpr size_t out_size = TestFixture::out_size; + using T + = std::conditional_t<(std::is_same_v || std::is_same_v + || std::is_same_v), + float, + double>; + + if constexpr(out_size == 1) + { + auto mean_func = [](out_type x) { return x; }; + auto std_dev_func + = [](out_type x, out_type actual_mean) { return std::powf(x - actual_mean, 2); }; + if constexpr(std::is_same_v) + { + run_device_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_device_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } + else if constexpr(out_size == 2) + { + auto mean_func = [](out_type x) { return x.x + x.y; }; + auto std_dev_func = [](out_type x, T actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + if constexpr(std::is_same_v) + { + run_device_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal2(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_device_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double2(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } + else + { + auto mean_func = [](out_type x) { return x.x + x.y + x.w + x.z; }; + auto std_dev_func = [](out_type x, T actual_mean) + { + return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2) + + std::powf(x.w - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + if constexpr(std::is_same_v) + { + run_device_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal4(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_device_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double4(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } +} + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +template +inline void GetRocrandHostState(RocrandPRNGType* host_state) +{ + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state); + } + else + { + rocrand_init(123456, 654321, 0, host_state); + } +} + +template +void run_host_test(const LogNormalFunc& lnf, const ReadMeanFunc& rmf, ReadStdFunc& rsf) +{ + constexpr size_t test_size = 50000; + constexpr T input_mean = 0.5; + constexpr T input_std_dev = 1.0; + + const T dev2 = std::powf(input_std_dev, 2); + + const T expected_mean = std::exp(input_mean + dev2 / 2); + const T expected_std_dev = std::sqrt((std::exp(dev2) - 1) * std::exp(2 * input_mean + dev2)); + + RocrandPRNGType generator; + GetRocrandHostState(&generator); + + std::vector output(test_size); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = lnf(&generator, input_mean, input_std_dev); + } + + T actual_mean = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutputType x) { return acc + rmf(x); }) + / static_cast(test_size * OutSize); + + T actual_std_dev + = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutputType x) { return acc + rsf(x, actual_mean); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size * OutSize - 1)); + + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); +} + +TYPED_TEST(LogNormalTest, log_normal_host_test) +{ + using out_type = typename TestFixture::out_type; + using rocrand_state = typename TestFixture::prng_type; + constexpr size_t out_size = TestFixture::out_size; + using T + = std::conditional_t<(std::is_same_v || std::is_same_v + || std::is_same_v), + float, + double>; + + if constexpr(out_size == 1) + { + auto mean_func = [](out_type x) { return x; }; + auto std_dev_func + = [](out_type x, out_type actual_mean) { return std::powf(x - actual_mean, 2); }; + if constexpr(std::is_same_v) + { + run_host_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_host_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } + else if constexpr(out_size == 2) + { + auto mean_func = [](out_type x) { return x.x + x.y; }; + auto std_dev_func = [](out_type x, T actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + if constexpr(std::is_same_v) + { + run_host_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal2(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_host_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double2(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } + else + { + auto mean_func = [](out_type x) { return x.x + x.y + x.w + x.z; }; + auto std_dev_func = [](out_type x, T actual_mean) + { + return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2) + + std::powf(x.w - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + if constexpr(std::is_same_v) + { + run_host_test( + [=](rocrand_state* state, float input_mean, float input_std_dev) + { return rocrand_log_normal4(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + else + { + run_host_test( + [=](rocrand_state* state, double input_mean, double input_std_dev) + { return rocrand_log_normal_double4(state, input_mean, input_std_dev); }, + mean_func, + std_dev_func); + } + } +} diff --git a/projects/rocrand/test/internal/test_rocrand_mrg31k3p_prng.cpp b/projects/rocrand/test/internal/test_rocrand_mrg31k3p_prng.cpp new file mode 100644 index 00000000000..47337a8cded --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_mrg31k3p_prng.cpp @@ -0,0 +1,187 @@ +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include +#include +#include + +#include + +TEST(mrg31k3pTest, host_rocrand_init_consistency) +{ + // Test the consistency of rocrand_init when the same seed and subsequence are given + constexpr size_t test_size = 10000; + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + + rocrand_state_mrg31k3p state1; + rocrand_state_mrg31k3p state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, 0, &state2); + + for(size_t i = 0; i < test_size; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} + +TEST(mrg31k3pTest, host_rocrand_offet_consistency) +{ + // Test the consistency of rocrand_init when the same seed and subsequence are given + // but with a different offset + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + std::vector offsets = {1, 10, 100, 1000, 10000}; + + rocrand_state_mrg31k3p state1; + rocrand_state_mrg31k3p state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + for(const unsigned long long& offset : offsets) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, offset, &state2); + + for(size_t i = 0; i < offset; i++) + rocrand(&state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } + } +} + +TEST(mrg31k3pTest, rocrand_check_uniform_property) +{ + // Test to ensure that rocrand gives a uniform distribution + + constexpr size_t size = 10000; + + rocrand_state_mrg31k3p state; + rocrand_init(123456, 654321, 0, &state); + + std::vector output(size); + + for(size_t i = 0; i < size; i++) + output[i] = rocrand(&state); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const unsigned int mini = std::numeric_limits::min(); + const unsigned int maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(output.begin(), + output.end(), + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(size); + + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); +} + +TEST(mrg31k3pTest, host_skipahead) +{ + // Test the consistency of skipahead when the same seed and subsequence are given + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + std::vector offsets = {1, 10, 100, 1000, 10000}; + + rocrand_state_mrg31k3p state1; + rocrand_state_mrg31k3p state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + for(const unsigned long long& offset : offsets) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } + } +} + +TEST(mrg31k3pTest, host_skipahead_subsequence) +{ + // Test the consistency of skipahead when the same seed and subsequence are given + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + + rocrand_state_mrg31k3p state1; + rocrand_state_mrg31k3p state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + rocrand_init(seed, 0, 0, &state1); + rocrand_init(seed, subsequence, 0, &state2); + + skipahead_subsequence(subsequence, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} diff --git a/projects/rocrand/test/internal/test_rocrand_mrg32k3a_prng.cpp b/projects/rocrand/test/internal/test_rocrand_mrg32k3a_prng.cpp new file mode 100644 index 00000000000..618baff23d4 --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_mrg32k3a_prng.cpp @@ -0,0 +1,187 @@ +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include +#include +#include + +#include + +TEST(mrg32k3aTest, host_rocrand_init_consistency) +{ + // Test the consistency of rocrand_init when the same seed and subsequence are given + constexpr size_t test_size = 10000; + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + + rocrand_state_mrg32k3a state1; + rocrand_state_mrg32k3a state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, 0, &state2); + + for(size_t i = 0; i < test_size; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} + +TEST(mrg32k3aTest, host_rocrand_offet_consistency) +{ + // Test the consistency of rocrand_init when the same seed and subsequence are given + // but with a different offset + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + std::vector offsets = {1, 10, 100, 1000, 10000}; + + rocrand_state_mrg32k3a state1; + rocrand_state_mrg32k3a state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + for(const unsigned long long& offset : offsets) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, offset, &state2); + + for(size_t i = 0; i < offset; i++) + rocrand(&state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } + } +} + +TEST(mrg32k3aTest, rocrand_check_uniform_property) +{ + // Test to ensure that rocrand gives a uniform distribution + + constexpr size_t size = 10000; + + rocrand_state_mrg32k3a state; + rocrand_init(123456, 654321, 0, &state); + + std::vector output(size); + + for(size_t i = 0; i < size; i++) + output[i] = rocrand(&state); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const unsigned int mini = std::numeric_limits::min(); + const unsigned int maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(output.begin(), + output.end(), + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(size); + + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); +} + +TEST(mrg32k3aTest, host_skipahead) +{ + // Test the consistency of skipahead when the same seed and subsequence are given + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + std::vector offsets = {1, 10, 100, 1000, 10000}; + + rocrand_state_mrg32k3a state1; + rocrand_state_mrg32k3a state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + for(const unsigned long long& offset : offsets) + { + rocrand_init(seed, subsequence, 0, &state1); + rocrand_init(seed, subsequence, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } + } +} + +TEST(mrg32k3aTest, host_skipahead_subsequence) +{ + // Test the consistency of skipahead when the same seed and subsequence are given + + std::vector seeds = {1, 12, 123, 1234, 12345}; + std::vector subsequences = {54321, 4321, 321, 21, 1}; + + rocrand_state_mrg32k3a state1; + rocrand_state_mrg32k3a state2; + + for(const unsigned long long& seed : seeds) + { + for(const unsigned long long& subsequence : subsequences) + { + rocrand_init(seed, 0, 0, &state1); + rocrand_init(seed, subsequence, 0, &state2); + + skipahead_subsequence(subsequence, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} \ No newline at end of file diff --git a/projects/rocrand/test/internal/test_rocrand_mt19937_octo_engine_prng.cpp b/projects/rocrand/test/internal/test_rocrand_mt19937_octo_engine_prng.cpp new file mode 100644 index 00000000000..8db796bfacf --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_mt19937_octo_engine_prng.cpp @@ -0,0 +1,277 @@ +#include +#include + +#include +#include +#include +#include +#include + +#include + +#define MAXI(x) std::numeric_limits::max() +#define MINI(x) std::numeric_limits::min() + +// Normalize betweem 0 and 1 +#define NORMALIZE(x, type) \ + static_cast(x - MINI(type)) / static_cast(MAXI(type) - MINI(type)) + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +TEST(Mt19937OctoEngineTest, test_host_gather) +{ + // Check if a mt199370 Octo Engine state contatins the correct arrangement listed below + + /// Thread 0 has element 0, thread 1 has element 113, thread 2 has element 170, + /// thread 3 had element 283, thread 4 has element 340, thread 5 has element 397, + /// thread 6 has element 510, thread 7 has element 567. + /// Thread i for i in [0, 7) has the following elements (ipt = items_per_thread): + /// [ 1 + ipt * i, 1 + ipt * (i + 1)), [398 + ipt * i, 398 + ipt * (i + 1)), [171 + ipt * i, 171 + ipt * (i + 1)), + /// [568 + ipt * i, 568 + ipt * (i + 1)), [341 + ipt * i, 341 + ipt * (i + 1)), [114 + ipt * i, 114 + ipt * (i + 1)), + /// [511 + ipt * i, 511 + ipt * (i + 1)), [284 + ipt * i, 284 + ipt * (i + 1)), [ 57 + ipt * i, 57 + ipt * (i + 1)), + /// [454 + ipt * i, 454 + ipt * (i + 1)), [227 + ipt * i, 227 + ipt * (i + 1)) + /// + + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + const std::vector offsets = {1, 398, 171, 568, 341, 114, 511, 284, 57, 454, 227}; + const std::vector special_elem = {0, 113, 170, 283, 340, 397, 510, 567}; + const unsigned int ipt = 7; + const unsigned int vpt = 1 + ipt * 11; + + for(size_t tid = 0; tid < 8; tid++) + { + rocrand_impl::host::mt19937_octo_engine test_engine; + test_engine.gather(src.data(), dim3(tid, 0, 0)); + + std::vector expected_items; + + for(const unsigned int& offset : offsets) + { + auto left = offset + ipt * tid; + auto right = (offset + ipt * (tid + 1)); // no need to -1 since insert is exclusive + + expected_items.insert(expected_items.begin(), src.begin() + left, src.begin() + right); + } + + expected_items.insert(expected_items.begin(), special_elem[tid]); + + std::sort(expected_items.begin(), expected_items.end()); + + std::vector actual_items(vpt); + + for(size_t i = 0; i < vpt; i++) + actual_items[i] = test_engine.get(i); + + std::sort(actual_items.begin(), actual_items.end()); + + for(size_t i = 0; i < vpt; i++) + ASSERT_EQ(expected_items[i], actual_items[i]); + } +} + +unsigned int comp(unsigned int a, unsigned int b, unsigned int c) +{ + namespace constants = rocrand_impl::host::mt19937_constants; + + unsigned int x = (a & constants::upper_mask) | (b & constants::lower_mask); + unsigned int xA = x >> 1; + if(x & 1UL) + xA ^= constants::matrix_a; + + x = c ^ xA; + return x; +} + +TEST(Mt19937OctoEngineTest, comp_test) +{ + //Test the twister step + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + // the actual thread should not matter + rocrand_impl::host::mt19937_octo_engine test_engine; + test_engine.gather(src.data(), dim3(0, 0, 0)); + + for(size_t i = 0; i < constants::n; i++) + { + long long k = i; + long long j = k - (constants::n - 1); + if(j < 0) + j += constants::n; + + long long m = k - (constants::n - constants::m); + if(m < 0) + m += constants::n; + + ASSERT_EQ(comp(k, j, m), test_engine.comp(k, j, m)) + << k << " " << j << " " << m << std::endl; + } +} + +TEST(Mt19937OctoEngineTest, gen_next_n_consistency_test) +{ + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + rocrand_impl::host::mt19937_octo_engine octo_engine_a[8]; + rocrand_impl::host::mt19937_octo_engine octo_engine_b[8]; + + for(size_t i = 0; i < 8; i++) + { + octo_engine_a[i].gather(src.data(), dim3(i)); + octo_engine_b[i].gather(src.data(), dim3(i)); + } + + constexpr size_t test_size = 1000; + + for(size_t _ = 0; _ < test_size; _++) + { + rocrand_impl::host::mt19937_octo_engine::gen_next_n(octo_engine_a); + rocrand_impl::host::mt19937_octo_engine::gen_next_n(octo_engine_b); + + for(size_t tid = 0; tid < 8; tid++) + { + for(size_t i = 0; i < 78; i++) + { + ASSERT_EQ(octo_engine_a[tid].get(i), octo_engine_b[tid].get(i)); + } + } + } +} + +TEST(Mt19937OctoEngineTest, uniform_dis_test) +{ + // checks if we are uniformly distributed + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + rocrand_impl::host::mt19937_octo_engine octo_engine[8]; + + for(size_t i = 0; i < 8; i++) + { + octo_engine[i].gather(src.data(), dim3(i)); + } + + constexpr size_t test_size = 1000; + + std::vector output; + + for(size_t _ = 0; _ < test_size; _++) + { + rocrand_impl::host::mt19937_octo_engine::gen_next_n(octo_engine); + + for(size_t tid = 0; tid < 8; tid++) + { + for(size_t i = 0; i < 78; i++) + { + unsigned int x = octo_engine[tid].get(i); + + double normalized = NORMALIZE(x, unsigned int); + + output.push_back(normalized); + } + } + } + + double actual_mean + = std::accumulate(output.begin(), output.end(), (double)0) / static_cast(output.size()); + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + (double)0, + [=](double acc, double x) + { return acc + std::powf(x - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(output.size() - 1)); + + double expected_mean = 0.5; + double expected_std_dev = 0.288675134595; //sqrt(1/12) + + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.01); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.01); +} + +TEST(Mt19937OctoEngineAccessorTest, load_test) +{ + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + const unsigned int ipt = 7; + const unsigned int vpt = 1 + ipt * 11; + + rocrand_impl::host::mt19937_octo_engine_accessor<8> accessor(src.data()); + + for(size_t tid = 0; tid < 8; tid++) + { + auto accessor_engine = accessor.load(tid); + + for(size_t i = 0; i < vpt; i++) + { + ASSERT_EQ(accessor_engine.get(i), src[i * 8 + tid]); + } + } +} + +TEST(Mt19937OctoEngineAccessorTest, load_value_test) +{ + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + const unsigned int ipt = 7; + const unsigned int vpt = 1 + ipt * 11; + + rocrand_impl::host::mt19937_octo_engine_accessor<8> accessor(src.data()); + + for(size_t tid = 0; tid < 8; tid++) + { + auto engine = accessor.load(tid); + + for(size_t i = 0; i < vpt; i++) + { + ASSERT_EQ(engine.get(i), accessor.load_value(tid, i)); + } + } +} + +TEST(Mt19937OctoEngineAccessorTest, save_test) +{ + namespace constants = rocrand_impl::host::mt19937_constants; + + std::vector src(constants::n); + std::iota(src.begin(), src.end(), 0); + + const unsigned int ipt = 7; + const unsigned int vpt = 1 + ipt * 11; + + std::vector octo_engine(8); + rocrand_impl::host::mt19937_octo_engine_accessor<8> accessor(src.data()); + + for(size_t tid = 0; tid < 8; tid++) + { + + octo_engine[tid].gather(src.data(), dim3(tid, 0, 0)); + + accessor.save(tid, octo_engine[tid]); + + for(size_t i = 0; i < vpt; i++) + { + ASSERT_EQ(octo_engine[tid].get(i), accessor.load_value(tid, i)); + } + } +} diff --git a/projects/rocrand/test/internal/test_rocrand_mt19937_prng.cpp b/projects/rocrand/test/internal/test_rocrand_mt19937_prng.cpp index 51b61011511..2e29ac186ee 100644 --- a/projects/rocrand/test/internal/test_rocrand_mt19937_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_mt19937_prng.cpp @@ -536,8 +536,10 @@ using mt19937_generator_engine_tests_types = ::testing::Types TYPED_TEST_SUITE(mt19937_generator_engine_tests, mt19937_generator_engine_tests_types); /// Initialize the octo engines for both generators. Skip \p subsequence_size for the first generator. -__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_engines_kernel( - mt19937_octo_engine* octo_engines, const unsigned int* engines, unsigned int subsequence_size) +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) +void init_engines_kernel(mt19937_octo_engine* octo_engines, + const unsigned int* engines, + unsigned int subsequence_size) { constexpr unsigned int n = mt19937_constants::n; const unsigned int thread_id = blockIdx.x * blockDim.x + threadIdx.x; @@ -558,11 +560,11 @@ __global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void init_engines_k } /// Each generator produces \p n elements in its own \p data section. -__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) void generate_kernel( - mt19937_octo_engine* engines, - unsigned int* data, - unsigned int elements_per_generator, - unsigned int subsequence_size) +__global__ __launch_bounds__(ROCRAND_DEFAULT_MAX_BLOCK_SIZE) +void generate_kernel(mt19937_octo_engine* engines, + unsigned int* data, + unsigned int elements_per_generator, + unsigned int subsequence_size) { constexpr unsigned int n = mt19937_constants::n; constexpr unsigned int threads_per_generator = mt19937_octo_engine::threads_per_generator; diff --git a/projects/rocrand/test/internal/test_rocrand_mtgp32_prng.cpp b/projects/rocrand/test/internal/test_rocrand_mtgp32_prng.cpp index aec3bfe7424..3f373b13988 100644 --- a/projects/rocrand/test/internal/test_rocrand_mtgp32_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_mtgp32_prng.cpp @@ -24,11 +24,25 @@ #include #include +#include #include #include +#define ROCRAND_ERROR_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + using rocrand_impl::host::mtgp32_generator; // Generator API tests @@ -45,3 +59,385 @@ INSTANTIATE_TYPED_TEST_SUITE_P(mtgp32_generator, // INSTANTIATE_TYPED_TEST_SUITE_P(rocrand_mtgp32, // generator_prng_continuity_tests, // rocrand_mtgp32_generator_prng_tests_types); + +TEST(AdditionalTests, rocrand_make_constant) +{ + // test to make sure the copy is working and that all data is being coverted properly + rocrand_device::mtgp32_fast_params* src_params = mtgp32dc_params_fast_11213; + mtgp32_params* device_params; + HIP_CHECK(hipMalloc(&device_params, sizeof(mtgp32_params))); + + // Bring it to device side + ROCRAND_CHECK(rocrand_make_constant(src_params, device_params)); + + //Bring it back to host to check that everything is the same + mtgp32_params host_params[1]; + + HIP_CHECK( + hipMemcpy(host_params, device_params, sizeof(mtgp32_params) * 1, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < mtgpdc_params_11213_num; i++) + { + rocrand_device::mtgp32_fast_params original = src_params[i]; + mtgp32_params copy = host_params[0]; + + ASSERT_EQ(original.pos, copy.pos_tbl[i]); + ASSERT_EQ(original.sh1, copy.sh1_tbl[i]); + ASSERT_EQ(original.sh2, copy.sh2_tbl[i]); + + for(size_t ii = 0; ii < MTGP_TS; ii++) + { + ASSERT_EQ(original.tbl[ii], copy.param_tbl[i][ii]); + ASSERT_EQ(original.tmp_tbl[ii], copy.temper_tbl[i][ii]); + ASSERT_EQ(original.flt_tmp_tbl[ii], copy.single_temper_tbl[i][ii]); + } + + ASSERT_EQ(original.mask, copy.mask[0]); + } + HIP_CHECK(hipFree(device_params)); +} + +template +__global__ +void rocrand_kernel(rocrand_state_mtgp32* states, unsigned int* device_output) +{ + constexpr size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + __shared__ rocrand_state_mtgp32 state; + for(size_t i = 0; i < items_per_thread; i++) + { + + if(threadIdx.x == 0) + state = states[blockIdx.x]; + __syncthreads(); + + device_output[offset + i] = rocrand(&state); + + if(threadIdx.x == 0) + states[blockIdx.x] = state; + __syncthreads(); + } +} + +TEST(AdditionalTests, rocrand_check_uniform_property) +{ + //Test of rocrand returns a uniformly distributed distribution + constexpr size_t items_per_thread = 512; + constexpr size_t block_size = 16; + constexpr size_t grid_size = 16; + + constexpr size_t items_per_block = items_per_thread * block_size; + constexpr size_t size = items_per_block * grid_size; + + rocrand_state_mtgp32* states; + HIP_CHECK(hipMalloc(&states, sizeof(rocrand_state_mtgp32) * grid_size)); + + rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, grid_size, 0); + + unsigned int* device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(unsigned int) * size)); + + rocrand_kernel + <<>>(states, device_output); + + unsigned int* host_output = new unsigned int[size]; + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const unsigned int mini = std::numeric_limits::min(); + const unsigned int maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + size, + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(size); + + double actual_std_dev = std::accumulate(host_output, + host_output + size, + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + delete[] host_output; + + HIP_CHECK(hipFree(states)); + HIP_CHECK(hipFree(device_output)); +} + +__global__ +void rocrand_mtgp32_block_copy_kernel(rocrand_state_mtgp32* src_states, + rocrand_state_mtgp32* dest_states) +{ + rocrand_mtgp32_block_copy(src_states + blockIdx.x, dest_states + blockIdx.x); +} + +template +__global__ +void rocrand_kernel(rocrand_state_mtgp32* states1, + rocrand_state_mtgp32* states2, + unsigned int* device_output1, + unsigned int* device_output2) +{ + constexpr size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + __shared__ rocrand_state_mtgp32 src_state; + __shared__ rocrand_state_mtgp32 dest_state; + for(size_t i = 0; i < items_per_thread; i++) + { + + if(threadIdx.x == 0) + { + src_state = states1[blockIdx.x]; + dest_state = states2[blockIdx.x]; + } + __syncthreads(); + + device_output1[offset + i] = rocrand(&src_state); + device_output2[offset + i] = rocrand(&dest_state); + + if(threadIdx.x == 0) + { + states1[blockIdx.x] = src_state; + states2[blockIdx.x] = dest_state; + } + __syncthreads(); + } +} + +TEST(AdditionalTests, rocrand_mtgp32_block_copy) +{ + //Test of to make sure rocrand_mtgp32_block_copy is coppying corectly + constexpr size_t items_per_thread = 1024; + constexpr size_t block_size = 16; + constexpr size_t grid_size = 16; + + constexpr size_t items_per_block = items_per_thread * block_size; + constexpr size_t size = items_per_block * grid_size; + + rocrand_state_mtgp32* src_states; + HIP_CHECK(hipMalloc(&src_states, sizeof(rocrand_state_mtgp32) * grid_size)); + + rocrand_state_mtgp32* dest_states; + HIP_CHECK(hipMalloc(&dest_states, sizeof(rocrand_state_mtgp32) * grid_size)); + rocrand_make_state_mtgp32(src_states, mtgp32dc_params_fast_11213, grid_size, 0); + + unsigned int* src_device_output; + HIP_CHECK(hipMalloc(&src_device_output, sizeof(unsigned int) * size)); + + unsigned int* pram_set_output; + HIP_CHECK(hipMalloc(&pram_set_output, sizeof(unsigned int) * size)); + + rocrand_mtgp32_block_copy_kernel<<>>(src_states, + dest_states); + + rocrand_kernel + <<>>(src_states, + dest_states, + src_device_output, + pram_set_output); + + unsigned int* src_host_output = new unsigned int[size]; + HIP_CHECK(hipMemcpy(src_host_output, + src_device_output, + sizeof(unsigned int) * size, + hipMemcpyDeviceToHost)); + + unsigned int* dest_host_output = new unsigned int[size]; + HIP_CHECK(hipMemcpy(dest_host_output, + pram_set_output, + sizeof(unsigned int) * size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < size; i++) + ASSERT_EQ(src_host_output[i], dest_host_output[i]) << "Index: " << i; + + delete[] src_host_output; + delete[] dest_host_output; + + HIP_CHECK(hipFree(src_states)); + HIP_CHECK(hipFree(src_device_output)); + HIP_CHECK(hipFree(dest_states)); + HIP_CHECK(hipFree(pram_set_output)); +} + +__global__ +void rocrand_mtgp32_set_params_kernel(rocrand_state_mtgp32* states, mtgp32_params* params) +{ + rocrand_mtgp32_set_params(states + blockIdx.x, params); +} + +TEST(AdditionalTests, rocrand_mtgp32_set_params) +{ + //Test of to make sure rocrand_mtgp32_set_params is setting parameter correctly + constexpr size_t items_per_thread = 1024; + constexpr size_t block_size = 16; + constexpr size_t grid_size = 16; + + constexpr size_t items_per_block = items_per_thread * block_size; + constexpr size_t size = items_per_block * grid_size; + + rocrand_state_mtgp32* created_states; + HIP_CHECK(hipMalloc(&created_states, sizeof(rocrand_state_mtgp32) * grid_size)); + rocrand_make_state_mtgp32(created_states, mtgp32dc_params_fast_11213, grid_size, 0); + + rocrand_state_mtgp32* param_set_states; + HIP_CHECK(hipMalloc(¶m_set_states, sizeof(rocrand_state_mtgp32) * grid_size)); + rocrand_make_state_mtgp32(param_set_states, mtgp32dc_params_fast_11213, grid_size, 0); + + unsigned int* src_device_output; + HIP_CHECK(hipMalloc(&src_device_output, sizeof(unsigned int) * size)); + + unsigned int* pram_set_output; + HIP_CHECK(hipMalloc(&pram_set_output, sizeof(unsigned int) * size)); + + mtgp32_params* device_params; + HIP_CHECK(hipMalloc(&device_params, sizeof(mtgp32_params))); + ROCRAND_CHECK(rocrand_make_constant(mtgp32dc_params_fast_11213, device_params)); + + rocrand_mtgp32_set_params_kernel<<>>(param_set_states, device_params); + + rocrand_kernel + <<>>(created_states, + param_set_states, + src_device_output, + pram_set_output); + unsigned int* src_host_output = new unsigned int[size]; + HIP_CHECK(hipMemcpy(src_host_output, + src_device_output, + sizeof(unsigned int) * size, + hipMemcpyDeviceToHost)); + + unsigned int* dest_host_output = new unsigned int[size]; + HIP_CHECK(hipMemcpy(dest_host_output, + pram_set_output, + sizeof(unsigned int) * size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < size; i++) + ASSERT_EQ(src_host_output[i], dest_host_output[i]) << "Index: " << i; + + delete[] src_host_output; + delete[] dest_host_output; + + HIP_CHECK(hipFree(created_states)); + HIP_CHECK(hipFree(src_device_output)); + HIP_CHECK(hipFree(param_set_states)); + HIP_CHECK(hipFree(pram_set_output)); + HIP_CHECK(hipFree(device_params)); +} + +template +__global__ +void operator_kernel(rocrand_state_mtgp32* states, unsigned int* device_output) +{ + constexpr size_t items_per_block = items_per_thread * block_size; + const size_t offset = (items_per_block * blockIdx.x) + (items_per_thread * threadIdx.x); + + __shared__ rocrand_state_mtgp32 state; + for(size_t i = 0; i < items_per_thread; i++) + { + + if(threadIdx.x == 0) + state = states[blockIdx.x]; + __syncthreads(); + + device_output[offset + i] = state(); + + if(threadIdx.x == 0) + states[blockIdx.x] = state; + __syncthreads(); + } +} + +TEST(AdditionalTests, operator_check_uniform_property) +{ + //Test of rocrand returns a uniformly distributed distribution + constexpr size_t items_per_thread = 1024; + constexpr size_t block_size = 16; + constexpr size_t grid_size = 16; + + constexpr size_t items_per_block = items_per_thread * block_size; + constexpr size_t size = items_per_block * grid_size; + + rocrand_state_mtgp32* states; + HIP_CHECK(hipMalloc(&states, sizeof(rocrand_state_mtgp32) * grid_size)); + + rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, grid_size, 0); + + unsigned int* device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(unsigned int) * size)); + + operator_kernel + <<>>(states, device_output); + + unsigned int* host_output = new unsigned int[size]; + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(unsigned int) * size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const unsigned int mini = std::numeric_limits::min(); + const unsigned int maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + size, + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(size); + + double actual_std_dev = std::accumulate(host_output, + host_output + size, + (double)0.0, + [=](double acc, unsigned int x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + delete[] host_output; + + HIP_CHECK(hipFree(states)); + HIP_CHECK(hipFree(device_output)); +} diff --git a/projects/rocrand/test/internal/test_rocrand_normal.cpp b/projects/rocrand/test/internal/test_rocrand_normal.cpp new file mode 100644 index 00000000000..76f2a8e7569 --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_normal.cpp @@ -0,0 +1,807 @@ +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include + +#undef ROCRAND_DETAIL_BM_NOT_IN_STATE +#include + +#define HIP_CHECK(cmd) \ + do \ + { \ + auto error = (cmd); \ + if(error != hipSuccess) \ + { \ + std::cerr << "Encountered HIP error (" << hipGetErrorString(error) << ") at line " \ + << __LINE__ << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +#define ROCRAND_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(status); \ + } \ + } \ + while(0) + +// If x is small then get withing 0.001 otherwise 10% +#define GET_EPS(x) x < 0.01 ? 0.01 : x * 0.1 + +template +ReturnType get_actual_mean(const size_t test_size, + const size_t out_size, + StartIt begin, + EndIt end, + const ReadMeanFunc& rmf) +{ + ReturnType actual_mean = std::accumulate(begin, + end, + (ReturnType)0, + [=](ReturnType acc, OutputType x) + { return acc + static_cast(rmf(x)); }) + / static_cast(test_size * out_size); + return actual_mean; +} + +template +ReturnType get_actual_std_dev(const size_t test_size, + const size_t out_size, + StartIt begin, + EndIt end, + ReturnType actual_mean, + const ReadStdFunc& rsf) +{ + ReturnType actual_std_dev + = std::accumulate(begin, + end, + (ReturnType)0, + [=](ReturnType acc, OutputType x) + { return acc + static_cast(rsf(x, actual_mean)); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size * out_size - 1)); + return actual_std_dev; +} + +template +struct StateParams +{ + using out_type = OutputType; + using rng = RocrandPRNGType; + static constexpr size_t out_size = OutSize; +}; + +using NormalDistributionStateParam + = ::testing::Types, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams, + StateParams>; + +template +class NormalDistributionRocRandStateTest : public ::testing::Test +{ +public: + using out_type = typename StateParams::out_type; + using prng_type = typename StateParams::rng; + static constexpr size_t out_size = StateParams::out_size; +}; +TYPED_TEST_SUITE(NormalDistributionRocRandStateTest, NormalDistributionStateParam); + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +template +inline void GetHostRocrandState(RocrandPRNGType* host_state) +{ + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state); + } + else + { + rocrand_init(123456, 654321, 0, host_state); + } +} + +template +void run_host_prng_test(const NormalDistFunc& ndf, const ReadMeanFunc& rmf, ReadStdFunc& rsf) +{ + constexpr size_t test_size = 100000; + const T expected_mean = 0; + const T expected_std_dev = 1; + + RocrandPRNGType generator; + GetHostRocrandState(&generator); + + std::vector output(test_size); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = ndf(&generator); + } + + T actual_mean + = get_actual_mean(test_size, OutSize, output.begin(), output.end(), rmf); + T actual_std_dev = get_actual_std_dev(test_size, + OutSize, + output.begin(), + output.end(), + actual_mean, + rsf); + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); +} + +template +void run_host_numeric_test(const NormalDistFunc& ndf, const ReadMeanFunc& rmf, ReadStdFunc& rsf) +{ + constexpr size_t test_size = 100000; + const double expected_mean = 0; + const double expected_std_dev = 1; + + std::vector output(test_size); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(std::numeric_limits::min(), + std::numeric_limits::max()); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = ndf(dis, gen); + } + + double actual_mean = get_actual_mean(test_size, + OutSize, + output.begin(), + output.end(), + rmf); + double actual_std_dev = get_actual_std_dev(test_size, + OutSize, + output.begin(), + output.end(), + actual_mean, + rsf); + double mean_eps = GET_EPS(expected_mean); + double std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); +} + +TYPED_TEST(NormalDistributionRocRandStateTest, rocrand_host_state_tests) +{ + using out_type = typename TestFixture::out_type; + using rocrand_state = typename TestFixture::prng_type; + constexpr size_t out_size = TestFixture::out_size; + using T + = std::conditional_t<(std::is_same_v || std::is_same_v + || std::is_same_v), + float, + double>; + if constexpr(out_size == 1) + { + auto mean_func = [](out_type x) { return x; }; + auto std_dev_func + = [](out_type x, out_type actual_mean) { return std::powf(x - actual_mean, 2); }; + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double(state); }, + mean_func, + std_dev_func); + } + } + else if constexpr(out_size == 2) + { + auto mean_func = [](out_type x) { return x.x + x.y; }; + auto std_dev_func = [](out_type x, T actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal2(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double2(state); }, + mean_func, + std_dev_func); + } + } + else + { + auto mean_func = [](out_type x) { return x.x + x.y + x.w + x.z; }; + auto std_dev_func = [](out_type x, T actual_mean) + { + return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2) + + std::powf(x.w - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal4(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double4(state); }, + mean_func, + std_dev_func); + } + } +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_uint_in_float_out_test) +{ + using OutputType = float; + using InputType = unsigned int; + constexpr size_t OutputSize = 1; + + auto mean_func = [](OutputType x) { return x; }; + auto std_dev_func + = [](OutputType x, double actual_mean) { return std::powf(x - actual_mean, 2); }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution(dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_ullint_in_float_out_test) +{ + using OutputType = float; + using InputType = unsigned long long int; + constexpr size_t OutputSize = 1; + + auto mean_func = [](OutputType x) { return x; }; + auto std_dev_func + = [](OutputType x, double actual_mean) { return std::powf(x - actual_mean, 2); }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution(dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_2uint_in_float2_out_test) +{ + using OutputType = float2; + using InputType = unsigned int; + constexpr size_t OutputSize = 2; + + auto mean_func = [](OutputType x) { return x.x + x.y; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution2(dis(gen), dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_uint2_in_float2_out_test) +{ + using OutputType = float2; + using InputType = unsigned int; + constexpr size_t OutputSize = 2; + + auto mean_func = [](OutputType x) { return x.x + x.y; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) { + return rocrand_device::detail::normal_distribution2(uint2{dis(gen), dis(gen)}); + }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_ull_in_float2_out_test) +{ + using OutputType = float2; + using InputType = unsigned long long; + constexpr size_t OutputSize = 2; + + auto mean_func = [](OutputType x) { return x.x + x.y; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution2(dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_uint4_in_float4_out_test) +{ + using OutputType = float4; + using InputType = unsigned int; + constexpr size_t OutputSize = 4; + + auto mean_func = [](OutputType x) { return x.w + x.x + x.y + x.z; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { + return rocrand_device::detail::normal_distribution4( + uint4{dis(gen), dis(gen), dis(gen), dis(gen)}); + }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_longlong2_in_float4_out_test) +{ + using OutputType = float4; + using InputType = long long; + constexpr size_t OutputSize = 4; + + auto mean_func = [](OutputType x) { return x.w + x.x + x.y + x.z; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) { + return rocrand_device::detail::normal_distribution4(longlong2{dis(gen), dis(gen)}); + }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_2ull_in_float4_out_test) +{ + using OutputType = float4; + using InputType = unsigned long long; + constexpr size_t OutputSize = 4; + + auto mean_func = [](OutputType x) { return x.w + x.x + x.y + x.z; }; + auto std_dev_func = [](OutputType x, double actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution4(dis(gen), dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_uint_in_half2_out_test) +{ + using OutputType = __half2; + using InputType = unsigned int; + constexpr size_t OutputSize = 2; + + auto mean_func = [](OutputType x) { return static_cast(x.x) + static_cast(x.y); }; + auto std_dev_func = [](OutputType x, double actual_mean) + { + float f = static_cast(x.x) - actual_mean; + float s = static_cast(x.y) - actual_mean; + return (f * f) + (s * s); + }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution_half2(dis(gen)); }, + mean_func, + std_dev_func); +} + +TEST(NormalDistributionRocRandNumericTest, rocrand_host_numeric_ull_in_half2_out_test) +{ + using OutputType = __half2; + using InputType = unsigned long long; + constexpr size_t OutputSize = 2; + + auto mean_func = [](OutputType x) { return static_cast(x.x) + static_cast(x.y); }; + auto std_dev_func = [](OutputType x, double actual_mean) + { + float f = static_cast(x.x) - actual_mean; + float s = static_cast(x.y) - actual_mean; + return (f * f) + (s * s); + }; + + run_host_numeric_test( + [=](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::normal_distribution_half2(dis(gen)); }, + mean_func, + std_dev_func); +} + +/* ################################################# + + TEST DEVICE SIDE + + ###############################################*/ + +struct GlobalSizes +{ + static constexpr size_t items_per_thread = 50000; + static constexpr size_t block_size = 16; // Number of threads + static constexpr size_t items_per_block = items_per_thread * block_size; + static constexpr size_t grid_size = 16; // Number of blocks + static constexpr size_t size = grid_size * items_per_block; +}; + +template +inline void GetDeviceRocrandState(RocrandPRNGType* device_prngs) +{ + // Initialize for device code rocrand state. Each thread will get 1 "state" + // Assumed that device_prngs is already initialized + + std::vector host_states(GlobalSizes::block_size * GlobalSizes::grid_size); + + for(size_t bi = 0; bi < GlobalSizes::grid_size; bi++) + { + for(size_t ti = 0; ti < GlobalSizes::block_size; ti++) + { + const size_t offset = bi * GlobalSizes::block_size; + const size_t prng_offset + = (GlobalSizes::items_per_block * bi) + (GlobalSizes::items_per_thread * ti); + + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, + ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + rocrand_init(directions, prng_offset, &host_states[offset + ti]); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, + ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, prng_offset, &host_states[offset + ti]); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, + ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, prng_offset, &host_states[offset + ti]); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, + ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, prng_offset, &host_states[offset + ti]); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, + 0, + prng_offset, + &host_states[offset + ti]); + } + else + { + rocrand_init(123456, 654321, prng_offset, &host_states[offset + ti]); + } + } + } + + HIP_CHECK(hipMemcpy(device_prngs, + host_states.data(), + sizeof(RocrandPRNGType) * GlobalSizes::grid_size * GlobalSizes::block_size, + hipMemcpyHostToDevice)); +} + +template +__global__ +void normal_distribution_kernel(OutType* device_output, + RocRandPrngType* device_prngs, + const GenFunc& gf) +{ + const size_t offset = (GlobalSizes::items_per_block * blockIdx.x) + + (GlobalSizes::items_per_thread * threadIdx.x); + const size_t prng_offset = (GlobalSizes::block_size * blockIdx.x) + threadIdx.x; + + auto prng = device_prngs + prng_offset; + for(size_t i = 0; i < GlobalSizes::items_per_thread; i++) + { + device_output[offset + i] = gf(prng); + } + + device_prngs[prng_offset] = *prng; +} + +template +void run_device_prng_test(const GenFunc& gf, + const ReadMeanFunc& rmf, + const ReadStdFunc& rsf, + const size_t out_size) +{ + RocRandPrngType* prngs; + HIP_CHECK( + hipMalloc(&prngs, + sizeof(RocRandPrngType) * GlobalSizes::block_size * GlobalSizes::grid_size)); + + GetDeviceRocrandState(prngs); + + std::vector host_output(GlobalSizes::size); + + OutputType* device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(OutputType) * GlobalSizes::size)); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(normal_distribution_kernel), + dim3(GlobalSizes::grid_size), + dim3(GlobalSizes::block_size), + 0, + 0, + device_output, + prngs, + gf); + HIP_CHECK(hipMemcpy(host_output.data(), + device_output, + sizeof(OutputType) * GlobalSizes::size, + hipMemcpyDeviceToHost)); + + const T expected_mean = 0; + const T expected_std_dev = 1; + + T actual_mean = get_actual_mean(GlobalSizes::size, + out_size, + host_output.begin(), + host_output.end(), + rmf); + T actual_std_dev = get_actual_std_dev(GlobalSizes::size, + out_size, + host_output.begin(), + host_output.end(), + actual_mean, + rsf); + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); + + HIP_CHECK(hipFree(prngs)); + HIP_CHECK(hipFree(device_output)); +} + +TYPED_TEST(NormalDistributionRocRandStateTest, rocrand_device_state_tests) +{ + using out_type = typename TestFixture::out_type; + using rocrand_state = typename TestFixture::prng_type; + constexpr size_t out_size = TestFixture::out_size; + using T + = std::conditional_t<(std::is_same_v || std::is_same_v + || std::is_same_v), + float, + double>; + if constexpr(out_size == 1) + { + auto read_mean = [](out_type x) { return x; }; + auto read_std = [](out_type x, T actual_mean) { return std::powf(x - actual_mean, 2); }; + if constexpr(std::is_same_v) + { + run_device_prng_test([=](rocrand_state* state) + { return rocrand_normal(state); }, + read_mean, + read_std, + out_size); + } + else + { + run_device_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double(state); }, + read_mean, + read_std, + out_size); + } + } + + if constexpr(out_size == 2) + { + auto read_mean = [](out_type x) { return x.x + x.y; }; + auto read_std = [](out_type x, T actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + if constexpr(std::is_same_v) + { + run_device_prng_test([=](rocrand_state* state) + { return rocrand_normal2(state); }, + read_mean, + read_std, + out_size); + } + else + { + run_device_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double2(state); }, + read_mean, + read_std, + out_size); + } + } + else if constexpr(out_size == 4) + { + auto read_mean = [](out_type x) { return x.w + x.x + x.y + x.z; }; + auto read_std = [](out_type x, T actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + if constexpr(std::is_same_v) + { + run_device_prng_test([=](rocrand_state* state) + { return rocrand_normal4(state); }, + read_mean, + read_std, + out_size); + } + else + { + run_device_prng_test( + [=](rocrand_state* state) { return rocrand_normal_double4(state); }, + read_mean, + read_std, + out_size); + } + } +} diff --git a/projects/rocrand/test/internal/test_rocrand_philox_prng.cpp b/projects/rocrand/test/internal/test_rocrand_philox_prng.cpp index 7470cabcfdb..c1d1aee737c 100644 --- a/projects/rocrand/test/internal/test_rocrand_philox_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_philox_prng.cpp @@ -66,7 +66,8 @@ class philox4x32_10_engine_type_test : public philox4x32_10_generator::engine_ty public: __host__ philox4x32_10_engine_type_test() : philox4x32_10_generator::engine_type(0, 0, 0) {} - __host__ state_type& internal_state_ref() + __host__ + state_type& internal_state_ref() { return m_state; } diff --git a/projects/rocrand/test/internal/test_rocrand_poisson.cpp b/projects/rocrand/test/internal/test_rocrand_poisson.cpp new file mode 100644 index 00000000000..f93051a0659 --- /dev/null +++ b/projects/rocrand/test/internal/test_rocrand_poisson.cpp @@ -0,0 +1,565 @@ +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include +#include + +#include +#include +#include +#include + +#undef ROCRAND_DETAIL_BM_NOT_IN_STATE +#include + +#define HIP_CHECK(cmd) \ + do \ + { \ + auto error = (cmd); \ + if(error != hipSuccess) \ + { \ + std::cerr << "Encountered HIP error (" << hipGetErrorString(error) << ") at line " \ + << __LINE__ << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +#define ROCRAND_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(status); \ + } \ + } \ + while(0) + +// If x is small then get withing 0.001 otherwise 5% +#define GET_EPS(x) x < 0.01 ? 0.01 : x * 0.05 + +// If x is small then get withing 0.001 otherwise 20% +#define GET_EPS_DEVICE(x) x < 0.01 ? 0.01 : x * 0.2 + +#define IS_SOBOL(x) \ + (std::is_same_v || std::is_same_v \ + || std::is_same_v \ + || std::is_same_v) + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +template +inline void GetHostRocrandState(RocrandPRNGType* host_state) +{ + + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state); + } + else + { + rocrand_init(123456, 654321, 0, host_state); + } +} + +using PoissonParams = ::testing::Types; + +template +class PoissonTest : public ::testing::Test +{ +public: + using rocrand_prng_type = T; + std::vector small_poisson_lambdas = {1, 2, 4, 8, 16, 32, 64}; + std::vector large_poisson_lambdas = {128, 256, 512, 1024, 2048}; + std::vector massive_poisson_lambdas = {4096, 8192, 16384, 32768}; +}; + +TYPED_TEST_SUITE(PoissonTest, PoissonParams); + +template +void run_device_poisson_test(const PoissonFunc& pf, std::vector& all_lambdas) +{ + constexpr size_t test_size = 100000; + + PrngState state; + GetHostRocrandState(&state); + + std::vector output(test_size); + + for(const double& lambda : all_lambdas) + { + double expected_mean = lambda; + double expected_std_dev = std::sqrt(lambda); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = pf(&state, lambda); + } + double actual_mean = std::accumulate(output.begin(), + output.end(), + (double)0, + [=](double acc, OutputType x) + { return acc + static_cast(x); }) + / static_cast(test_size); + double actual_std_dev + = std::accumulate(output.begin(), + output.end(), + (double)0, + [=](double acc, OutputType x) + { return acc + std::pow(static_cast(x) - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size - 1)); + + double mean_eps = expected_mean * 0.05; + double std_dev_eps = expected_std_dev * 0.05; + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); + } +} + +TYPED_TEST(PoissonTest, test_host_small_lambda) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(PrngState)) + { + std::vector small_lambdas = {1, 2, 4, 8, 16, 32, 64}; + + run_device_poisson_test( + [=](PrngState* state, double lambda) + { return rocrand_device::detail::poisson_distribution_small(state, lambda); }, + small_lambdas); + } +} + +TYPED_TEST(PoissonTest, test_host_large_lambda) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(PrngState)) + { + std::vector large_lambdas = {128, 256, 512, 1024, 2048}; + + run_device_poisson_test( + [=](PrngState* state, double lambda) + { return rocrand_device::detail::poisson_distribution_large(state, lambda); }, + large_lambdas); + } +} + +TYPED_TEST(PoissonTest, test_host_huge_lambda) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(PrngState)) + { + std::vector huge_lambdas = {4096, 8192}; + + run_device_poisson_test( + [=](PrngState* state, double lambda) + { return rocrand_device::detail::poisson_distribution_huge(state, lambda); }, + huge_lambdas); + } +} + +TYPED_TEST(PoissonTest, test_host_all_lambda) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(PrngState)) + { + std::vector all_lambdas = {64, 2048, 4096}; + + run_device_poisson_test( + [=](PrngState* state, double lambda) + { return rocrand_device::detail::poisson_distribution(state, lambda); }, + all_lambdas); + } +} + +TYPED_TEST(PoissonTest, test_host_inv) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + std::vector all_lambdas = {1, 2, 4, 1024, 2048}; + + run_device_poisson_test( + [=](PrngState* state, double lambda) + { return rocrand_device::detail::poisson_distribution_inv(state, lambda); }, + all_lambdas); +} + +TYPED_TEST(PoissonTest, test_host_rocrand_poisson) +{ + using PrngState = typename TestFixture::rocrand_prng_type; + + std::vector all_lambdas = { + 32, + 64, + 1024, + 2048, + 4096, + }; + + run_device_poisson_test([=](PrngState* state, double lambda) + { return rocrand_poisson(state, lambda); }, + all_lambdas); +} + +/* ################################################# + + TEST DEVICE SIDE + + ###############################################*/ + +struct GlobalSizes +{ + static constexpr size_t items_per_thread = 10000; + static constexpr size_t block_size = 8; + static constexpr size_t items_per_block = items_per_thread * block_size; + static constexpr size_t grid_size = 8; + static constexpr size_t size = grid_size * items_per_block; +}; + +//get the rocrand state (device_state should be allocated) +template +inline void GetDeviceRocrandState(RocrandPRNGType* device_prngs) +{ + // Initialize for device code rocrand state. Each thread will get 1 "state" + // Assumed that device_prngs is already initialized + + std::vector host_states(GlobalSizes::block_size * GlobalSizes::grid_size); + + for(size_t bi = 0; bi < GlobalSizes::grid_size; bi++) + { + for(size_t ti = 0; ti < GlobalSizes::block_size; ti++) + { + const size_t offset = bi * GlobalSizes::block_size; + const size_t prng_offset + = (GlobalSizes::items_per_block * bi) + (GlobalSizes::items_per_thread * ti); + + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, + ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + rocrand_init(directions, prng_offset, &host_states[offset + ti]); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, + ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, prng_offset, &host_states[offset + ti]); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, + ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, prng_offset, &host_states[offset + ti]); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, + ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, prng_offset, &host_states[offset + ti]); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, + 0, + prng_offset, + &host_states[offset + ti]); + } + else + { + rocrand_init(123456, 654321, prng_offset, &host_states[offset + ti]); + } + } + } + + HIP_CHECK(hipMemcpy(device_prngs, + host_states.data(), + sizeof(RocrandPRNGType) * GlobalSizes::grid_size * GlobalSizes::block_size, + hipMemcpyHostToDevice)); +} + +// Declaring typed test parameters + +template +__global__ +void poisson_kernel(RocRandPrngType* states, + ReturnType* device_output, + const double lambda, + const PoissonFunc& f) +{ + const size_t offset = (GlobalSizes::items_per_block * blockIdx.x) + + (GlobalSizes::items_per_thread * threadIdx.x); + const size_t state_offset = (GlobalSizes::block_size * blockIdx.x) + threadIdx.x; + + auto state = states + state_offset; + for(size_t i = 0; i < GlobalSizes::items_per_thread; i++) + device_output[offset + i] = f(state, lambda); + + states[state_offset] = *state; +} + +// read_func is how to interpret the output (needed for special case like uint4) +// size_multiplier is needed for special cases like uint4 where each element is actually 4 +template +void run_poisson_test(std::vector& all_lambdas, + const PoissonFunc& f, + const ReadFunc& read_func, + const size_t size_multiplier = 1) +{ + ReturnType* host_output = new ReturnType[GlobalSizes::size]; + ReturnType* device_output; + HIP_CHECK(hipMalloc(&device_output, sizeof(ReturnType) * GlobalSizes::size)); + + RocRandPrngType* device_state; + HIP_CHECK( + hipMalloc(&device_state, + sizeof(RocRandPrngType) * GlobalSizes::block_size * GlobalSizes::grid_size)); + GetDeviceRocrandState(device_state); + for(const double lambda : all_lambdas) + { + double expected_mean = lambda; + double expected_std_dev = std::sqrt(lambda); + double mean_tol = GET_EPS_DEVICE(expected_mean); + double std_tol = GET_EPS_DEVICE(expected_std_dev); + + hipLaunchKernelGGL(HIP_KERNEL_NAME(poisson_kernel), + dim3(GlobalSizes::grid_size), + dim3(GlobalSizes::block_size), + 0, + 0, + device_state, + device_output, + lambda, + f); + HIP_CHECK(hipMemcpy(host_output, + device_output, + sizeof(ReturnType) * GlobalSizes::size, + hipMemcpyDeviceToHost)); + + for(size_t block_idx = 0; block_idx < GlobalSizes::grid_size; block_idx++) + { + for(size_t thread_idx = 0; thread_idx < GlobalSizes::block_size; thread_idx++) + { + + size_t offset = (block_idx * GlobalSizes::items_per_block) + + (thread_idx * GlobalSizes::items_per_thread); + + double actual_mean + = std::accumulate(host_output + offset, + host_output + offset + GlobalSizes::items_per_thread, + (double)0, + [=](double acc, ReturnType x) { return acc + read_func(x); }) + / static_cast(GlobalSizes::items_per_thread * size_multiplier); + double actual_std_dev + = std::accumulate(host_output + offset, + host_output + offset + GlobalSizes::items_per_thread, + (double)0, + [=](double acc, ReturnType x) { + return acc + + std::pow(static_cast(read_func(x)) + - (actual_mean * size_multiplier), + 2); + }); + actual_std_dev = std::sqrt( + actual_std_dev + / static_cast((GlobalSizes::items_per_thread * size_multiplier) - 1)); + + ASSERT_NEAR(expected_mean, actual_mean, mean_tol); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_tol); + } + } + } + delete[] host_output; + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_state)); +} + +TYPED_TEST(PoissonTest, poisson_distribution_small_lambda_test) +{ + using type = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(type)) + { + run_poisson_test( + TestFixture::small_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution_small(state, lambda); }, + [](const unsigned int& x) { return x; }); + } +} + +TYPED_TEST(PoissonTest, poisson_distribution_large_lambda_test) +{ + using type = typename TestFixture::rocrand_prng_type; + + // Since Sobol uses the inv funciton + if(!IS_SOBOL(type)) + { + run_poisson_test( + TestFixture::large_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution_large(state, lambda); }, + [](const unsigned int& x) { return x; }); + } +} + +TYPED_TEST(PoissonTest, poisson_distribution_huge_lambda_test) +{ + using type = typename TestFixture::rocrand_prng_type; + run_poisson_test( + TestFixture::massive_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution_huge(state, lambda); }, + [](const unsigned int& x) { return x; }); +} + +TYPED_TEST(PoissonTest, poisson_distribution_test) +{ + using type = typename TestFixture::rocrand_prng_type; + + if(!IS_SOBOL(type)) + { + run_poisson_test( + TestFixture::small_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution(state, lambda); }, + [](const unsigned int& x) { return x; }); + + run_poisson_test( + TestFixture::large_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution(state, lambda); }, + [](const unsigned int& x) { return x; }); + + run_poisson_test( + TestFixture::massive_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution(state, lambda); }, + [](const unsigned int& x) { return x; }); + } +} + +TYPED_TEST(PoissonTest, poisson_distribution_inv_test) +{ + using type = typename TestFixture::rocrand_prng_type; + + run_poisson_test( + TestFixture::small_poisson_lambdas, + [=](type* state, const double lambda) + { return rocrand_device::detail::poisson_distribution_inv(state, lambda); }, + [](const unsigned int& x) { return x; }); +} + +// External Tests +TYPED_TEST(PoissonTest, external_rocrand_poisson) +{ + using type = typename TestFixture::rocrand_prng_type; + + run_poisson_test( + TestFixture::small_poisson_lambdas, + [=](type* state, const double lambda) { return rocrand_poisson(state, lambda); }, + [](const unsigned int& x) { return x; }); +} + +// Special Tests +TEST(PoissonTest, philox4x32_10_uint4_output) +{ + std::vector small_poisson_lambdas = {1, 2, 4, 8, 16, 32, 64}; + + run_poisson_test( + small_poisson_lambdas, + [=](rocrand_state_philox4x32_10* state, const double lambda) + { return rocrand_poisson4(state, lambda); }, + [](const uint4& x) { return (x.w + x.x + x.y + x.z); }, + 4); +} diff --git a/projects/rocrand/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp b/projects/rocrand/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp index 134e1823583..d246476ddfc 100644 --- a/projects/rocrand/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_scrambled_sobol32_qrng.cpp @@ -21,6 +21,21 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" +#include + +#define ROCRAND_ERROR_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + using rocrand_impl::host::scrambled_sobol32_generator; using test_scrambled_sobol32_qrng_types = ::testing::Types< @@ -29,3 +44,482 @@ using test_scrambled_sobol32_qrng_types = ::testing::Types< INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_scrambled_sobol32_qrng_types); + +using uint = unsigned int; + +TEST(AdditionalTest, host_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector scramble_constants = {128, 64, 32, 16, 8, 4, 2, 1}; + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& scramble_constant : scramble_constants) + { + for(const uint& offset : offsets) + { + rocrand_state_scrambled_sobol32 state1, state2; + + rocrand_init(directions, scramble_constant, offset, &state1); + rocrand_init(directions, scramble_constant, offset, &state2); + + for(size_t i = 0; i < 10000; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} + +TEST(AdditionalTest, host_rocrand_init_offset) +{ + //test the offset functionality + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& offset : offsets) + { + rocrand_state_scrambled_sobol32 state1, state2; + + rocrand_init(directions, 1, 0, &state1); + rocrand_init(directions, 1, offset, &state2); + + for(uint i = 0; i < offset; i++) + rocrand(&state1); + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector scramble_constants = {128, 64, 32, 16, 8, 4, 2, 1}; + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + constexpr size_t test_size = 10000; + + std::vector output(test_size); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const uint mini = std::numeric_limits::min(); + const uint maxi = std::numeric_limits::max(); + + for(const uint& scramble_constant : scramble_constants) + { + for(const uint& offset : offsets) + { + rocrand_state_scrambled_sobol32 state; + + rocrand_init(directions, scramble_constant, offset, &state); + + for(size_t i = 0; i < test_size; i++) + { + //converting to range between 0 and 1 + output[i] = (a + static_cast(rocrand(&state) - mini) * (b - a)) + / (static_cast(maxi - mini)); + } + + double actual_mean = std::accumulate(output.begin(), output.end(), 0.0) + / static_cast(test_size); + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + 0.0, + [=](double acc, double x) + { return acc + std::pow(x - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + } + } +} + +TEST(AdditionalTest, host_skipahead) +{ + //test the skipahead functionality + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& offset : offsets) + { + rocrand_state_scrambled_sobol32 state1, state2; + + rocrand_init(directions, 1, 0, &state1); + rocrand_init(directions, 1, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +template +__global__ +void rocrand_init_consistency_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* scramble_constants, + const uint* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < scramble_offset_size; i++) + { + for(size_t j = 0; j < scramble_offset_size; j++) + { + uint scramble_constant = scramble_constants[i]; + uint offset = offsets[j]; + + rocrand_state_scrambled_sobol32 state1, state2; + rocrand_init(device_directions, scramble_constant, offset, &state1); + rocrand_init(device_directions, scramble_constant, offset, &state2); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output1[counter] = rocrand(&state1); + device_output2[counter++] = rocrand(&state2); + } + } + } +} + +TEST(AdditionalTest, device_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + + constexpr uint test_size = 1000; + constexpr uint scramble_offset_size = 5; + + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint scramble_constants[scramble_offset_size] = {128, 64, 32, 16, 8}; + const uint offsets[scramble_offset_size] = {8, 16, 32, 64, 128}; + + uint host_output1[scramble_offset_size * scramble_offset_size * test_size]; + uint host_output2[scramble_offset_size * scramble_offset_size * test_size]; + + uint* device_directions; + uint* device_scramble_constants; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_scramble_constants, sizeof(uint) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_output1, + sizeof(uint) * scramble_offset_size * scramble_offset_size * test_size)); + HIP_CHECK(hipMalloc(&device_output2, + sizeof(uint) * scramble_offset_size * scramble_offset_size * test_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_scramble_constants, + scramble_constants, + sizeof(uint) * scramble_offset_size, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, + offsets, + sizeof(uint) * scramble_offset_size, + hipMemcpyHostToDevice)); + + rocrand_init_consistency_kernel + <<<1, 1>>>(device_output1, + device_output2, + device_directions, + device_scramble_constants, + device_offsets); + + HIP_CHECK(hipMemcpy(host_output1, + device_output1, + sizeof(uint) * scramble_offset_size * scramble_offset_size * test_size, + hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(host_output2, + device_output2, + sizeof(uint) * scramble_offset_size * scramble_offset_size * test_size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < scramble_offset_size * scramble_offset_size * test_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_scramble_constants)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_init_offset_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_scrambled_sobol32 state1, state2; + rocrand_init(device_directions, 1u, 0, &state1); + rocrand_init(device_directions, 1u, offset, &state2); + + for(size_t ii = 0; ii < offset; ii++) + rocrand(&state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_rocrand_init_offset) +{ + //test the offset functionality + + constexpr uint offset_size = 5; + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint offsets[offset_size] = {8, 16, 32, 64, 128}; + + uint host_output1[offset_size]; + uint host_output2[offset_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(uint) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_offset_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_kernel(uint* device_output, + const uint* device_directions, + const uint* scramble_constants, + const uint* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < scramble_offset_size; i++) + { + for(size_t j = 0; j < scramble_offset_size; j++) + { + uint scramble_constant = scramble_constants[i]; + uint offset = offsets[j]; + + rocrand_state_scrambled_sobol32 state; + rocrand_init(device_directions, scramble_constant, offset, &state); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output[counter++] = rocrand(&state); + } + } + } +} + +TEST(AdditionalTest, device_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const uint* host_directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&host_directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + constexpr size_t test_size = 10000; + constexpr size_t scramble_offset_size = 8; + constexpr size_t total_size = scramble_offset_size * scramble_offset_size * test_size; + + uint host_scramble_constants[scramble_offset_size] = {128, 64, 32, 16, 8, 4, 2, 1}; + uint host_offsets[scramble_offset_size] = {1, 2, 4, 8, 16, 32, 64, 128}; + + uint* host_output = new uint[total_size]; + + uint* device_directions; + uint* device_scramble_constants; + uint* device_offsets; + uint* device_output; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_scramble_constants, sizeof(uint) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_output, sizeof(uint) * total_size)); + + HIP_CHECK(hipMemcpy(device_directions, + host_directions, + sizeof(uint) * 640000, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_scramble_constants, + host_scramble_constants, + sizeof(uint) * scramble_offset_size, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, + host_offsets, + sizeof(uint) * scramble_offset_size, + hipMemcpyHostToDevice)); + + rocrand_kernel + <<<1, 1>>>(device_output, device_directions, device_scramble_constants, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(uint) * total_size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const uint mini = std::numeric_limits::min(); + const uint maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, uint x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(total_size); + + double actual_std_dev = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, uint x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(total_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + delete[] host_output; + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_scramble_constants)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output)); +} + +template +__global__ +void skipahead_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_scrambled_sobol32 state1, state2; + rocrand_init(device_directions, 1u, 0, &state1); + rocrand_init(device_directions, 1u, offset, &state2); + + skipahead(offset, &state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_skipahead) +{ + //test the offset functionality + + constexpr uint offset_size = 5; + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint offsets[offset_size] = {8, 16, 32, 64, 128}; + + uint* host_output1 = new uint[offset_size]; + uint* host_output2 = new uint[offset_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(uint) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + skipahead_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + delete[] host_output1; + delete[] host_output2; + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} diff --git a/projects/rocrand/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp b/projects/rocrand/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp index ed87966f636..193e0479008 100644 --- a/projects/rocrand/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_scrambled_sobol64_qrng.cpp @@ -21,6 +21,21 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" +#include + +#define ROCRAND_ERROR_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + using rocrand_impl::host::scrambled_sobol64_generator; using test_scrambled_sobol64_qrng_types = ::testing::Types< @@ -29,3 +44,480 @@ using test_scrambled_sobol64_qrng_types = ::testing::Types< INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_scrambled_sobol64_qrng_types); + +using ull = unsigned long long; + +TEST(AdditionalTest, host_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector scramble_constants = {128, 64, 32, 16, 8, 4, 2, 1}; + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& scramble_constant : scramble_constants) + { + for(const ull& offset : offsets) + { + rocrand_state_scrambled_sobol64 state1, state2; + + rocrand_init(directions, scramble_constant, offset, &state1); + rocrand_init(directions, scramble_constant, offset, &state2); + + for(size_t i = 0; i < 10000; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } + } +} + +TEST(AdditionalTest, host_rocrand_init_offset) +{ + //test the offset functionality + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& offset : offsets) + { + rocrand_state_scrambled_sobol64 state1, state2; + + rocrand_init(directions, 1, 0, &state1); + rocrand_init(directions, 1, offset, &state2); + + for(ull i = 0; i < offset; i++) + rocrand(&state1); + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector scramble_constants = {128, 64, 32, 16, 8, 4, 2, 1}; + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + constexpr size_t test_size = 10000; + + std::vector output(test_size); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const ull mini = std::numeric_limits::min(); + const ull maxi = std::numeric_limits::max(); + + for(const ull& scramble_constant : scramble_constants) + { + for(const ull& offset : offsets) + { + rocrand_state_scrambled_sobol64 state; + + rocrand_init(directions, scramble_constant, offset, &state); + + for(size_t i = 0; i < test_size; i++) + { + //converting to range between 0 and 1 + output[i] = (a + static_cast(rocrand(&state) - mini) * (b - a)) + / (static_cast(maxi - mini)); + } + + double actual_mean = std::accumulate(output.begin(), output.end(), 0.0) + / static_cast(test_size); + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + 0.0, + [=](double acc, double x) + { return acc + std::pow(x - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + } + } +} + +TEST(AdditionalTest, host_skipahead) +{ + //test the skipahead functionality + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& offset : offsets) + { + rocrand_state_scrambled_sobol64 state1, state2; + + rocrand_init(directions, 1, 0, &state1); + rocrand_init(directions, 1, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +template +__global__ +void rocrand_init_consistency_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* scramble_constants, + const ull* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < scramble_offset_size; i++) + { + for(size_t j = 0; j < scramble_offset_size; j++) + { + ull scramble_constant = scramble_constants[i]; + ull offset = offsets[j]; + + rocrand_state_scrambled_sobol64 state1, state2; + rocrand_init(device_directions, scramble_constant, offset, &state1); + rocrand_init(device_directions, scramble_constant, offset, &state2); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output1[counter] = rocrand(&state1); + device_output2[counter++] = rocrand(&state2); + } + } + } +} + +TEST(AdditionalTest, device_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + + constexpr ull test_size = 1000; + constexpr ull scramble_offset_size = 5; + + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull scramble_constants[scramble_offset_size] = {128, 64, 32, 16, 8}; + const ull offsets[scramble_offset_size] = {8, 16, 32, 64, 128}; + + ull host_output1[scramble_offset_size * scramble_offset_size * test_size]; + ull host_output2[scramble_offset_size * scramble_offset_size * test_size]; + + ull* device_directions; + ull* device_scramble_constants; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_scramble_constants, sizeof(ull) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_output1, + sizeof(ull) * scramble_offset_size * scramble_offset_size * test_size)); + HIP_CHECK(hipMalloc(&device_output2, + sizeof(ull) * scramble_offset_size * scramble_offset_size * test_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_scramble_constants, + scramble_constants, + sizeof(ull) * scramble_offset_size, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, + offsets, + sizeof(ull) * scramble_offset_size, + hipMemcpyHostToDevice)); + + rocrand_init_consistency_kernel + <<<1, 1>>>(device_output1, + device_output2, + device_directions, + device_scramble_constants, + device_offsets); + + HIP_CHECK(hipMemcpy(host_output1, + device_output1, + sizeof(ull) * scramble_offset_size * scramble_offset_size * test_size, + hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(host_output2, + device_output2, + sizeof(ull) * scramble_offset_size * scramble_offset_size * test_size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < scramble_offset_size * scramble_offset_size * test_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_scramble_constants)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_init_offset_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_scrambled_sobol64 state1, state2; + rocrand_init(device_directions, 1u, 0, &state1); + rocrand_init(device_directions, 1u, offset, &state2); + + for(size_t ii = 0; ii < offset; ii++) + rocrand(&state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_rocrand_init_offset) +{ + //test the offset functionality + + constexpr ull offset_size = 5; + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull offsets[offset_size] = {8, 16, 32, 64, 128}; + + ull host_output1[offset_size]; + ull host_output2[offset_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(ull) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_offset_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_kernel(ull* device_output, + const ull* device_directions, + const ull* scramble_constants, + const ull* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < scramble_offset_size; i++) + { + for(size_t j = 0; j < scramble_offset_size; j++) + { + ull scramble_constant = scramble_constants[i]; + ull offset = offsets[j]; + + rocrand_state_scrambled_sobol64 state; + rocrand_init(device_directions, scramble_constant, offset, &state); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output[counter++] = rocrand(&state); + } + } + } +} + +TEST(AdditionalTest, device_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const ull* host_directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&host_directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + + constexpr size_t test_size = 10000; + constexpr size_t scramble_offset_size = 8; + constexpr size_t total_size = scramble_offset_size * scramble_offset_size * test_size; + + ull host_scramble_constants[scramble_offset_size] = {128, 64, 32, 16, 8, 4, 2, 1}; + ull host_offsets[scramble_offset_size] = {1, 2, 4, 8, 16, 32, 64, 128}; + + ull* host_output = new ull[total_size]; + + ull* device_directions; + ull* device_scramble_constants; + ull* device_offsets; + ull* device_output; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_scramble_constants, sizeof(ull) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * scramble_offset_size)); + HIP_CHECK(hipMalloc(&device_output, sizeof(ull) * total_size)); + + HIP_CHECK(hipMemcpy(device_directions, + host_directions, + sizeof(ull) * 1280000, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_scramble_constants, + host_scramble_constants, + sizeof(ull) * scramble_offset_size, + hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, + host_offsets, + sizeof(ull) * scramble_offset_size, + hipMemcpyHostToDevice)); + + rocrand_kernel + <<<1, 1>>>(device_output, device_directions, device_scramble_constants, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(ull) * total_size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const ull mini = std::numeric_limits::min(); + const ull maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, ull x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(total_size); + + double actual_std_dev = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, ull x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(total_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + delete[] host_output; + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_scramble_constants)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output)); +} + +template +__global__ +void skipahead_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_scrambled_sobol64 state1, state2; + rocrand_init(device_directions, 1u, 0, &state1); + rocrand_init(device_directions, 1u, offset, &state2); + + skipahead(offset, &state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_skipahead) +{ + //test the offset functionality + + constexpr ull offset_size = 5; + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull offsets[offset_size] = {8, 16, 32, 64, 128}; + + ull* host_output1 = new ull[offset_size]; + ull* host_output2 = new ull[offset_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(ull) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + skipahead_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + delete[] host_output1; + delete[] host_output2; + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} diff --git a/projects/rocrand/test/internal/test_rocrand_sobol32_qrng.cpp b/projects/rocrand/test/internal/test_rocrand_sobol32_qrng.cpp index 5c134464e73..e8cd47fec6d 100644 --- a/projects/rocrand/test/internal/test_rocrand_sobol32_qrng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_sobol32_qrng.cpp @@ -21,9 +21,451 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" +#include + +#define ROCRAND_ERROR_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + using rocrand_impl::host::sobol32_generator; using test_sobol32_qrng_types = ::testing::Types>; INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_sobol32_qrng_types); + +using uint = unsigned int; + +TEST(AdditionalTest, host_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& offset : offsets) + { + rocrand_state_sobol32 state1, state2; + + rocrand_init(directions, offset, &state1); + rocrand_init(directions, offset, &state2); + + for(size_t i = 0; i < 10000; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand_init_offset) +{ + //test the offset functionality + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& offset : offsets) + { + rocrand_state_sobol32 state1, state2; + + rocrand_init(directions, 0, &state1); + rocrand_init(directions, offset, &state2); + + for(uint i = 0; i < offset; i++) + rocrand(&state1); + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + constexpr size_t test_size = 10000; + + std::vector output(test_size); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const uint mini = std::numeric_limits::min(); + const uint maxi = std::numeric_limits::max(); + + for(const uint& offset : offsets) + { + rocrand_state_sobol32 state; + + rocrand_init(directions, offset, &state); + + for(size_t i = 0; i < test_size; i++) + { + //converting to range between 0 and 1 + output[i] = (a + static_cast(rocrand(&state) - mini) * (b - a)) + / (static_cast(maxi - mini)); + } + + double actual_mean + = std::accumulate(output.begin(), output.end(), 0.0) / static_cast(test_size); + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + 0.0, + [=](double acc, double x) + { return acc + std::pow(x - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + } +} + +TEST(AdditionalTest, host_skipahead) +{ + //test the skipahead functionality + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const uint& offset : offsets) + { + rocrand_state_sobol32 state1, state2; + + rocrand_init(directions, 0, &state1); + rocrand_init(directions, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +template +__global__ +void rocrand_init_consistency_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_sobol32 state1, state2; + rocrand_init(device_directions, offset, &state1); + rocrand_init(device_directions, offset, &state2); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output1[counter] = rocrand(&state1); + device_output2[counter++] = rocrand(&state2); + } + } +} + +TEST(AdditionalTest, device_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + + constexpr uint test_size = 1000; + constexpr uint offset_size = 5; + + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint offsets[offset_size] = {8, 16, 32, 64, 128}; + + uint host_output1[offset_size * test_size]; + uint host_output2[offset_size * test_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(uint) * offset_size * test_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(uint) * offset_size * test_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_consistency_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK(hipMemcpy(host_output1, + device_output1, + sizeof(uint) * offset_size * test_size, + hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(host_output2, + device_output2, + sizeof(uint) * offset_size * test_size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size * test_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_init_offset_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_sobol32 state1, state2; + rocrand_init(device_directions, 0, &state1); + rocrand_init(device_directions, offset, &state2); + + for(size_t ii = 0; ii < offset; ii++) + rocrand(&state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_rocrand_init_offset) +{ + //test the offset functionality + + constexpr uint offset_size = 5; + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint offsets[offset_size] = {8, 16, 32, 64, 128}; + + uint host_output1[offset_size]; + uint host_output2[offset_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(uint) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_offset_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_kernel(uint* device_output, const uint* device_directions, const uint* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_sobol32 state; + rocrand_init(device_directions, offset, &state); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output[counter++] = rocrand(&state); + } + } +} + +TEST(AdditionalTest, device_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const uint* host_directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&host_directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + + constexpr size_t test_size = 10000; + constexpr size_t offset_size = 8; + constexpr size_t total_size = offset_size * test_size; + + uint host_offsets[offset_size] = {1, 2, 4, 8, 16, 32, 64, 128}; + uint host_output[total_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output, sizeof(uint) * total_size)); + + HIP_CHECK(hipMemcpy(device_directions, + host_directions, + sizeof(uint) * 640000, + hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, host_offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + rocrand_kernel + <<<1, 1>>>(device_output, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(uint) * total_size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const uint mini = std::numeric_limits::min(); + const uint maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, uint x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(total_size); + + double actual_std_dev = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, uint x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(total_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output)); +} + +template +__global__ +void skipahead_kernel(uint* device_output1, + uint* device_output2, + const uint* device_directions, + const uint* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + uint offset = offsets[i]; + + rocrand_state_sobol32 state1, state2; + rocrand_init(device_directions, 0, &state1); + rocrand_init(device_directions, offset, &state2); + + skipahead(offset, &state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_skipahead) +{ + //test the offset functionality + + constexpr uint offset_size = 5; + const uint* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + const uint offsets[offset_size] = {8, 16, 32, 64, 128}; + + uint host_output1[offset_size]; + uint host_output2[offset_size]; + + uint* device_directions; + uint* device_offsets; + uint* device_output1; + uint* device_output2; + + // 640000 is the size of ROCRAND_DIRECTION_VECTORS_32_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(uint) * 640000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(uint) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(uint) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(uint) * 640000, hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, offsets, sizeof(uint) * offset_size, hipMemcpyHostToDevice)); + + skipahead_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(uint) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} diff --git a/projects/rocrand/test/internal/test_rocrand_sobol64_qrng.cpp b/projects/rocrand/test/internal/test_rocrand_sobol64_qrng.cpp index eeda2af4339..f46776663c9 100644 --- a/projects/rocrand/test/internal/test_rocrand_sobol64_qrng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_sobol64_qrng.cpp @@ -21,9 +21,448 @@ #include "rocrand/rocrand.h" #include "test_rocrand_sobol_qrng.hpp" +#include + +#define ROCRAND_ERROR_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + using rocrand_impl::host::sobol64_generator; using test_sobol64_qrng_types = ::testing::Types>; INSTANTIATE_TYPED_TEST_SUITE_P(sobol_qrng_tests, sobol_qrng_tests, test_sobol64_qrng_types); + +using ull = unsigned long long; + +TEST(AdditionalTest, host_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& offset : offsets) + { + rocrand_state_sobol64 state1, state2; + + rocrand_init(directions, offset, &state1); + rocrand_init(directions, offset, &state2); + + for(size_t i = 0; i < 10000; i++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand_init_offset) +{ + //test the offset functionality + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& offset : offsets) + { + rocrand_state_sobol64 state1, state2; + + rocrand_init(directions, 0, &state1); + rocrand_init(directions, offset, &state2); + + for(ull i = 0; i < offset; i++) + rocrand(&state1); + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(AdditionalTest, host_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + constexpr size_t test_size = 10000; + + std::vector output(test_size); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const ull mini = std::numeric_limits::min(); + const ull maxi = std::numeric_limits::max(); + + for(const ull& offset : offsets) + { + rocrand_state_sobol64 state; + + rocrand_init(directions, offset, &state); + + for(size_t i = 0; i < test_size; i++) + { + //converting to range between 0 and 1 + output[i] = (a + static_cast(rocrand(&state) - mini) * (b - a)) + / (static_cast(maxi - mini)); + } + + double actual_mean + = std::accumulate(output.begin(), output.end(), 0.0) / static_cast(test_size); + double actual_std_dev = std::accumulate(output.begin(), + output.end(), + 0.0, + [=](double acc, double x) + { return acc + std::pow(x - actual_mean, 2); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + } +} + +TEST(AdditionalTest, host_skipahead) +{ + //test the skipahead functionality + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + std::vector offsets = {1, 2, 4, 8, 32, 64, 128}; + + for(const ull& offset : offsets) + { + rocrand_state_sobol64 state1, state2; + + rocrand_init(directions, 0, &state1); + rocrand_init(directions, offset, &state2); + + skipahead(offset, &state1); + + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +template +__global__ +void rocrand_init_consistency_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_sobol64 state1, state2; + rocrand_init(device_directions, offset, &state1); + rocrand_init(device_directions, offset, &state2); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output1[counter] = rocrand(&state1); + device_output2[counter++] = rocrand(&state2); + } + } +} + +TEST(AdditionalTest, device_rocrand_init_consistent) +{ + //making sure that the the output is consistent when init parameter are the same + + constexpr ull test_size = 1000; + constexpr ull offset_size = 5; + + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull offsets[offset_size] = {8, 16, 32, 64, 128}; + + ull host_output1[offset_size * test_size]; + ull host_output2[offset_size * test_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(ull) * offset_size * test_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(ull) * offset_size * test_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_consistency_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK(hipMemcpy(host_output1, + device_output1, + sizeof(ull) * offset_size * test_size, + hipMemcpyDeviceToHost)); + HIP_CHECK(hipMemcpy(host_output2, + device_output2, + sizeof(ull) * offset_size * test_size, + hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size * test_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_init_offset_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_sobol64 state1, state2; + rocrand_init(device_directions, 0, &state1); + rocrand_init(device_directions, offset, &state2); + + for(size_t ii = 0; ii < offset; ii++) + rocrand(&state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_rocrand_init_offset) +{ + //test the offset functionality + + constexpr ull offset_size = 5; + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull offsets[offset_size] = {8, 16, 32, 64, 128}; + + ull host_output1[offset_size]; + ull host_output2[offset_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(ull) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + rocrand_init_offset_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} + +template +__global__ +void rocrand_kernel(ull* device_output, const ull* device_directions, const ull* offsets) +{ + + size_t counter = 0; + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_sobol64 state; + rocrand_init(device_directions, offset, &state); + + for(size_t ii = 0; ii < test_size; ii++) + { + device_output[counter++] = rocrand(&state); + } + } +} + +TEST(AdditionalTest, device_rocrand) +{ + // test to make sure rocrand returns uniformly distributed values + const ull* host_directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&host_directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + + constexpr size_t test_size = 10000; + constexpr size_t offset_size = 8; + constexpr size_t total_size = offset_size * test_size; + + ull host_offsets[offset_size] = {1, 2, 4, 8, 16, 32, 64, 128}; + ull host_output[total_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output, sizeof(ull) * total_size)); + + HIP_CHECK(hipMemcpy(device_directions, + host_directions, + sizeof(ull) * 1280000, + hipMemcpyHostToDevice)); + HIP_CHECK( + hipMemcpy(device_offsets, host_offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + rocrand_kernel + <<<1, 1>>>(device_output, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output, device_output, sizeof(ull) * total_size, hipMemcpyDeviceToHost)); + + constexpr double a = 0; + constexpr double b = 1; + + const double expected_mean = (a + b) / 2; + const double expected_std_dev = (b - a) / std::sqrt(12); + + const ull mini = std::numeric_limits::min(); + const ull maxi = std::numeric_limits::max(); + + double actual_mean = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, ull x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + converted; + }) + / static_cast(total_size); + + double actual_std_dev = std::accumulate(host_output, + host_output + total_size, + (double)0.0, + [=](double acc, ull x) + { + double converted + = (a + static_cast(x - mini) * (b - a)) + / (static_cast(maxi - mini)); + return acc + std::pow(converted - actual_mean, 2); + }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(total_size)); + + // make sure results are within 5% of expected values + ASSERT_NEAR(expected_mean, actual_mean, expected_mean * 0.05); + ASSERT_NEAR(expected_std_dev, actual_std_dev, expected_std_dev * 0.05); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output)); +} + +template +__global__ +void skipahead_kernel(ull* device_output1, + ull* device_output2, + const ull* device_directions, + const ull* offsets) +{ + + for(size_t i = 0; i < offset_size; i++) + { + ull offset = offsets[i]; + + rocrand_state_sobol64 state1, state2; + rocrand_init(device_directions, 0, &state1); + rocrand_init(device_directions, offset, &state2); + + skipahead(offset, &state1); + + device_output1[i] = rocrand(&state1); + device_output2[i] = rocrand(&state2); + } +} + +TEST(AdditionalTest, device_skipahead) +{ + //test the offset functionality + + constexpr ull offset_size = 5; + const ull* directions; + ROCRAND_ERROR_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + const ull offsets[offset_size] = {8, 16, 32, 64, 128}; + + ull host_output1[offset_size]; + ull host_output2[offset_size]; + + ull* device_directions; + ull* device_offsets; + ull* device_output1; + ull* device_output2; + + // 1280000 is the size of ROCRAND_DIRECTION_VECTORS_64_JOEKUO6 + HIP_CHECK(hipMalloc(&device_directions, sizeof(ull) * 1280000)); + HIP_CHECK(hipMalloc(&device_offsets, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output1, sizeof(ull) * offset_size)); + HIP_CHECK(hipMalloc(&device_output2, sizeof(ull) * offset_size)); + + HIP_CHECK( + hipMemcpy(device_directions, directions, sizeof(ull) * 1280000, hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(device_offsets, offsets, sizeof(ull) * offset_size, hipMemcpyHostToDevice)); + + skipahead_kernel + <<<1, 1>>>(device_output1, device_output2, device_directions, device_offsets); + + HIP_CHECK( + hipMemcpy(host_output1, device_output1, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + HIP_CHECK( + hipMemcpy(host_output2, device_output2, sizeof(ull) * offset_size, hipMemcpyDeviceToHost)); + + for(size_t i = 0; i < offset_size; i++) + ASSERT_EQ(host_output1[i], host_output2[i]); + + HIP_CHECK(hipFree(device_directions)); + HIP_CHECK(hipFree(device_offsets)); + HIP_CHECK(hipFree(device_output1)); + HIP_CHECK(hipFree(device_output2)); +} diff --git a/projects/rocrand/test/internal/test_rocrand_sobol_qrng.hpp b/projects/rocrand/test/internal/test_rocrand_sobol_qrng.hpp index ecae4c70931..12e139d7508 100644 --- a/projects/rocrand/test/internal/test_rocrand_sobol_qrng.hpp +++ b/projects/rocrand/test/internal/test_rocrand_sobol_qrng.hpp @@ -41,10 +41,10 @@ template struct sobol_qrng_tests : public ::testing::Test { - using generator_t = typename Params::generator_t; + using generator_t = typename Params::generator_t; static inline constexpr rocrand_ordering ordering = Params::ordering; - using constant_t = typename generator_t::constant_type; - using engine_t = typename generator_t::engine_type; + using constant_t = typename generator_t::constant_type; + using engine_t = typename generator_t::engine_type; auto get_generator() const { @@ -139,7 +139,7 @@ TYPED_TEST_SUITE_P(sobol_qrng_tests); template struct sobol_qrng_tests_params { - using generator_t = Generator; + using generator_t = Generator; static inline constexpr rocrand_ordering ordering = Ordering; }; diff --git a/projects/rocrand/test/internal/test_rocrand_threefry2x32_20_prng.cpp b/projects/rocrand/test/internal/test_rocrand_threefry2x32_20_prng.cpp index fa6667e41a0..3019e46a3c6 100644 --- a/projects/rocrand/test/internal/test_rocrand_threefry2x32_20_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_threefry2x32_20_prng.cpp @@ -67,7 +67,8 @@ class threefry2x32_engine_type_test : public threefry2x32_20_generator::engine_t public: __host__ threefry2x32_engine_type_test() : threefry2x32_20_generator::engine_type(0, 0, 0) {} - __host__ state_type& internal_state_ref() + __host__ + state_type& internal_state_ref() { return m_state; } @@ -161,3 +162,96 @@ TEST(threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.y, 457U); EXPECT_EQ(state.substate, 0U); } + +TEST(threefry_additional_tests, rocrand_init_test) +{ + // making sure the outputs are the same when initialized with same parameters + rocrand_state_threefry2x32_20 state1, state2; + + using ull = unsigned long long; + + ull seeds[] = {0, 123, 321, 123456, 654321}; + ull subsequences[] = {0xf, 0xff, 0x1f, 0x1ff, 0x1f1}; + ull offsets[] = {0, 1, 2, 3, 4}; + + for(int i = 0; i < 5; i++) + { + rocrand_init(seeds[i], subsequences[i], offsets[i], &state1); + rocrand_init(seeds[i], subsequences[i], offsets[i], &state2); + + for(int j = 0; j < 5000; j++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(threefry_additional_tests, rocrand_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry2x32_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned int* output = new unsigned int[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i++) + { + output[i] = rocrand(&state); + mean += static_cast(output[i]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} + +TEST(threefry_additional_tests, rocrand2_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry2x32_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned int* output = new unsigned int[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i += 2) + { + uint2 t = rocrand2(&state); + output[i] = t.x; + output[i + 1] = t.y; + mean += static_cast(output[i]); + mean += static_cast(output[i + 1]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + // min val is 0 + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} diff --git a/projects/rocrand/test/internal/test_rocrand_threefry2x64_20_prng.cpp b/projects/rocrand/test/internal/test_rocrand_threefry2x64_20_prng.cpp index cd13ea3bba2..18e61a09f3f 100644 --- a/projects/rocrand/test/internal/test_rocrand_threefry2x64_20_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_threefry2x64_20_prng.cpp @@ -72,7 +72,8 @@ class threefry2x64_engine_type_test : public threefry2x64_20_generator::engine_t public: __host__ threefry2x64_engine_type_test() : threefry2x64_20_generator::engine_type(0, 0, 0) {} - __host__ state_type& internal_state_ref() + __host__ + state_type& internal_state_ref() { return m_state; } @@ -169,3 +170,75 @@ TEST(threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.y, 457ULL); EXPECT_EQ(state.substate, 0U); } + +TEST(threefry_additional_tests, rocrand_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry2x64_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned long long* output = new unsigned long long[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i++) + { + output[i] = rocrand(&state); + mean += static_cast(output[i]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} + +TEST(threefry_additional_tests, rocrand2_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry2x64_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned long long* output = new unsigned long long[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i += 2) + { + ulonglong2 t = rocrand2(&state); + output[i] = t.x; + output[i + 1] = t.y; + mean += static_cast(output[i]); + mean += static_cast(output[i + 1]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + // min val is 0 + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} diff --git a/projects/rocrand/test/internal/test_rocrand_threefry4x32_20_prng.cpp b/projects/rocrand/test/internal/test_rocrand_threefry4x32_20_prng.cpp index 3ce1be60ea6..7a1d6fd7f1d 100644 --- a/projects/rocrand/test/internal/test_rocrand_threefry4x32_20_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_threefry4x32_20_prng.cpp @@ -68,7 +68,8 @@ class threefry4x32_engine_type_test : public threefry4x32_20_generator::engine_t public: __host__ threefry4x32_engine_type_test() : threefry4x32_20_generator::engine_type(0, 0, 0) {} - __host__ state_type& internal_state_ref() + __host__ + state_type& internal_state_ref() { return m_state; } @@ -245,3 +246,100 @@ TEST(threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.w, 6U); EXPECT_EQ(state.substate, 0U); } + +TEST(threefry_additional_tests, rocrand_init_test) +{ + // making sure the outputs are the same when initialized with same parameters + rocrand_state_threefry4x32_20 state1, state2; + + using ull = unsigned long long; + + ull seeds[] = {0, 123, 321, 123456, 654321}; + ull subsequences[] = {0xf, 0xff, 0x1f, 0x1ff, 0x1f1}; + ull offsets[] = {0, 1, 2, 3, 4}; + + for(int i = 0; i < 5; i++) + { + rocrand_init(seeds[i], subsequences[i], offsets[i], &state1); + rocrand_init(seeds[i], subsequences[i], offsets[i], &state2); + + for(int j = 0; j < 5000; j++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(threefry_additional_tests, rocrand_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry4x32_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned int* output = new unsigned int[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i++) + { + output[i] = rocrand(&state); + mean += static_cast(output[i]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} + +TEST(threefry_additional_tests, rocrand4_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry4x32_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned int* output = new unsigned int[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i += 4) + { + uint4 t = rocrand4(&state); + output[i] = t.w; + output[i + 1] = t.x; + output[i + 2] = t.y; + output[i + 3] = t.z; + mean += static_cast(output[i]); + mean += static_cast(output[i + 1]); + mean += static_cast(output[i + 2]); + mean += static_cast(output[i + 3]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + // min val is 0 + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} diff --git a/projects/rocrand/test/internal/test_rocrand_threefry4x64_20_prng.cpp b/projects/rocrand/test/internal/test_rocrand_threefry4x64_20_prng.cpp index 280c333d242..96f95cf14e8 100644 --- a/projects/rocrand/test/internal/test_rocrand_threefry4x64_20_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_threefry4x64_20_prng.cpp @@ -72,7 +72,8 @@ class threefry4x64_engine_type_test : public threefry4x64_20_generator::engine_t public: __host__ threefry4x64_engine_type_test() : threefry4x64_20_generator::engine_type(0, 0, 0) {} - __host__ state_type& internal_state_ref() + __host__ + state_type& internal_state_ref() { return m_state; } @@ -261,3 +262,100 @@ TEST(threefry_prng_state_tests, discard_sequence_test) EXPECT_EQ(state.counter.w, 6ULL); EXPECT_EQ(state.substate, 0U); } + +TEST(threefry_additional_tests, rocrand_init_test) +{ + // making sure the outputs are the same when initialized with same parameters + rocrand_state_threefry4x64_20 state1, state2; + + using ull = unsigned long long; + + ull seeds[] = {0, 123, 321, 123456, 654321}; + ull subsequences[] = {0xf, 0xff, 0x1f, 0x1ff, 0x1f1}; + ull offsets[] = {0, 1, 2, 3, 4}; + + for(int i = 0; i < 5; i++) + { + rocrand_init(seeds[i], subsequences[i], offsets[i], &state1); + rocrand_init(seeds[i], subsequences[i], offsets[i], &state2); + + for(int j = 0; j < 5000; j++) + ASSERT_EQ(rocrand(&state1), rocrand(&state2)); + } +} + +TEST(threefry_additional_tests, rocrand_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry4x64_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned long long* output = new unsigned long long[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i++) + { + output[i] = rocrand(&state); + mean += static_cast(output[i]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} + +TEST(threefry_additional_tests, rocrand4_test) +{ + // making sure the outputs are uniformly distributed! + rocrand_state_threefry4x64_20 state; + + rocrand_init(0, 0, 0, &state); + size_t testSize = 40000; + + unsigned long long* output = new unsigned long long[testSize]; + + double mean = 0; + for(size_t i = 0; i < testSize; i += 4) + { + ulonglong4 t = rocrand4(&state); + output[i] = t.w; + output[i + 1] = t.x; + output[i + 2] = t.y; + output[i + 3] = t.z; + mean += static_cast(output[i]); + mean += static_cast(output[i + 1]); + mean += static_cast(output[i + 2]); + mean += static_cast(output[i + 3]); + } + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double maxi = (double)std::numeric_limits::max(); + // min val is 0 + double eMean = 0.5 * (maxi); // 0.5(a + b) + double eStd = (maxi) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1); + ASSERT_NEAR(std, eStd, eStd * 0.1); + + delete[] output; +} diff --git a/projects/rocrand/test/internal/test_rocrand_xorwow_prng.cpp b/projects/rocrand/test/internal/test_rocrand_xorwow_prng.cpp index 2fcee362723..dc243f59e9f 100644 --- a/projects/rocrand/test/internal/test_rocrand_xorwow_prng.cpp +++ b/projects/rocrand/test/internal/test_rocrand_xorwow_prng.cpp @@ -62,7 +62,7 @@ class xorwow_engine_type_test : public xorwow_generator::engine_type TEST(xorwow_engine_type_test, discard_test) { - const unsigned long long seed = 1234567890123ULL; + const unsigned long long seed = 1234567890123ULL; xorwow_generator::engine_type engine1(seed, 0, 678ULL); xorwow_generator::engine_type engine2(seed, 0, 677ULL); @@ -95,7 +95,7 @@ TEST(xorwow_engine_type_test, discard_test) TEST(xorwow_engine_type_test, discard_sequence_test) { - const unsigned long long seed = ~1234567890123ULL; + const unsigned long long seed = ~1234567890123ULL; xorwow_generator::engine_type engine1(seed, 0, 444ULL); xorwow_generator::engine_type engine2(seed, 123ULL, 444ULL); diff --git a/projects/rocrand/test/internal/test_uniform_distribution.cpp b/projects/rocrand/test/internal/test_uniform_distribution.cpp index e897e014466..85bfde8adc8 100644 --- a/projects/rocrand/test/internal/test_uniform_distribution.cpp +++ b/projects/rocrand/test/internal/test_uniform_distribution.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,9 +21,40 @@ #include #include +#include #include #include +#include + +#define HIP_CHECK(cmd) \ + do \ + { \ + auto error = (cmd); \ + if(error != hipSuccess) \ + { \ + std::cerr << "Encountered HIP error (" << hipGetErrorString(error) << ") at line " \ + << __LINE__ << " in file " << __FILE__ << "\n"; \ + exit(-1); \ + } \ + } \ + while(0) + +#define ROCRAND_CHECK(cmd) \ + do \ + { \ + auto status = cmd; \ + if(status != 0) \ + { \ + std::cerr << "Encountered ROCRAND error: " << status << "at line" << __LINE__ \ + << " in file " << __FILE__ << "\n"; \ + exit(status); \ + } \ + } \ + while(0) + +// If x is small then get withing 0.001 otherwise 5% +#define GET_EPS(x) x < (T)0.01 ? (T)0.001 : x*(T)0.05 using namespace rocrand_impl::host; @@ -358,3 +389,1201 @@ TEST(sobol_uniform_distribution_tests, half_test) EXPECT_GT(__half2float(output[0]), 0.0f); EXPECT_LT(__half2float(output[0]), 1e-4f); } + +template +struct NumericUD{ + + template + void run_test(UD & dis, const FuncCall & f){ + std::random_device rd; + std::mt19937 gen(rd()); + + const size_t testSize = 4000000; + + float * output = new float [testSize]; + + InType input; + OutType out; + + double mean = 0; + + for(size_t i = 0; i < testSize; i += 4){ + input = {dis(gen), dis(gen), dis(gen), dis(gen)}; + + f(input, out); + + output[i] = out.w; + output[i + 1] = out.x; + output[i + 2] = out.y; + output[i + 3] = out.z; + + ASSERT_GT(out.w, 0); + ASSERT_GT(out.x, 0); + ASSERT_GT(out.y, 0); + ASSERT_GT(out.z, 0); + + ASSERT_LE(out.w, 1); + ASSERT_LE(out.x, 1); + ASSERT_LE(out.y, 1); + ASSERT_LE(out.z, 1); + + mean += out.w + out.x + out.y + out.z; + } + + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double eMean = 0.5 * (0 + 1); // 0.5(a + b) + double eStd = (1 - 0) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1) << "Expected Mean: " << eMean << " Actual Mean: " << mean << " Eps: " << eMean * 0.1; + ASSERT_NEAR(std, eStd, eStd * 0.1) << "Expected Std: " << eStd << " Actual Std: " << std << " Eps: " << eStd * 0.1; + + delete [] output; + } + +}; + +TEST(uniform_distribution_tests, float4_uint4_in_test){ + unsigned int mini = 0; + unsigned int maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + NumericUD> test; + test.run_test( + dis, + [] __host__ __device__ (uint4 & input, float4 & output){ + output = rocrand_device::detail::uniform_distribution4(input); + } + ); +} + +TEST(uniform_distribution_tests, float4_ulonglong4_test){ + unsigned long long mini = 0; + unsigned long long maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + NumericUD> test; + test.run_test( + dis, + [] __host__ __device__ (ulonglong4 & input, float4 & output){ + output = rocrand_device::detail::uniform_distribution4(input); + } + ); +} + +TEST(uniform_distribution_tests, double4_uint4_test){ + unsigned int mini = 0; + unsigned int maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + std::random_device rd; + std::mt19937 gen(rd()); + + NumericUD> test; + test.run_test( + dis, + [&] __host__ __device__ (uint4 & input, double4 & output){ + output = rocrand_device::detail::uniform_distribution_double4(input, input); + } + ); +} + +TEST(uniform_distribution_tests, double4_ulonglong4_test){ + unsigned long long mini = 0; + unsigned long long maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + NumericUD> test; + test.run_test( + dis, + [] __host__ __device__ (ulonglong4 & input, double4 & output){ + output = rocrand_device::detail::uniform_distribution_double4(input); + } + ); +} + +TEST(uniform_distribution_tests, double2_uint4_in_test){ + unsigned int mini = 0; + unsigned int maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + std::random_device rd; + std::mt19937 gen(rd()); + + NumericUD> test; + test.run_test( + dis, + [&] __host__ __device__ (uint4 & input, double4 & output){ + + uint4 secondInput = {dis(gen), dis(gen), dis(gen), dis(gen)}; + double2 o1 = rocrand_device::detail::uniform_distribution_double2(input); + double2 o2 = rocrand_device::detail::uniform_distribution_double2(secondInput); + + output.w = o1.x; output.x = o1.y; + output.y = o2.x; output.z = o2.y; + } + ); +} + +TEST(uniform_distribution_tests, double2_ulonglong2_in_test){ + unsigned long long mini = 0; + unsigned long long maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + NumericUD> test; + test.run_test( + dis, + [&] __host__ __device__ (ulonglong4 & input, double4 & output){ + + ulonglong2 i1 = {input.w, input.x}; + ulonglong2 i2 = {input.y, input.z}; + + double2 o1 = rocrand_device::detail::uniform_distribution_double2(i1); + double2 o2 = rocrand_device::detail::uniform_distribution_double2(i2); + + output.w = o1.x; output.x = o1.y; + output.y = o2.x; output.z = o2.y; + } + ); +} + +TEST(uniform_distribution_tests, double2_ulonglong4_in_test){ + unsigned long long mini = 0; + unsigned long long maxi = std::numeric_limits::max(); + std::uniform_int_distribution dis(mini, maxi); + + std::random_device rd; + std::mt19937 gen(rd()); + + NumericUD> test; + test.run_test( + dis, + [&] __host__ __device__ (ulonglong4 & input, double4 & output){ + ulonglong4 secondInput = {dis(gen), dis(gen), dis(gen), dis(gen)}; + double2 o1 = rocrand_device::detail::uniform_distribution_double2(input); + double2 o2 = rocrand_device::detail::uniform_distribution_double2(secondInput); + + output.w = o1.x; output.x = o1.y; + output.y = o2.x; output.z = o2.y; + } + ); +} + +template +struct StatesUD{ + template + void run_test(const FuncCall & f, size_t testSize = 4000000){ + float * output = new float [testSize]; + OutType out; + + double mean = 0; + + for(size_t i = 0; i < testSize; i += 4){ + f(out); + + output[i] = out.w; + output[i + 1] = out.x; + output[i + 2] = out.y; + output[i + 3] = out.z; + + ASSERT_GT(out.w, 0); + ASSERT_GT(out.x, 0); + ASSERT_GT(out.y, 0); + ASSERT_GT(out.z, 0); + + ASSERT_LE(out.w, 1); + ASSERT_LE(out.x, 1); + ASSERT_LE(out.y, 1); + ASSERT_LE(out.z, 1); + + mean += out.w + out.x + out.y + out.z; + } + + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(output[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double eMean = 0.5 * (0 + 1); // 0.5(a + b) + double eStd = (1 - 0) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1) << "Expected Mean: " << eMean << " Actual Mean: " << mean << " Eps: " << eMean * 0.1; + ASSERT_NEAR(std, eStd, eStd * 0.1) << "Expected Std: " << eStd << " Actual Std: " << std << " Eps: " << eStd * 0.1; + + delete [] output; + } +}; + +TEST(uniform_distribution_tests, philox4x32_10_test){ + rocrand_state_philox4x32_10 states; + rocrand_init(123456, 654321, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + float2 o1 = rocrand_uniform2(&states); + float2 o2 = rocrand_uniform2(&states); + + output.w = o1.x; output.x = o1.y; + output.y = o2.x; output.z = o2.y; + } + ); + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = rocrand_uniform4(&states); + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + double2 o1 = rocrand_uniform_double2(&states); + double2 o2 = rocrand_uniform_double2(&states); + + output.w = o1.x; output.x = o1.y; + output.y = o2.x; output.z = o2.y; + } + ); + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = rocrand_uniform_double4(&states); + } + ); +} + +TEST(uniform_distribution_tests, mrg31k3p_test){ + rocrand_state_mrg31k3p states; + rocrand_init(123456, 654321, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, mrg32k3a_test){ + rocrand_state_mrg32k3a states; + rocrand_init(123456, 654321, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, xorwow_test){ + rocrand_state_xorwow states; + rocrand_init(123456, 654321, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, sobol32_test){ + rocrand_state_sobol32 states; + const unsigned int* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, scrambled_sobol32_test){ + rocrand_state_scrambled_sobol32 states; + const unsigned int* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, sobol64_test){ + rocrand_state_sobol64 states; + const unsigned long long* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, scrambled_sobol64_test){ + rocrand_state_scrambled_sobol64 states; + const unsigned long long* directions; + ROCRAND_CHECK(rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, lfsr113_test){ + rocrand_state_lfsr113 states; + rocrand_init(static_cast(12), 0, &states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, threefry2x32_20_test){ + rocrand_state_threefry2x32_20 states; + rocrand_init(123456, 654321, 0, & states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, threefry2x64_20_test){ + rocrand_state_threefry2x64_20 states; + rocrand_init(123456, 654321, 0, & states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, threefry4x32_20_test){ + rocrand_state_threefry4x32_20 states; + rocrand_init(123456, 654321, 0, & states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +TEST(uniform_distribution_tests, threefry4x64_20_test){ + rocrand_state_threefry4x64_20 states; + rocrand_init(123456, 654321, 0, & states); + + StatesUD testFloat; + + testFloat.run_test( + [&] __host__ __device__ (float4 & output){ + output = { + rocrand_uniform(&states), rocrand_uniform(&states), + rocrand_uniform(&states), rocrand_uniform(&states) + }; + } + ); + + StatesUD testDouble; + + testDouble.run_test( + [&] __host__ __device__ (double4 & output){ + output = { + rocrand_uniform_double(&states), rocrand_uniform_double(&states), + rocrand_uniform_double(&states), rocrand_uniform_double(&states) + }; + } + ); +} + +template +__global__ void mtgp32_kernel (rocrand_state_mtgp32 * states, T * output, const size_t N, const UDFunction & f){ + const unsigned int state_id = blockIdx.x; + const unsigned int thread_id = threadIdx.x; + unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; + + if(index >= N) + return; + + __shared__ rocrand_state_mtgp32 state; + if(thread_id == 0) + state = states[state_id]; + __syncthreads(); + + // output[index] = rocrand_uniform(&state); + output[index] = f(&state); + + if(thread_id == 0) + states[state_id] = state; +} + +TEST(uniform_distribution_tests, float_mtgp32_test){ + size_t testSize = 40192; + size_t threads = 256; + size_t blocks = std::ceil(static_cast(testSize) / static_cast(threads)); + + rocrand_state_mtgp32 * states; + size_t state_size = blocks, seed = 654321; + HIP_CHECK(hipMalloc(&states, state_size * sizeof(rocrand_state_mtgp32))); + rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, state_size, seed); + + float * hOut = new float[testSize]; + float * dOut; + HIP_CHECK(hipMalloc(&dOut, sizeof(float) * testSize)); + HIP_CHECK(hipDeviceSynchronize()); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(mtgp32_kernel), + dim3(blocks), + dim3(threads), + 0, + 0, + states, + dOut, + testSize, + [] __device__ (rocrand_state_mtgp32 * state){ + return rocrand_uniform(state); + } + ); + + HIP_CHECK(hipMemcpy(hOut, dOut, sizeof(float) * testSize, hipMemcpyDeviceToHost)); + + double mean = 0.0; + for(size_t i = 0; i < testSize; i++){ + ASSERT_GT(hOut[i], 0.0); + ASSERT_LE(hOut[i], 1.0); + + mean += hOut[i]; + } + + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(hOut[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double eMean = 0.5 * (0 + 1); // 0.5(a + b) + double eStd = (1 - 0) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1) << "Expected Mean: " << eMean << " Actual Mean: " << mean << " Eps: " << eMean * 0.1; + ASSERT_NEAR(std, eStd, eStd * 0.1) << "Expected Std: " << eStd << " Actual Std: " << std << " Eps: " << eStd * 0.1; + + HIP_CHECK(hipFree(states)); + HIP_CHECK(hipFree(dOut)); + + delete [] hOut; +} + +TEST(uniform_distribution_tests, double_mtgp32_test){ + size_t testSize = 40192; + size_t threads = 256; + size_t blocks = std::ceil(static_cast(testSize) / static_cast(threads)); + + rocrand_state_mtgp32 * states; + size_t state_size = blocks, seed = 654321; + HIP_CHECK(hipMalloc(&states, state_size * sizeof(rocrand_state_mtgp32))); + rocrand_make_state_mtgp32(states, mtgp32dc_params_fast_11213, state_size, seed); + + double * hOut = new double[testSize]; + double * dOut; + HIP_CHECK(hipMalloc(&dOut, sizeof(double) * testSize)); + HIP_CHECK(hipDeviceSynchronize()); + + hipLaunchKernelGGL( + HIP_KERNEL_NAME(mtgp32_kernel), + dim3(blocks), + dim3(threads), + 0, + 0, + states, + dOut, + testSize, + [] __device__ (rocrand_state_mtgp32 * state){ + return rocrand_uniform_double(state); + } + ); + + HIP_CHECK(hipMemcpy(hOut, dOut, sizeof(double) * testSize, hipMemcpyDeviceToHost)); + + double mean = 0.0; + for(size_t i = 0; i < testSize; i++){ + ASSERT_GT(hOut[i], 0.0); + ASSERT_LE(hOut[i], 1.0); + + mean += hOut[i]; + } + + mean /= testSize; + + double std = 0.0; + for(size_t i = 0; i < testSize; i++) + std += std::pow(hOut[i] - mean, 2); + + std = std::sqrt(std / testSize); + + double eMean = 0.5 * (0 + 1); // 0.5(a + b) + double eStd = (1 - 0) / (2 * std::sqrt(3)); // (b - a) / (2*3^0.5) + + ASSERT_NEAR(mean, eMean, eMean * 0.1) << "Expected Mean: " << eMean << " Actual Mean: " << mean << " Eps: " << eMean * 0.1; + ASSERT_NEAR(std, eStd, eStd * 0.1) << "Expected Std: " << eStd << " Actual Std: " << std << " Eps: " << eStd * 0.1; + + HIP_CHECK(hipFree(states)); + HIP_CHECK(hipFree(dOut)); + + delete [] hOut; +} + +/* ################################################# + + TEST HOST SIDE + + ###############################################*/ + +template +void run_host_num_test(const GenFunc& gf, const ReadMeanFunc& rmf, const ReadStdFunc& rsf) +{ + constexpr size_t test_size = 50000; + + const T expected_mean = 0.5; + const T expected_std_dev = 0.288675134595; //sqrt(1/12) + + std::vector output(test_size); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis( + std::numeric_limits::min(), + std::numeric_limits::max()); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = gf(dis, gen); + } + + T actual_mean = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutType x) { return acc + rmf(x); }) + / static_cast(test_size * OutSize); + + T actual_std_dev = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutType x) { return acc + rsf(x, actual_mean); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size * OutSize - 1)); + + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); +} + +TEST(UniformHostTest, float_out_uint_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::uniform_distribution((unsigned int)(dis(gen) >> 32)); }, + [](float x) { return x; }, + [](float x, float actual_mean) { return std::powf(x - actual_mean, 2); }); +} +TEST(UniformHostTest, float_out_ulonglongint_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::uniform_distribution(dis(gen)); }, + [](float x) { return x; }, + [](float x, float actual_mean) { return std::powf(x - actual_mean, 2); }); +} + +TEST(UniformHostTest, float4_out_uint4_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + unsigned long long a = dis(gen); + unsigned long long b = dis(gen); + + return rocrand_device::detail::uniform_distribution4( + uint4{a & 0xffffffff, a >> 32, b & 0xffffffff, b >> 32}); + }, + [](float4 x) { return x.w + x.x + x.y + x.z; }, + [](float4 x, float actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }); +} + +TEST(UniformHostTest, float4_out_ulonglong4_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + return rocrand_device::detail::uniform_distribution4( + ulonglong4{dis(gen), dis(gen), dis(gen), dis(gen)}); + }, + [](float4 x) { return x.w + x.x + x.y + x.z; }, + [](float4 x, float actual_mean) + { + return std::powf(x.w - actual_mean, 2) + std::powf(x.x - actual_mean, 2) + + std::powf(x.y - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }); +} + +TEST(UniformHostTest, double_out_uint_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) { + return rocrand_device::detail::uniform_distribution_double( + (unsigned int)(dis(gen) >> 32)); + }, + [](double x) { return x; }, + [](double x, double actual_mean) { return std::powf(x - actual_mean, 2); }); +} + +TEST(UniformHostTest, double_out_2uint_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + unsigned long long temp = dis(gen); + return rocrand_device::detail::uniform_distribution_double( + (unsigned int)(temp >> 32), + (unsigned int)(temp & 0xffffffff)); + }, + [](double x) { return x; }, + [](double x, double actual_mean) { return std::powf(x - actual_mean, 2); }); +} + +TEST(UniformHostTest, double_out_ulonglong_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { return rocrand_device::detail::uniform_distribution_double(dis(gen)); }, + [](double x) { return x; }, + [](double x, double actual_mean) { return std::powf(x - actual_mean, 2); }); +} + +TEST(UniformHostTest, double2_out_uint4_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + unsigned long long a = dis(gen); + unsigned long long b = dis(gen); + return rocrand_device::detail::uniform_distribution_double2( + uint4{a & 0xffffffff, a >> 32, b & 0xffffffff, b >> 32}); + }, + [](double2 x) { return x.x + x.y; }, + [](double2 x, double actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }); +} + +TEST(UniformHostTest, double4_out_2uint4_in) +{ + run_host_num_test( + + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + unsigned long long a = dis(gen); + unsigned long long b = dis(gen); + unsigned long long c = dis(gen); + unsigned long long d = dis(gen); + + return rocrand_device::detail::uniform_distribution_double4( + uint4{a & 0xffffffff, a >> 32, b & 0xffffffff, b >> 32}, + uint4{c & 0xffffffff, c >> 32, d & 0xffffffff, d >> 32}); + }, + [](double4 x) { return x.x + x.y + x.z + x.w; }, + + [](double4 x, double actual_mean) + { + return std::pow(x.x - actual_mean, 2) + std::pow(x.y - actual_mean, 2) + + std::pow(x.z - actual_mean, 2) + std::pow(x.w - actual_mean, 2); + }); +} + +TEST(UniformHostTest, double2_out_ulonglong2_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) { + return rocrand_device::detail::uniform_distribution_double2( + ulonglong2{dis(gen), dis(gen)}); + }, + [](double2 x) { return x.x + x.y; }, + + [](double2 x, double actual_mean) + { return std::pow(x.x - actual_mean, 2) + std::pow(x.y - actual_mean, 2); }); +} + +TEST(UniformHostTest, double2_out_ulonglong4_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + return rocrand_device::detail::uniform_distribution_double2( + ulonglong4{dis(gen), dis(gen), dis(gen), dis(gen)}); + }, + [](double2 x) { return x.x + x.y; }, + + [](double2 x, double actual_mean) + { return std::pow(x.x - actual_mean, 2) + std::pow(x.y - actual_mean, 2); }); +} + +TEST(UniformHostTest, double4_out_ulonglong4_in) +{ + run_host_num_test( + [](std::uniform_int_distribution& dis, std::mt19937& gen) + { + return rocrand_device::detail::uniform_distribution_double4( + ulonglong4{dis(gen), dis(gen), dis(gen), dis(gen)}); + }, + [](double4 x) { return x.x + x.y + x.z + x.w; }, + + [](double4 x, double actual_mean) + { + return std::pow(x.x - actual_mean, 2) + std::pow(x.y - actual_mean, 2) + + std::pow(x.z - actual_mean, 2) + std::pow(x.w - actual_mean, 2); + }); +} + +template +inline void GetRocrandState(RocrandPRNGType* host_state) +{ + if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol32 case + else if constexpr(std::is_same_v) + { + const unsigned int* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors32(&directions, ROCRAND_DIRECTION_VECTORS_32_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, host_state); + } + // scrambled sobol64 case + else if constexpr(std::is_same_v) + { + const unsigned long long* directions; + ROCRAND_CHECK( + rocrand_get_direction_vectors64(&directions, ROCRAND_DIRECTION_VECTORS_64_JOEKUO6)); + rocrand_init(directions, 123456, 654321, host_state); + } + // lfsr113 case + else if constexpr(std::is_same_v) + { + rocrand_init({0xabcd, 0xdabc, 0xcdab, 0xbcda}, 0, 0, host_state); + } + else + { + rocrand_init(123456, 654321, 0, host_state); + } +} + +template +struct HostParams +{ + using out_type = OutputType; + using rng = RocrandPRNGType; + static constexpr size_t out_size = OutSize; +}; + +using UniformParams = ::testing::Types, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams, + HostParams>; + +template +void run_host_prng_test(const UniformFunc& lnf, const ReadMeanFunc& rmf, ReadStdFunc& rsf) +{ + constexpr size_t test_size = 50000; + const T expected_mean = 0.5; + const T expected_std_dev = 0.288675134595; //sqrt(1/12) + + RocrandPRNGType generator; + GetRocrandState(&generator); + + std::vector output(test_size); + + for(size_t i = 0; i < test_size; i++) + { + output[i] = lnf(&generator); + } + + T actual_mean = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutputType x) { return acc + rmf(x); }) + / static_cast(test_size * OutSize); + + T actual_std_dev + = std::accumulate(output.begin(), + output.end(), + (T)0, + [=](T acc, OutputType x) { return acc + rsf(x, actual_mean); }); + actual_std_dev = std::sqrt(actual_std_dev / static_cast(test_size * OutSize - 1)); + + T mean_eps = GET_EPS(expected_mean); + T std_dev_eps = GET_EPS(expected_std_dev); + + ASSERT_NEAR(expected_mean, actual_mean, mean_eps); + ASSERT_NEAR(expected_std_dev, actual_std_dev, std_dev_eps); +} + +template +class UniformRocRandStateHostTest : public ::testing::Test +{ +public: + using out_type = typename HostParams::out_type; + using prng_type = typename HostParams::rng; + static constexpr size_t out_size = HostParams::out_size; +}; + +TYPED_TEST_SUITE(UniformRocRandStateHostTest, UniformParams); +TYPED_TEST(UniformRocRandStateHostTest, rocrand_state_tests) +{ + using out_type = typename TestFixture::out_type; + using rocrand_state = typename TestFixture::prng_type; + constexpr size_t out_size = TestFixture::out_size; + using T + = std::conditional_t<(std::is_same_v || std::is_same_v + || std::is_same_v), + float, + double>; + + if constexpr(out_size == 1) + { + auto mean_func = [](out_type x) { return x; }; + auto std_dev_func + = [](out_type x, out_type actual_mean) { return std::powf(x - actual_mean, 2); }; + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform_double(state); }, + mean_func, + std_dev_func); + } + } + else if constexpr(out_size == 2) + { + auto mean_func = [](out_type x) { return x.x + x.y; }; + auto std_dev_func = [](out_type x, T actual_mean) + { return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2); }; + + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform2(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform_double2(state); }, + mean_func, + std_dev_func); + } + } + else + { + auto mean_func = [](out_type x) { return x.x + x.y + x.w + x.z; }; + auto std_dev_func = [](out_type x, T actual_mean) + { + return std::powf(x.x - actual_mean, 2) + std::powf(x.y - actual_mean, 2) + + std::powf(x.w - actual_mean, 2) + std::powf(x.z - actual_mean, 2); + }; + + if constexpr(std::is_same_v) + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform4(state); }, + mean_func, + std_dev_func); + } + else + { + run_host_prng_test( + [=](rocrand_state* state) { return rocrand_uniform_double4(state); }, + mean_func, + std_dev_func); + } + } +}