Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
419 changes: 381 additions & 38 deletions clients/hipfft_params.h

Large diffs are not rendered by default.

35 changes: 26 additions & 9 deletions clients/tests/accuracy_test_1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,8 @@ static std::vector<size_t> small_1D_sizes()
static const size_t SMALL_1D_MAX = 8192;

// generate a list of sizes from 2 and up, skipping any sizes that are already covered
std::vector<size_t> covered_sizes;
std::copy(pow2_range.begin(), pow2_range.end(), std::back_inserter(covered_sizes));
std::copy(pow3_range.begin(), pow3_range.end(), std::back_inserter(covered_sizes));
std::copy(pow5_range.begin(), pow5_range.end(), std::back_inserter(covered_sizes));
std::copy(radX_range.begin(), radX_range.end(), std::back_inserter(covered_sizes));
std::copy(mix_range.begin(), mix_range.end(), std::back_inserter(covered_sizes));
std::copy(prime_range.begin(), prime_range.end(), std::back_inserter(covered_sizes));
std::sort(covered_sizes.begin(), covered_sizes.end());
std::vector<size_t> covered_sizes = merge_and_sort_values<size_t>(
{pow2_range, pow3_range, pow5_range, radX_range, mix_range, prime_range});

std::vector<size_t> output;
for(size_t i = 2; i < SMALL_1D_MAX; ++i)
Expand Down Expand Up @@ -319,12 +313,14 @@ INSTANTIATE_TEST_SUITE_P(DISABLED_offset_mix_1D,

// small 1D sizes just need to make sure our factorization isn't
// completely broken, so we just check simple C2C outplace interleaved.
//
// The list returned by small_1D_sizes() is stored in a named static so it
// can be referred to by name: the small_1D suite below and
// lengths_for_disabled_autoalloc further down in this file both read it.
const static std::vector<size_t> small_1D_lengths = small_1D_sizes();

INSTANTIATE_TEST_SUITE_P(
small_1D,
accuracy_test,
::testing::ValuesIn(param_generator_base(test_prob,
{fft_transform_type_complex_forward},
generate_lengths({small_1D_sizes()}),
generate_lengths({small_1D_lengths}),
{fft_precision_single},
{1},
generate_types,
Expand Down Expand Up @@ -530,3 +526,24 @@ INSTANTIATE_TEST_SUITE_P(
ooffset_range_zero,
place_range)),
accuracy_test::TestName);

// Lengths used by the various_1D suite, which passes fft_auto_allocation_off
// to param_generator.  The list is built by merge_and_sort_values from the
// 1D ranges named in the braces.
//
// small_1D_lengths is one of the inputs, so this definition has to stay after
// the definition of small_1D_lengths in this translation unit: dynamically
// initialized namespace-scope variables are initialized in definition order.
//
// NOTE(review): the trailing 128 presumably limits how many lengths are kept;
// merge_and_sort_values is defined elsewhere -- confirm against its signature.
const static std::vector<size_t> lengths_for_disabled_autoalloc = merge_and_sort_values<size_t>(
{pow2_range, pow3_range, pow5_range, radX_range, mix_range, small_1D_lengths, prime_range},
128);

// various_1D: instantiates accuracy_test over every combination that
// param_generator produces from lengths_for_disabled_autoalloc, the precisions
// in precision_range_sp_dp, batch_range_1D, stride_range (passed twice),
// zero input/output offsets and place_range.
//
// The last argument, fft_auto_allocation_off, is what distinguishes this
// suite from the other 1D suites in this file.
//
// NOTE(review): the meaning of the two `false` arguments is fixed by
// param_generator's parameter list, which is not visible here -- confirm
// before changing them.
INSTANTIATE_TEST_SUITE_P(
various_1D,
accuracy_test,
::testing::ValuesIn(param_generator(test_prob,
generate_lengths({lengths_for_disabled_autoalloc}),
precision_range_sp_dp,
batch_range_1D,
stride_range,
stride_range,
ioffset_range_zero,
ooffset_range_zero,
place_range,
false,
false,
fft_auto_allocation_off)),
accuracy_test::TestName);
21 changes: 21 additions & 0 deletions clients/tests/accuracy_test_2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,24 @@ INSTANTIATE_TEST_SUITE_P(len1_swap_2D,
false,
false)),
accuracy_test::TestName);

// Per-dimension lengths used by the various_2D suite, which passes
// fft_auto_allocation_off to param_generator.  Built by merge_and_sort_values
// from pow2_range, pow3_range, prime_range and mix_range.
//
// NOTE(review): the trailing 12 presumably limits how many lengths are kept
// (the same list is used for both dimensions, so the number of length
// combinations grows with its square); merge_and_sort_values is defined
// elsewhere -- confirm against its signature.
const static std::vector<size_t> lengths_for_disabled_autoalloc
= merge_and_sort_values<size_t>({pow2_range, pow3_range, prime_range, mix_range}, 12);

// various_2D: instantiates accuracy_test over every combination that
// param_generator produces from the given ranges.  The same list,
// lengths_for_disabled_autoalloc, is handed to generate_lengths for both
// dimensions.
//
// The last argument, fft_auto_allocation_off, is what distinguishes this
// suite from the other 2D suites in this file.
//
// NOTE(review): the meaning of the two `false` arguments is fixed by
// param_generator's parameter list, which is not visible here -- confirm
// before changing them.
INSTANTIATE_TEST_SUITE_P(
various_2D,
accuracy_test,
::testing::ValuesIn(param_generator(test_prob,
generate_lengths({lengths_for_disabled_autoalloc,
lengths_for_disabled_autoalloc}),
precision_range_sp_dp,
batch_range,
stride_range,
stride_range,
ioffset_range_zero,
ooffset_range_zero,
place_range,
false,
false,
fft_auto_allocation_off)),
accuracy_test::TestName);
22 changes: 22 additions & 0 deletions clients/tests/accuracy_test_3D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,3 +284,25 @@ INSTANTIATE_TEST_SUITE_P(
false,
false)),
accuracy_test::TestName);

// Per-dimension lengths used by the various_3D suite, which passes
// fft_auto_allocation_off to param_generator.  Built by merge_and_sort_values
// from pow2_range, pow3_range, pow5_range, prime_range and sbrc_range.
//
// NOTE(review): the trailing 5 presumably limits how many lengths are kept
// (the same list is used for all three dimensions, so the number of length
// combinations grows with its cube); merge_and_sort_values is defined
// elsewhere -- confirm against its signature.
const static std::vector<size_t> lengths_for_disabled_autoalloc = merge_and_sort_values<size_t>(
{pow2_range, pow3_range, pow5_range, prime_range, sbrc_range}, 5);

// various_3D: instantiates accuracy_test over every combination that
// param_generator produces from the given ranges.  The same list,
// lengths_for_disabled_autoalloc, is handed to generate_lengths for all three
// dimensions.
//
// The last argument, fft_auto_allocation_off, is what distinguishes this
// suite from the other 3D suites in this file.
//
// NOTE(review): the meaning of the two `false` arguments is fixed by
// param_generator's parameter list, which is not visible here -- confirm
// before changing them.
INSTANTIATE_TEST_SUITE_P(
various_3D,
accuracy_test,
::testing::ValuesIn(param_generator(test_prob,
generate_lengths({lengths_for_disabled_autoalloc,
lengths_for_disabled_autoalloc,
lengths_for_disabled_autoalloc}),
precision_range_sp_dp,
batch_range,
stride_range,
stride_range,
ioffset_range_zero,
ooffset_range_zero,
place_range,
false,
false,
fft_auto_allocation_off)),
accuracy_test::TestName);
8 changes: 8 additions & 0 deletions clients/tests/gtest_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
#include "hipfft_accuracy_test.h"
#include "hipfft_test_params.h"

// Out-of-class definition of the static data member declared in
// hipfft_params; the vector starts out empty.
std::vector<gpubuf> hipfft_params::externally_managed_workareas{};

// Control output verbosity:
int verbose;

Expand Down Expand Up @@ -340,6 +343,11 @@ int main(int argc, char* argv[])
non_token->add_flag("--callback", "Inject load/store callbacks")->each([&](const std::string&) {
manual_params.run_callbacks = true;
});
non_token
->add_option("--auto_allocation",
manual_params.auto_allocate,
"Backend library's auto-allocation behavior: \"on\", \"off\", or \"default\"")
->default_val("default");
non_token
->add_flag("--double", "Double precision transform (deprecated: use --precision double)")
->each([&](const std::string&) { manual_params.precision = fft_precision_double; });
Expand Down
18 changes: 15 additions & 3 deletions clients/tests/multi_device_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ enum SplitType
PENCIL_3D,
};

std::vector<fft_params> param_generator_multi_gpu(const std::optional<SplitType> type)
std::vector<fft_params> param_generator_multi_gpu(const std::optional<SplitType> type,
fft_auto_allocation auto_alloc_setting
= fft_auto_allocation_default)
{
int localDeviceCount = 0;
(void)hipGetDeviceCount(&localDeviceCount);
Expand All @@ -80,7 +82,9 @@ std::vector<fft_params> param_generator_multi_gpu(const std::optional<SplitType>
ioffset_range_zero,
ooffset_range_zero,
place_range,
false);
false,
false,
auto_alloc_setting);

auto params_real = param_generator_real(test_prob,
multi_gpu_sizes,
Expand All @@ -91,7 +95,9 @@ std::vector<fft_params> param_generator_multi_gpu(const std::optional<SplitType>
ioffset_range_zero,
ooffset_range_zero,
{fft_placement_notinplace},
false);
false,
false,
auto_alloc_setting);

std::vector<fft_params> all_params;

Expand Down Expand Up @@ -229,3 +235,9 @@ INSTANTIATE_TEST_SUITE_P(multi_gpu,
accuracy_test,
::testing::ValuesIn(param_generator_multi_gpu({})),
accuracy_test::TestName);

// Same multi-GPU parameter set as the multi_gpu suite above (an empty
// optional is passed as the split type), but generated with
// fft_auto_allocation_off instead of the default auto-allocation setting.
//
// The DISABLED_ prefix follows the GoogleTest convention for tests that are
// skipped unless --gtest_also_run_disabled_tests is given.
// NOTE(review): the reason this variant is disabled is not recorded here.
INSTANTIATE_TEST_SUITE_P(DISABLED_multi_gpu,
accuracy_test,
::testing::ValuesIn(param_generator_multi_gpu({},
fft_auto_allocation_off)),
accuracy_test::TestName);
58 changes: 17 additions & 41 deletions library/src/amd_detail/hipfft.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,6 @@
#include "../../../shared/ptrdiff.h"
#include "../../../shared/rocfft_hip.h"

// Evaluates `ret` exactly once and, if the result is anything other than
// rocfft_status_success, returns HIPFFT_ALLOC_FAILED from the function in
// which the macro is expanded.
//
// The expansion is a braced block that declares a local named `code`, so it
// can only be used inside a function whose return type accepts
// HIPFFT_ALLOC_FAILED.
#define ROC_FFT_CHECK_ALLOC_FAILED(ret) \
{ \
auto code = ret; \
if(code != rocfft_status_success) \
{ \
return HIPFFT_ALLOC_FAILED; \
} \
}

#define ROC_FFT_CHECK_INVALID_VALUE(ret) \
{ \
auto code = ret; \
Expand Down Expand Up @@ -410,31 +401,6 @@ catch(...)
return handle_exception();
}

// Creates a new plan handle and configures it as a batched transform whose
// lengths, embeddings, strides, distances and batch count are given as
// 64-bit values.
//
// plan receives the newly created handle and must not be null.  All other
// arguments are forwarded unchanged to hipfftMakePlanMany64, together with a
// null work-size pointer.
//
// Returns HIPFFT_INVALID_VALUE when plan is null.  A failing hipfftCreate is
// handled by HIP_FFT_CHECK_AND_RETURN; otherwise the result of
// hipfftMakePlanMany64 is returned.  Any exception is routed through
// handle_exception().
//
// The handle is stored in *plan before the plan is made, so *plan is set even
// when hipfftMakePlanMany64 reports an error.
hipfftResult hipfftPlanMany64(hipfftHandle* plan,
                              int rank,
                              long long int* n,
                              long long int* inembed,
                              long long int istride,
                              long long int idist,
                              long long int* onembed,
                              long long int ostride,
                              long long int odist,
                              hipfftType type,
                              long long int batch)
try
{
    // Reject a null output pointer up front rather than dereferencing it
    // below; this also avoids creating a handle nobody could receive.
    if(plan == nullptr)
        return HIPFFT_INVALID_VALUE;

    hipfftHandle handle = nullptr;
    HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&handle));
    *plan = handle;

    return hipfftMakePlanMany64(
        *plan, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch, nullptr);
}
catch(...)
{
    return handle_exception();
}

hipfftResult hipfftMakePlan_internal(hipfftHandle plan,
size_t dim,
size_t* lengths,
Expand Down Expand Up @@ -1262,6 +1228,7 @@ try

hipfftHandle p;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;
HIP_FFT_CHECK_AND_RETURN(hipfftMakePlan1d(p, nx, type, batch, workSize));
HIP_FFT_CHECK_AND_RETURN(hipfftDestroy(p));

Expand All @@ -1282,6 +1249,7 @@ try

hipfftHandle p;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;
HIP_FFT_CHECK_AND_RETURN(hipfftMakePlan2d(p, nx, ny, type, workSize));
HIP_FFT_CHECK_AND_RETURN(hipfftDestroy(p));

Expand All @@ -1303,6 +1271,7 @@ try

hipfftHandle p;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;
HIP_FFT_CHECK_AND_RETURN(hipfftMakePlan3d(p, nx, ny, nz, type, workSize));
HIP_FFT_CHECK_AND_RETURN(hipfftDestroy(p));

Expand All @@ -1327,10 +1296,13 @@ hipfftResult hipfftGetSizeMany(hipfftHandle plan,
size_t* workSize)
try
{
hipfftHandle p;
HIP_FFT_CHECK_AND_RETURN(
hipfftPlanMany(&p, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch));
*workSize = p->workBufferSize;
if(workSize == nullptr)
return HIPFFT_INVALID_VALUE;
hipfftHandle p = nullptr;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;
HIP_FFT_CHECK_AND_RETURN(hipfftMakePlanMany(
p, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch, workSize));
HIP_FFT_CHECK_AND_RETURN(hipfftDestroy(p));

return HIPFFT_SUCCESS;
Expand All @@ -1354,10 +1326,13 @@ hipfftResult hipfftGetSizeMany64(hipfftHandle plan,
size_t* workSize)
try
{
if(workSize == nullptr)
return HIPFFT_INVALID_VALUE;
hipfftHandle p = nullptr;
HIP_FFT_CHECK_AND_RETURN(hipfftPlanMany64(
&p, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch));
*workSize = p->workBufferSize;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;
HIP_FFT_CHECK_AND_RETURN(hipfftMakePlanMany64(
p, rank, n, inembed, istride, idist, onembed, ostride, odist, type, batch, workSize));
HIP_FFT_CHECK_AND_RETURN(hipfftDestroy(p));

return HIPFFT_SUCCESS;
Expand Down Expand Up @@ -1831,6 +1806,7 @@ try

hipfftHandle p;
HIP_FFT_CHECK_AND_RETURN(hipfftCreate(&p));
p->autoAllocate = false;

HIP_FFT_CHECK_AND_RETURN(hipfftMakePlanMany_internal(
p, rank, n, inembed, istride, idist, onembed, ostride, odist, iotype, batch, workSize));
Expand Down
4 changes: 2 additions & 2 deletions shared/accuracy_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ inline void run_round_trip_inverse(Tparams& params,
catch(fft_params::work_buffer_alloc_failure& e)
{
std::stringstream ss;
ss << "Failed to allocate work buffer (size: " << params.workbuffersize << ")";
ss << "Failed to allocate work buffer (size: " << e.attempted_size << ")";
++n_hip_failures;
if(skip_runtime_fails)
{
Expand Down Expand Up @@ -768,7 +768,7 @@ inline void fft_vs_reference_impl(Tparams& params, bool round_trip)
{
++n_hip_failures;
std::stringstream ss;
ss << "Work buffer allocation failed with size: " << params.workbuffersize;
ss << "Work buffer allocation failed with size: " << e.attempted_size;
if(skip_runtime_fails)
{
throw ROCFFT_SKIP{ss.str()};
Expand Down
Loading