Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion clients/bench/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,11 @@ int main(int argc, char* argv[])
"Type of transform:\n0) complex forward\n1) complex inverse\n2) real "
"forward\n3) real inverse")
->default_val(fft_transform_type_complex_forward);

non_token
->add_option("--auto_allocation",
params.auto_allocate,
"rocFFT's auto-allocation behavior: \"on\", \"off\", or \"default\"")
->default_val("default");
non_token
->add_option(
"--precision", params.precision, "Transform precision: single (default), double, half")
Expand Down
5 changes: 5 additions & 0 deletions clients/tests/gtest_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,11 @@ int main(int argc, char* argv[])
"Type of transform:\n0) complex forward\n1) complex inverse\n2) real "
"forward\n3) real inverse")
->default_val(fft_transform_type_complex_forward);
non_token
->add_option("--auto_allocation",
manual_params.auto_allocate,
"rocFFT's auto-allocation behavior: \"on\", \"off\", or \"default\"")
->default_val("default");
non_token
->add_option("--precision",
manual_params.precision,
Expand Down
4 changes: 2 additions & 2 deletions shared/accuracy_test.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ inline void run_round_trip_inverse(Tparams& params,
catch(fft_params::work_buffer_alloc_failure& e)
{
std::stringstream ss;
ss << "Failed to allocate work buffer (size: " << params.workbuffersize << ")";
ss << "Failed to allocate work buffer (size: " << e.attempted_size << ")";
++n_hip_failures;
if(skip_runtime_fails)
{
Expand Down Expand Up @@ -770,7 +770,7 @@ inline void fft_vs_reference_impl(Tparams& params, bool round_trip)
{
++n_hip_failures;
std::stringstream ss;
ss << "Work buffer allocation failed with size: " << params.workbuffersize;
ss << "Work buffer allocation failed with size: " << e.attempted_size;
if(skip_runtime_fails)
{
throw ROCFFT_SKIP{ss.str()};
Expand Down
77 changes: 62 additions & 15 deletions shared/fft_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ enum fft_precision
fft_precision_double,
};

// Used for CLI11 parsing of input gen enum
// Used for CLI11 parsing of precision enum
static bool lexical_cast(const std::string& word, fft_precision& precision)
{
if(word == "half")
Expand All @@ -88,6 +88,28 @@ static bool lexical_cast(const std::string& word, fft_precision& precision)
return true;
}

// Requested rocFFT auto-allocation behavior. On the command line the
// values are spelled "on", "off" and "default" respectively.
enum fft_auto_allocation
{
    fft_auto_allocation_on      = 0,
    fft_auto_allocation_off     = 1,
    fft_auto_allocation_default = 2,
};

// CLI11 conversion hook for the auto-allocation enum.
// Maps the exact words "on", "off" and "default" onto the matching
// enumerator, stores it in auto_allocation and returns true.
// Any other word raises std::runtime_error; false is never returned.
static bool lexical_cast(const std::string& word, fft_auto_allocation& auto_allocation)
{
    if(word == "on")
    {
        auto_allocation = fft_auto_allocation_on;
        return true;
    }
    if(word == "off")
    {
        auto_allocation = fft_auto_allocation_off;
        return true;
    }
    if(word == "default")
    {
        auto_allocation = fft_auto_allocation_default;
        return true;
    }
    throw std::runtime_error(
        "Invalid auto-allocation behavior specified (choose \"on\", \"off\", or \"default\")");
}

// fft_input_generator: linearly spaced sequence in [-0.5,0.5]
// fft_input_random_generator: pseudo-random sequence in [-0.5,0.5]
enum fft_input_generator
Expand Down Expand Up @@ -491,7 +513,7 @@ class fft_params
fft_input_generator igen = fft_input_random_generator_host;
#endif

size_t workbuffersize = 0;
fft_auto_allocation auto_allocate = fft_auto_allocation_default;

enum fft_mp_lib
{
Expand Down Expand Up @@ -1063,6 +1085,12 @@ class fft_params
ret += std::to_string(multiGPU);
}

if(auto_allocate != fft_auto_allocation_default)
{
ret += "_autoallocation_";
ret += (auto_allocate == fft_auto_allocation_on ? "on" : "off");
}

return ret;
}

Expand Down Expand Up @@ -1222,6 +1250,13 @@ class fft_params
++pos;
multiGPU = std::stoull(vals[pos++]);
}

auto_allocate = fft_auto_allocation_default; // default if unspecified
if(pos < vals.size() && vals[pos] == "autoallocation")
{
++pos;
lexical_cast(vals[pos++], auto_allocate);
}
}

// Stream output operator (for gtest, etc).
Expand Down Expand Up @@ -2271,7 +2306,18 @@ class fft_params
// Tests that hit this can't fit on the GPU and should be skipped.
// Exception that records the size of the work buffer allocation that
// could not be satisfied, alongside the usual runtime_error message.
struct work_buffer_alloc_failure : public std::runtime_error
{
work_buffer_alloc_failure(const std::string& s)
// Size supplied by the thrower for the allocation that failed;
// stays 0 when the thrower does not provide one.
const size_t attempted_size;
// s is forwarded to std::runtime_error as the message;
// _attempted_size is stored in attempted_size (defaults to 0).
work_buffer_alloc_failure(const std::string& s, size_t _attempted_size = 0)
: std::runtime_error(s)
, attempted_size(_attempted_size)
{
}
};

// Specific exception type for unimplemented feature(s).
struct unimplemented_exception : public std::runtime_error
{
unimplemented_exception(const std::string& s)
: std::runtime_error(s)
{
}
Expand All @@ -2297,18 +2343,19 @@ class fft_params
throw std::runtime_error("Transform type not forward.");
}

length = params_forward.length;
istride = params_forward.ostride;
ostride = params_forward.istride;
nbatch = params_forward.nbatch;
precision = params_forward.precision;
placement = params_forward.placement;
idist = params_forward.odist;
odist = params_forward.idist;
itype = params_forward.otype;
otype = params_forward.itype;
ioffset = params_forward.ooffset;
ooffset = params_forward.ioffset;
length = params_forward.length;
istride = params_forward.ostride;
ostride = params_forward.istride;
nbatch = params_forward.nbatch;
precision = params_forward.precision;
placement = params_forward.placement;
idist = params_forward.odist;
odist = params_forward.idist;
itype = params_forward.otype;
otype = params_forward.itype;
ioffset = params_forward.ooffset;
ooffset = params_forward.ioffset;
auto_allocate = params_forward.auto_allocate;

run_callbacks = params_forward.run_callbacks;
multiGPU = params_forward.multiGPU;
Expand Down
54 changes: 39 additions & 15 deletions shared/params_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,27 @@ inline double hash_prob(const int seed, const std::string& token)
return roll;
}

// Flatten several lists of arithmetic values into a single ascending list
// without duplicates. When the result holds more than max_num_elem entries,
// entries at pseudo-randomly drawn positions (engine seeded with
// random_seed) are removed one at a time until the cap is respected; the
// remaining entries keep their sorted order.
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, bool> = true>
std::vector<T> merge_and_sort_values(const std::vector<std::vector<T>>& set_of_vecs,
                                     size_t max_num_elem = std::numeric_limits<size_t>::max())
{
    // concatenate every input list
    std::vector<T> result;
    for(const auto& values : set_of_vecs)
        result.insert(result.end(), values.begin(), values.end());

    // order the values and drop repeated ones
    std::sort(result.begin(), result.end());
    result.erase(std::unique(result.begin(), result.end()), result.end());

    // trim down to the requested maximum, one draw per removed entry
    std::ranlux24_base engine(random_seed);
    while(max_num_elem < result.size())
    {
        const size_t victim = static_cast<size_t>(engine()) % result.size();
        result.erase(result.begin() + victim);
    }
    return result;
}

// Given a vector of vector of lengths, generate all unique permutations.
// Add an optional vector of ad-hoc lengths to the result.
inline std::vector<std::vector<size_t>>
Expand Down Expand Up @@ -248,7 +269,8 @@ inline auto param_generator_base(const double base_p
const std::vector<std::vector<size_t>>& ooffset_range,
const std::vector<fft_result_placement>& place_range,
const bool planar = true,
const bool run_callbacks = false)
const bool run_callbacks = false,
const fft_auto_allocation auto_alloc = fft_auto_allocation_default)
{
std::vector<fft_params> params;

Expand Down Expand Up @@ -300,6 +322,7 @@ inline auto param_generator_base(const double base_p
param.otype = std::get<3>(types);
param.ioffset = ioffset;
param.ooffset = ooffset;
param.auto_allocate = auto_alloc;

if(run_callbacks)
{
Expand Down Expand Up @@ -369,8 +392,8 @@ inline auto param_generator(const double base_prob,
const std::vector<std::vector<size_t>>& ooffset_range,
const std::vector<fft_result_placement>& place_range,
const bool planar,

const bool run_callbacks = false)
const bool run_callbacks = false,
const fft_auto_allocation auto_alloc = fft_auto_allocation_default)
{
return param_generator_base(base_prob,
trans_type_range,
Expand All @@ -383,9 +406,9 @@ inline auto param_generator(const double base_prob,
ioffset_range,
ooffset_range,
place_range,

planar,
run_callbacks);
run_callbacks,
auto_alloc);
}

// Create an array of parameters to pass to gtest. Only tests complex-type transforms
Expand All @@ -398,9 +421,10 @@ inline auto param_generator_complex(const double bas
const std::vector<std::vector<size_t>>& ioffset_range,
const std::vector<std::vector<size_t>>& ooffset_range,
const std::vector<fft_result_placement>& place_range,

const bool planar,
const bool run_callbacks = false)
const bool planar,
const bool run_callbacks = false,
const fft_auto_allocation auto_alloc
= fft_auto_allocation_default)
{
return param_generator_base(base_prob,
trans_type_range_complex,
Expand All @@ -413,9 +437,9 @@ inline auto param_generator_complex(const double bas
ioffset_range,
ooffset_range,
place_range,

planar,
run_callbacks);
run_callbacks,
auto_alloc);
}

// Create an array of parameters to pass to gtest.
Expand All @@ -428,9 +452,9 @@ inline auto param_generator_real(const double base_p
const std::vector<std::vector<size_t>>& ioffset_range,
const std::vector<std::vector<size_t>>& ooffset_range,
const std::vector<fft_result_placement>& place_range,

const bool planar,
const bool run_callbacks = false)
const bool planar,
const bool run_callbacks = false,
const fft_auto_allocation auto_alloc = fft_auto_allocation_default)
{
return param_generator_base(base_prob,
trans_type_range_real,
Expand All @@ -443,9 +467,9 @@ inline auto param_generator_real(const double base_p
ioffset_range,
ooffset_range,
place_range,

planar,
run_callbacks);
run_callbacks,
auto_alloc);
}

template <class Tcontainer>
Expand Down
6 changes: 4 additions & 2 deletions shared/rocfft_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ class rocfft_params_base : public fft_params
rocfft_execution_info info = nullptr;
rocfft_plan_description desc = nullptr;
gpubuf_t<void> wbuffer;
size_t workbuffersize = 0;

explicit rocfft_params_base() = default;

Expand Down Expand Up @@ -410,7 +411,8 @@ class rocfft_params_base : public fft_params
{
return ret;
}
if(workbuffersize > 0)
// default behavior is to feed rocfft with a work area if it needs one
if(workbuffersize > 0 && auto_allocate != fft_auto_allocation_on)
{
hipError_t hip_status = hipSuccess;
hip_status = wbuffer.alloc(workbuffersize);
Expand All @@ -429,7 +431,7 @@ class rocfft_params_base : public fft_params
{
oss << "hipMemGetInfo also failed";
}
throw work_buffer_alloc_failure(oss.str());
throw work_buffer_alloc_failure(oss.str(), workbuffersize);
}

auto rocret
Expand Down