diff --git a/src/chain/chain-kernels.cu b/src/chain/chain-kernels.cu index 640040c60f3..1a1bc2f3bcf 100644 --- a/src/chain/chain-kernels.cu +++ b/src/chain/chain-kernels.cu @@ -20,21 +20,31 @@ #include #include "chain/chain-kernels-ansi.h" -template -__device__ inline void atomic_add(Real* address, Real value) { - atomicAdd(address, value); -} +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 200 +#error - Kaldi no longer supports CC1.x devices. Please use a newer GPU or \ + configure with --use-cuda=no (this will disable the use of GPU). +#endif -template<> -__device__ inline void atomic_add(double* address, double val) { - unsigned long long int* address_as_ull = - reinterpret_cast(address); +#if __CUDA_ARCH__ < 600 +__device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*) address; unsigned long long int old = *address_as_ull, assumed; + do { assumed = old; old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) } while (assumed != old); + + return __longlong_as_double(old); +} +#endif + +template +__device__ inline void atomic_add(Real* address, Real value) { + atomicAdd(address, value); } template diff --git a/src/feat/feature-plp.cc b/src/feat/feature-plp.cc index 0034027cbe6..719e55dd6da 100644 --- a/src/feat/feature-plp.cc +++ b/src/feat/feature-plp.cc @@ -125,7 +125,7 @@ void PlpComputer::Compute(BaseFloat signal_log_energy, if (opts_.use_energy && !opts_.raw_energy) signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame), - std::numeric_limits::min())); + std::numeric_limits::min())); if (srfft_ != NULL) // Compute FFT using split-radix algorithm. srfft_->Compute(signal_frame->Data(), true); diff --git a/src/feat/feature-window.cc b/src/feat/feature-window.cc index 2726462d22c..65c0a2a29c3 100644 --- a/src/feat/feature-window.cc +++ b/src/feat/feature-window.cc @@ -140,7 +140,7 @@ void ProcessWindow(const FrameExtractionOptions &opts, if (log_energy_pre_window != NULL) { BaseFloat energy = std::max(VecVec(*window, *window), - std::numeric_limits::epsilon()); + std::numeric_limits::epsilon()); *log_energy_pre_window = Log(energy); } diff --git a/src/util/kaldi-holder.cc b/src/util/kaldi-holder.cc index ee7dd66e922..a26bdf2ce29 100644 --- a/src/util/kaldi-holder.cc +++ b/src/util/kaldi-holder.cc @@ -72,8 +72,8 @@ bool ExtractObjectRange(const Matrix &input, const std::string &range, // template instantiation template bool ExtractObjectRange(const Matrix &, const std::string &, Matrix *); -template bool ExtractObjectRange(const Matrix &, const std::string &, - Matrix *); +template bool ExtractObjectRange(const Matrix &, const std::string &, + Matrix *); bool ExtractRangeSpecifier(const std::string &rxfilename_with_range, std::string *data_rxfilename,