Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 18 additions & 8 deletions src/chain/chain-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,31 @@
#include <cfloat>
#include "chain/chain-kernels-ansi.h"

// Device-side helper that adds `value` to `*address` by forwarding to
// atomicAdd().  The value returned by atomicAdd() is discarded, so callers
// get no result back.
template <typename Real>
__device__ inline void atomic_add(Real* address, Real value) {
atomicAdd(address, value);
}
// Compile-time guard: abort the build with an explanatory message whenever
// device code is being generated for an architecture value below 200.  The
// check only fires when __CUDA_ARCH__ is defined.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 200
#error - Kaldi no longer supports CC1.x devices. Please use a newer GPU or \
configure with --use-cuda=no (this will disable the use of GPU).
#endif

template<>
__device__ inline void atomic_add(double* address, double val) {
unsigned long long int* address_as_ull =
reinterpret_cast<unsigned long long int*>(address);
// Software fallback for double-precision atomicAdd(), compiled only when
// __CUDA_ARCH__ is below 600 (the hardware double-precision atomicAdd
// requires SM60+).
//
// Atomically performs `*address += val` using a 64-bit compare-and-swap loop
// and returns the value that was stored at `address` before the addition.
//
// NOTE(review): an undefined __CUDA_ARCH__ evaluates to 0 in a preprocessor
// expression, so this branch is also taken outside device compilation --
// confirm this does not clash with declarations provided by the toolkit.
#if __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val) {
// View the double's storage as a 64-bit integer so atomicCAS can operate on it.
unsigned long long int* address_as_ull = (unsigned long long int*) address;
// `old` holds the most recently observed bit pattern; `assumed` is the
// pattern the current CAS attempt expects to find.
unsigned long long int old = *address_as_ull, assumed;

do {
assumed = old;
// Try to swap in the bits of (val + current value); atomicCAS returns the
// bits that were actually in memory, which differ from `assumed` if the
// location was modified in the meantime.
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));

// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
} while (assumed != old);

// Convert the pre-update bit pattern back to a double for the caller.
return __longlong_as_double(old);
}
#endif

// Generic device-side wrapper: adds `value` to `*address` via atomicAdd() and
// discards the returned previous value.  Which atomicAdd() overload is used is
// determined by `Real`; for double on targets with __CUDA_ARCH__ < 600 this is
// expected to pick up the compare-and-swap fallback defined above.
template <typename Real>
__device__ inline void atomic_add(Real* address, Real value) {
atomicAdd(address, value);
}

template <typename Real>
Expand Down
2 changes: 1 addition & 1 deletion src/feat/feature-plp.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ void PlpComputer::Compute(BaseFloat signal_log_energy,

if (opts_.use_energy && !opts_.raw_energy)
signal_log_energy = Log(std::max(VecVec(*signal_frame, *signal_frame),
std::numeric_limits<float>::min()));
std::numeric_limits<BaseFloat>::min()));

if (srfft_ != NULL) // Compute FFT using split-radix algorithm.
srfft_->Compute(signal_frame->Data(), true);
Expand Down
2 changes: 1 addition & 1 deletion src/feat/feature-window.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ void ProcessWindow(const FrameExtractionOptions &opts,

if (log_energy_pre_window != NULL) {
BaseFloat energy = std::max(VecVec(*window, *window),
std::numeric_limits<float>::epsilon());
std::numeric_limits<BaseFloat>::epsilon());
*log_energy_pre_window = Log(energy);
}

Expand Down
4 changes: 2 additions & 2 deletions src/util/kaldi-holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range,
// template instantiation
template bool ExtractObjectRange(const Matrix<double> &, const std::string &,
Matrix<double> *);
template bool ExtractObjectRange(const Matrix<BaseFloat> &, const std::string &,
Matrix<BaseFloat> *);
template bool ExtractObjectRange(const Matrix<float> &, const std::string &,
Matrix<float> *);

bool ExtractRangeSpecifier(const std::string &rxfilename_with_range,
std::string *data_rxfilename,
Expand Down