Add deframe op and stft/istft api. (PaddlePaddle#23)
* Add frame api

* Add deframe op and kernels.

* Add stft and istft apis.

* Add deframe api. Update stft and istft apis.

* Fix bug in frame_from_librosa function when input dims >= 3

* Rename deframe to overlap_add.

* Update istft.

* Update after code review.
KPatr1ck authored Sep 10, 2021
1 parent fcd9069 commit f9e3309
Showing 10 changed files with 1,088 additions and 204 deletions.
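Before the per-file diffs, a minimal usage sketch of what this commit adds (the commit message notes that `deframe` was renamed to `overlap_add`). The module path `paddle.signal` and the defaults follow the API as eventually released; both are assumptions and may not match this exact commit:

```python
import paddle

x = paddle.randn([8, 16000])  # (batch, seq_length)

# frame slices overlapping windows out of the last axis.
frames = paddle.signal.frame(x, frame_length=512, hop_length=160)
# frames: (8, 512, n_frames) -- one sliding window per column.

# overlap_add is the adjoint of frame: overlapping windows are summed back.
y = paddle.signal.overlap_add(frames, hop_length=160)

# stft/istft are built on top of frame/overlap_add.
spec = paddle.signal.stft(x, n_fft=512, hop_length=160)      # complex spectrogram
x_rec = paddle.signal.istft(spec, n_fft=512, hop_length=160) # reconstruction
```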
11 changes: 9 additions & 2 deletions paddle/fluid/operators/frame_op.cc
@@ -32,6 +32,11 @@ class FrameOp : public framework::OperatorWithKernel {
const auto x_dims = ctx->GetInputDim("X");
const int x_rank = x_dims.size();

PADDLE_ENFORCE_GE(
x_rank, 1, platform::errors::InvalidArgument(
"Input(X) of FrameOp should be a tensor which contains "
"at least 1 dimension, but got rank %s.",
x_rank));
PADDLE_ENFORCE_GT(hop_length, 0,
platform::errors::InvalidArgument(
"Attribute(hop_length) of FrameOp should be greater "
@@ -111,7 +116,7 @@ class FrameOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Frame Operator.
Frame op slices frames from input sequence $X$.
Frame op converts time sequences into frames.
)DOC");
}
@@ -174,7 +179,9 @@ REGISTER_OP_CPU_KERNEL(
paddle::platform::complex<double>>);

REGISTER_OP_CPU_KERNEL(
frame_grad, ops::FrameGradKernel<paddle::platform::CPUDeviceContext, float>,
frame_grad, ops::FrameGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::FrameGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::FrameGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::FrameGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::FrameGradKernel<paddle::platform::CPUDeviceContext,
paddle::platform::complex<float>>,
3 changes: 2 additions & 1 deletion paddle/fluid/operators/frame_op.cu
@@ -29,7 +29,8 @@ REGISTER_OP_CUDA_KERNEL(
paddle::platform::complex<double>>);

REGISTER_OP_CUDA_KERNEL(
frame_grad,
frame_grad, ops::FrameGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::FrameGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::FrameGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::FrameGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::FrameGradKernel<paddle::platform::CUDADeviceContext,
179 changes: 7 additions & 172 deletions paddle/fluid/operators/frame_op.h
@@ -19,6 +19,7 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/seq2col.h"
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
@@ -27,170 +28,6 @@ namespace paddle {
namespace operators {
using Tensor = framework::Tensor;

template <typename T>
struct DataMappingFunctor {
DataMappingFunctor(const T* x, T* out, size_t seq_length, size_t frame_length,
size_t n_frames, size_t hop_length)
: x_(x),
out_(out),
seq_length_(seq_length),
frame_length_(frame_length),
n_frames_(n_frames),
hop_length_(hop_length) {}

/*
Convert sequences to frames.
1. Dimension information:
Sequences Frames
(N, seq_length) -> (N, frame_length, n_frames)
2. Mapping from `i` to `src_idx` and `trg_idx` can be derived from:
a. Notation
- `i` stands for the flattened index of a bunch of frames.
- `src_idx` and `trg_idx` are the 1D indices of seqs and frames
respectively.
b. Sample idx
```cpp
sample_idx = i / (n_frames_ * frame_length_);
```
c. Maps `i` to `f` and `n`.
```cpp
f = i % (n_frames_ * frame_length_) / n_frames_;
n = i % (n_frames_ * frame_length_) % n_frames_;
```
d. Replace `sample_idx`, `f` and `n` in the following equations:
```cpp
src_idx = sample_idx * seq_length_ + n * hop_length_ + f;
trg_idx = sample_idx * n_frames_ * frame_length_ + f * n_frames_ + n;
out_[trg_idx] = x_[src_idx];
```
e. The fully substituted result appears in the function body below (see also the sketch after this struct).
*/
HOSTDEVICE void operator()(size_t i) const {
size_t src_idx;
size_t trg_idx;
src_idx = i / (n_frames_ * frame_length_) * seq_length_ +
i % (n_frames_ * frame_length_) % n_frames_ * hop_length_ +
i % (n_frames_ * frame_length_) / n_frames_;
trg_idx = i / (n_frames_ * frame_length_) * n_frames_ * frame_length_ +
i % (n_frames_ * frame_length_) / n_frames_ * n_frames_ +
i % (n_frames_ * frame_length_) % n_frames_;
out_[trg_idx] = x_[src_idx];
}

const T* x_;
T* out_;
size_t seq_length_;
size_t frame_length_;
size_t n_frames_;
size_t hop_length_;
};
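The index mapping documented above is easy to sanity-check outside C++. A pure-Python sketch of the same arithmetic (illustration only; `seq2frames` is a made-up name):

```python
def seq2frames(x, seq_length, frame_length, n_frames, hop_length):
    # Replicates the DataMappingFunctor mapping on a flat list:
    # (N * seq_length,) -> (N * frame_length * n_frames,)
    out = [0] * (len(x) // seq_length * frame_length * n_frames)
    for i in range(len(out)):
        sample_idx = i // (n_frames * frame_length)
        f = i % (n_frames * frame_length) // n_frames  # offset within a frame
        n = i % (n_frames * frame_length) % n_frames   # frame index
        src_idx = sample_idx * seq_length + n * hop_length + f
        trg_idx = sample_idx * n_frames * frame_length + f * n_frames + n
        out[trg_idx] = x[src_idx]
    return out

# seq_length=6, frame_length=3, hop_length=2 gives frames [0,1,2] and [2,3,4],
# laid out as (frame_length, n_frames):
print(seq2frames([0, 1, 2, 3, 4, 5], 6, 3, 2, 2))  # -> [0, 2, 1, 3, 2, 4]
```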

template <typename T>
struct DataMappingGradFunctor {
DataMappingGradFunctor(const T* d_out, T* d_x, size_t seq_length,
size_t frame_length, size_t n_frames,
size_t hop_length)
: d_out_(d_out),
d_x_(d_x),
seq_length_(seq_length),
frame_length_(frame_length),
n_frames_(n_frames),
hop_length_(hop_length) {}

/*
Accumulate output gradient d_out to d_x.
1. Dimension information:
d_out d_x
(N, frame_length, n_frames) -> (N, seq_length)
2. Using a sliding window to find source indices from `d_out` according to
`i`:
a. Notation
- `i` stands for the flattened index of `d_x`.
- `seq_i` stands for a relative index of a `d_x` sample.
- `left`: Starting index of a frame window.
- `right`: Ending index of a frame window.
b. Sample idx
```cpp
sample_idx = i / seq_length_;
```
c. Slides a window with length of `frame_length` to find `f` and `n`.
- `n`: The frame index (0 <= n < n_frames_), incremented on each hop.
- `f`: The offset within a frame (0 <= f < frame_length_), relative to
the left edge of the sliding window.
d. Accumulate all grads from d_out (see the sketch after this struct).
```cpp
d_x_[i] +=
d_out_[sample_idx * frame_length_ * n_frames_ + f * n_frames_ + n];
```
*/
HOSTDEVICE void operator()(size_t i) const {
size_t sample_idx = i / seq_length_;
size_t seq_i = i % seq_length_;

// Sliding window
d_x_[i] = 0;  // Init d_x_[i] to 0; all grads from d_out_ are
              // accumulated in the while loop below.

size_t n = get_start_frame_idx(seq_i);
size_t f;
size_t left = n * hop_length_;
size_t right = left + frame_length_ - 1;

while (left <= seq_i && right < seq_length_) {
f = seq_i - left;
d_x_[i] +=
d_out_[sample_idx * frame_length_ * n_frames_ + f * n_frames_ + n];
// Next frame.
left += hop_length_;
right += hop_length_;
n += 1;
}
}

/*
Calculate the minimum frame index `n` satisfying the inequality:
seq_i <= right
==> seq_i <= left + frame_length - 1
==> seq_i <= hop_length_ * n + frame_length_ - 1
==> n >= (seq_i + 1 - frame_length_) / hop_length_
i.e. n = max(0, ceil((seq_i + 1 - frame_length_) / hop_length_)),
which the integer arithmetic below computes.
*/
HOSTDEVICE size_t get_start_frame_idx(size_t seq_i) const {
int64_t tmp = seq_i + 1 - frame_length_;
if (tmp > 0) {
size_t n = tmp / hop_length_;
if (tmp % hop_length_ == 0) {
return n;
} else {
return n + 1;
}
} else {
return 0;
}
}

const T* d_out_;
T* d_x_;
size_t seq_length_;
size_t frame_length_;
size_t n_frames_;
size_t hop_length_;
};
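And the backward mapping, mirroring the sliding-window accumulation and the `get_start_frame_idx` derivation above (again pure Python, illustration only; `col2seq_grad` is a made-up name):

```python
import math

def col2seq_grad(d_out, seq_length, frame_length, n_frames, hop_length):
    # For each position seq_i of d_x, sum the gradient from every frame
    # slot that position was copied into during the forward pass.
    d_x = [0.0] * seq_length
    for seq_i in range(seq_length):
        # First frame whose window covers seq_i (derivation above):
        n = max(0, math.ceil((seq_i + 1 - frame_length) / hop_length))
        left = n * hop_length
        while left <= seq_i and left + frame_length - 1 < seq_length:
            f = seq_i - left                       # offset inside this frame
            d_x[seq_i] += d_out[f * n_frames + n]  # (frame_length, n_frames) layout
            left += hop_length
            n += 1
    return d_x

# With an all-ones upstream gradient and frames [0,1,2], [2,3,4], position 2
# is covered by two frames and position 5 by none:
print(col2seq_grad([1.0] * 6, 6, 3, 2, 2))  # -> [1.0, 1.0, 2.0, 1.0, 1.0, 0.0]
```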

template <typename DeviceContext, typename T>
struct FrameFunctor {
void operator()(const DeviceContext& dev_ctx, const Tensor* input,
@@ -203,12 +40,12 @@ struct FrameFunctor {

platform::ForRange<DeviceContext> for_range(dev_ctx, numel);
if (!is_grad) {
DataMappingFunctor<T> functor(input_data, output_data, seq_length,
frame_length, n_frames, hop_length);
math::Seq2ColFunctor<T> functor(input_data, output_data, seq_length,
frame_length, n_frames, hop_length);
for_range(functor);
} else {
DataMappingGradFunctor<T> functor(input_data, output_data, seq_length,
frame_length, n_frames, hop_length);
math::Col2SeqFunctor<T> functor(input_data, output_data, seq_length,
frame_length, n_frames, hop_length);
for_range(functor);
}
}
@@ -385,10 +222,8 @@ class FrameGradKernel : public framework::OpKernel<T> {
falls into Case 2. Finally, it restores the dims of `d_x` tensor.
*/
void Compute(const framework::ExecutionContext& ctx) const {
const framework::Tensor* d_out =
ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
framework::Tensor* d_x =
ctx.Output<framework::Tensor>(framework::GradVarName("X"));
const Tensor* d_out = ctx.Input<Tensor>(framework::GradVarName("Out"));
Tensor* d_x = ctx.Output<Tensor>(framework::GradVarName("X"));
d_x->mutable_data<T>(ctx.GetPlace());
const size_t d_out_rank = d_out->dims().size();
const size_t d_x_rank = d_x->dims().size();
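The doc comment above (cut off by the diff) describes collapsing higher-rank inputs down to the 2-D case and restoring the dims of `d_x` afterwards, which matches the commit-message fix for inputs with dims >= 3. A sketch of that collapse-and-restore strategy in numpy terms, as an assumption about the approach rather than the kernel's literal code:

```python
import numpy as np

def frame_nd(x, frame_length, hop_length):
    # Collapse all leading dims into one batch dim, frame, then restore.
    lead, seq_length = x.shape[:-1], x.shape[-1]
    n_frames = 1 + (seq_length - frame_length) // hop_length
    x2d = x.reshape(-1, seq_length)  # (batch, seq_length)
    out = np.stack([x2d[:, n * hop_length:n * hop_length + frame_length]
                    for n in range(n_frames)], axis=-1)
    return out.reshape(*lead, frame_length, n_frames)  # restore leading dims
```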