diff --git a/src/feat/Makefile b/src/feat/Makefile index e987de55b38..af207402a15 100644 --- a/src/feat/Makefile +++ b/src/feat/Makefile @@ -6,18 +6,17 @@ include ../kaldi.mk TESTFILES = feature-mfcc-test feature-plp-test feature-fbank-test \ feature-functions-test pitch-functions-test feature-sdc-test \ - resample-test online-feature-test signal-test + resample-test online-feature-test signal-test wave-reader-test OBJFILES = feature-functions.o feature-mfcc.o feature-plp.o feature-fbank.o \ feature-spectrogram.o mel-computations.o wave-reader.o \ pitch-functions.o resample.o online-feature.o signal.o \ - feature-window.o + feature-window.o LIBNAME = kaldi-feat ADDLIBS = ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \ ../tree/kaldi-tree.a ../util/kaldi-util.a ../thread/kaldi-thread.a \ - ../matrix/kaldi-matrix.a ../base/kaldi-base.a + ../matrix/kaldi-matrix.a ../base/kaldi-base.a include ../makefiles/default_rules.mk - diff --git a/src/feat/online-feature-test.cc b/src/feat/online-feature-test.cc index 556160f8e53..aea89d0099d 100644 --- a/src/feat/online-feature-test.cc +++ b/src/feat/online-feature-test.cc @@ -21,22 +21,11 @@ #include "feat/online-feature.h" #include "feat/wave-reader.h" +#include "matrix/kaldi-matrix.h" #include "transform/transform-common.h" namespace kaldi { - -template static void AssertEqual(const Matrix &A, - const Matrix &B, - float tol = 0.001) { - KALDI_ASSERT(A.NumRows() == B.NumRows()&&A.NumCols() == B.NumCols()); - for (MatrixIndexT i = 0;i < A.NumRows();i++) - for (MatrixIndexT j = 0;j < A.NumCols();j++) { - KALDI_ASSERT(std::abs(A(i, j)-B(i, j)) < tol * std::max(1.0, - static_cast(std::abs(A(i, j))+std::abs(B(i, j))))); - } -} - void GetOutput(OnlineFeatureInterface *a, Matrix *output) { int32 dim = a->Dim(); diff --git a/src/feat/wave-reader-test.cc b/src/feat/wave-reader-test.cc new file mode 100644 index 00000000000..f9a71e8af34 --- /dev/null +++ b/src/feat/wave-reader-test.cc @@ -0,0 +1,217 @@ +// feat/wave-reader-test.cc + 
+// Copyright 2017 Smart Action LLC (kkm) + +// See ../../COPYING for clarification regarding multiple authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED +// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, +// MERCHANTABLITY OR NON-INFRINGEMENT. +// See the Apache 2 License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "base/kaldi-math.h" +#include "feat/wave-reader.h" +#include "matrix/kaldi-matrix.h" + +using namespace kaldi; + +// Ugly macros to package bytes in wave file order (little-endian). +#define BY(n,k) ((char)((uint32)(n) >> (8 * (k)) & 0xFF)) +#define WRD(n) BY(n,0), BY(n,1) +#define DWRD(n) BY(n,0), BY(n,1), BY(n,2), BY(n,3) + +static void UnitTestStereo8K() { + /* Reference file written with Adobe Audition (random data): +00000000 52 49 46 46 32 00 00 00 57 41 56 45 66 6d 74 20 |RIFF2...WAVEfmt | +00000010 12 00 00 00 01 00 02 00 40 1f 00 00 00 7d 00 00 |........@....}..| +00000020 04 00 10 00 00 00 64 61 74 61 0c 00 00 00 00 00 |......data......| +00000030 31 51 ff 21 f4 63 38 4c 26 60 |1Q.!.c8L&`| + */ + + const int hz = 8000; + const int byps = hz * 2 /* channels */ * 2 /* bytes/sample */; + const char file_data[] = { + 'R', 'I', 'F', 'F', + DWRD(50), // File length after this point. 
+ 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', + DWRD(18), // sizeof(struct WAVEFORMATEX) + WRD(1), // WORD wFormatTag; + WRD(2), // WORD nChannels; + DWRD(hz), // DWORD nSamplesPerSec; 40 1f 00 00 + DWRD(byps), // DWORD nAvgBytesPerSec; 00 7d 00 00 + WRD(4), // WORD nBlockAlign; + WRD(16), // WORD wBitsPerSample; + WRD(0), // WORD cbSize; + 'd', 'a', 't', 'a', + DWRD(12), // 'data' chunk length. + WRD(0), WRD(-1), + WRD(-32768), WRD(0), + WRD(32767), WRD(1) + }; + + const char expect_mat[] = "[ 0 -32768 32767 \n -1 0 1 ]"; + + // Read binary file data. + std::istringstream iws(std::string(file_data, sizeof file_data), + std::ios::in | std::ios::binary); + WaveData wave; + wave.Read(iws); + + // Read expected wave data. + std::istringstream ies(expect_mat, std::ios::in); + Matrix expected; + expected.Read(ies, false /* text */); + + AssertEqual(wave.SampFreq(), hz, 0); + AssertEqual(wave.Duration(), 3.0 /* samples */ / hz /* Hz */, 1E-6); + AssertEqual(wave.Data(), expected); +} + +static void UnitTestMono22K() { + /* Reference file written with Adobe Audition (random data): +00000000 52 49 46 46 30 00 00 00 57 41 56 45 66 6d 74 20 |RIFF0...WAVEfmt | +00000010 12 00 00 00 01 00 01 00 22 56 00 00 44 ac 00 00 |........"V..D...| +00000020 02 00 10 00 00 00 64 61 74 61 0a 00 00 00 25 36 |......data....%6| +00000030 cb 41 1b 4d 04 4e 62 3d |.A.M.Nb=| + */ + + const int hz = 22050; + const int byps = hz * 1 /* channels */ * 2 /* bytes/sample */; + const char file_data[] = { + 'R', 'I', 'F', 'F', + DWRD(48), // File length after this point. + 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', + DWRD(18), // sizeof(struct WAVEFORMATEX) + WRD(1), // WORD wFormatTag; + WRD(1), // WORD nChannels; + DWRD(hz), // DWORD nSamplesPerSec; + DWRD(byps), // DWORD nAvgBytesPerSec; + WRD(2), // WORD nBlockAlign; + WRD(16), // WORD wBitsPerSample; + WRD(0), // WORD cbSize; + 'd', 'a', 't', 'a', + DWRD(10), // 'data' chunk length. 
+ WRD(0), WRD(-1), WRD(-32768), WRD(32767), WRD(1) + }; + + const char expect_mat[] = "[ 0 -1 -32768 32767 1 ]"; + + // Read binary file data. + std::istringstream iws(std::string(file_data, sizeof file_data), + std::ios::in | std::ios::binary); + WaveData wave; + wave.Read(iws); + + // Read expected matrix. + std::istringstream ies(expect_mat, std::ios::in); + Matrix expected; + expected.Read(ies, false /* text */); + + AssertEqual(wave.SampFreq(), hz, 0); + AssertEqual(wave.Duration(), 5.0 /* samples */ / hz /* Hz */, 1E-6); + AssertEqual(wave.Data(), expected); +} + +static void UnitTestEndless1() { + const int hz = 8000; + const int byps = hz * 1 /* channels */ * 2 /* bytes/sample */; + const char file_data[] = { + 'R', 'I', 'F', 'F', + DWRD(0), // File length unknown + 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', + DWRD(18), // sizeof(struct WAVEFORMATEX) + WRD(1), // WORD wFormatTag; + WRD(1), // WORD nChannels; + DWRD(hz), // DWORD nSamplesPerSec; + DWRD(byps), // DWORD nAvgBytesPerSec; + WRD(2), // WORD nBlockAlign; + WRD(16), // WORD wBitsPerSample; + WRD(0), // WORD cbSize; + 'd', 'a', 't', 'a', + DWRD(0), // 'data' chunk length unknown. + WRD(1), WRD(2), WRD(3) + }; + + const char expect_mat[] = "[ 1 2 3 ]"; + + // Read binary file data. + std::istringstream iws(std::string(file_data, sizeof file_data), + std::ios::in | std::ios::binary); + WaveData wave; + wave.Read(iws); + + // Read expected matrix. 
+ std::istringstream ies(expect_mat, std::ios::in); + Matrix expected; + expected.Read(ies, false /* text */); + + AssertEqual(wave.Data(), expected); +} + +static void UnitTestEndless2() { + const int hz = 8000; + const int byps = hz * 1 /* channels */ * 2 /* bytes/sample */; + const char file_data[] = { + 'R', 'I', 'F', 'F', + DWRD(-1), // File length unknown + 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', + DWRD(18), // sizeof(struct WAVEFORMATEX) + WRD(1), // WORD wFormatTag; + WRD(1), // WORD nChannels; + DWRD(hz), // DWORD nSamplesPerSec; + DWRD(byps), // DWORD nAvgBytesPerSec; + WRD(2), // WORD nBlockAlign; + WRD(16), // WORD wBitsPerSample; + WRD(0), // WORD cbSize; + 'd', 'a', 't', 'a', + DWRD(-1), // 'data' chunk length unknown. + WRD(1), WRD(2), WRD(3) + }; + + const char expect_mat[] = "[ 1 2 3 ]"; + + // Read binary file data. + std::istringstream iws(std::string(file_data, sizeof file_data), + std::ios::in | std::ios::binary); + WaveData wave; + wave.Read(iws); + + // Read expected matrix. 
+ std::istringstream ies(expect_mat, std::ios::in); + Matrix expected; + expected.Read(ies, false /* text */); + + AssertEqual(wave.Data(), expected); +} + +static void UnitTest() { + UnitTestStereo8K(); + UnitTestMono22K(); + UnitTestEndless1(); + UnitTestEndless2(); +} + +int main() { + try { + UnitTest(); + std::cout << "LGTM\n"; + return 0; + } catch (const std::exception &e) { + std::cerr << e.what(); + return 1; + } +} diff --git a/src/feat/wave-reader.cc b/src/feat/wave-reader.cc index 389b461d86c..904cee9f519 100644 --- a/src/feat/wave-reader.cc +++ b/src/feat/wave-reader.cc @@ -36,7 +36,8 @@ void WaveData::Expect4ByteTag(std::istream &is, const char *expected) { tmp[4] = '\0'; is.read(tmp, 4); if (is.fail()) - KALDI_ERR << "WaveData: expected " << expected << ", failed to read anything"; + KALDI_ERR << "WaveData: expected " << expected + << ", failed to read anything"; if (strcmp(tmp, expected)) KALDI_ERR << "WaveData: expected " << expected << ", got " << tmp; } @@ -176,11 +177,11 @@ void WaveData::Read(std::istream &is, ReadDataType read_data) { if (num_channels <= 0) KALDI_ERR << "WaveData: no channels present"; samp_freq_ = static_cast(sample_rate); - if (bits_per_sample != 8 && bits_per_sample != 16 && bits_per_sample != 32) - KALDI_ERR << "WaveData: bits_per_sample is " << bits_per_sample; + if (bits_per_sample != 16) + KALDI_ERR << "WaveData: unsupported bits_per_sample = " << bits_per_sample; if (byte_rate != sample_rate * bits_per_sample/8 * num_channels) KALDI_ERR << "Unexpected byte rate " << byte_rate << " vs. " - << sample_rate <<" * " << (bits_per_sample/8) + << sample_rate << " * " << (bits_per_sample/8) << " * " << num_channels; if (block_align != num_channels * bits_per_sample/8) KALDI_ERR << "Unexpected block_align: " << block_align << " vs. 
" @@ -221,10 +222,23 @@ void WaveData::Read(std::istream &is, ReadDataType read_data) { uint32 data_chunk_size = ReadUint32(is, swap); riff_chunk_read += 4; - if (std::abs(static_cast(riff_chunk_read) + - static_cast(data_chunk_size) - - static_cast(riff_chunk_size)) > 1) { - // we allow the size to be off by one without warning, because there is a + // Check if data is read in stream mode. To indicate that the data size is + // not known in advance, data_chunk_size and riff_chunk_size are both set to + // either 0 or 0xFFFFFFFF (all ones). + bool is_stream_mode = + (riff_chunk_size == 0 && data_chunk_size == 0) + || (riff_chunk_size == std::numeric_limits::max() + && data_chunk_size == std::numeric_limits::max()); + if (is_stream_mode) + KALDI_VLOG(1) << "Read in RIFF chunk size: " << riff_chunk_size + << ", data chunk size: " << data_chunk_size + << ". Assume 'stream mode' (reading data to EOF)."; + + if (!is_stream_mode + && std::abs(static_cast(riff_chunk_read) + + static_cast(data_chunk_size) - + static_cast(riff_chunk_size)) > 1) { + // We allow the size to be off by one without warning, because there is a // weirdness in the format of RIFF files that means that the input may // sometimes be padded with 1 unused byte to make the total size even. KALDI_WARN << "Expected " << riff_chunk_size << " bytes in RIFF chunk, but " @@ -234,86 +248,54 @@ void WaveData::Read(std::istream &is, ReadDataType read_data) { } if (read_data == kLeaveDataUndefined) { - // we won't actually be reading the data- we'll just be faking that we read + // We won't actually be reading the data- we'll just be faking that we read // that data, so the caller can get the metadata. - // assume we'd read the same number of bytes that the data-chunk header + // Assume we'd read the same number of bytes that the data chunk header // says we'd read. 
int32 num_bytes_read = data_chunk_size; uint32 num_samp = num_bytes_read / block_align; data_.Resize(num_channels, num_samp, kUndefined); return; - } else { - KALDI_ASSERT(read_data == kReadData); - } - - std::vector data_pointer_vec; - std::vector data_size_vec; - uint32 num_bytes_read = 0; - for (int32 remain_chunk_size = data_chunk_size; remain_chunk_size > 0; - remain_chunk_size -= kBlockSize) { - int32 this_block_size = remain_chunk_size; - if (kBlockSize < remain_chunk_size) - this_block_size = kBlockSize; - char *block_data_vec = new char[this_block_size]; - is.read(block_data_vec, this_block_size); - num_bytes_read += is.gcount(); - data_size_vec.push_back(is.gcount()); - data_pointer_vec.push_back(block_data_vec); - if (num_bytes_read < this_block_size) - break; } - std::vector chunk_data_vec(num_bytes_read); - uint32 data_address = 0; - for (int i = 0; i < data_pointer_vec.size(); i++) { - memcpy(&(chunk_data_vec[data_address]), data_pointer_vec[i], - data_size_vec[i]); - delete[] data_pointer_vec[i]; - data_address += data_size_vec[i]; - } + KALDI_ASSERT(read_data == kReadData); - char *data_ptr = &(chunk_data_vec[0]); - if (num_bytes_read == 0 && num_bytes_read != data_chunk_size) { - KALDI_ERR << "WaveData: failed to read data chunk (read no bytes)"; - } else if (num_bytes_read != data_chunk_size) { - KALDI_ASSERT(num_bytes_read < data_chunk_size); - KALDI_WARN << "Read fewer bytes than specified in the header: " - << num_bytes_read << " < " << data_chunk_size; + std::vector<char> chunk_data_vec; + // Bytes still to be requested. In stream mode the total is unknown, so this + // stays at one block until the stream is exhausted. 
+ uint32 bytes_to_go = kBlockSize; + if (!is_stream_mode) + bytes_to_go = data_chunk_size; + while (is.good() && bytes_to_go > 0) { + uint32 block_bytes = kBlockSize; + if (bytes_to_go < block_bytes) + block_bytes = bytes_to_go; + const size_t offset = chunk_data_vec.size(); + chunk_data_vec.resize(offset + block_bytes); + is.read(&chunk_data_vec[offset], block_bytes); + // A short read leaves the tail of this block unfilled; drop it. + chunk_data_vec.resize(offset + static_cast<size_t>(is.gcount())); + if (!is_stream_mode) + bytes_to_go -= block_bytes; } - - if (data_chunk_size == 0) + uint32 num_bytes_read = static_cast<uint32>(chunk_data_vec.size()); + if (num_bytes_read == 0) KALDI_ERR << "WaveData: empty file (no data)"; + // A truncated data chunk is kept and reported, not treated as fatal. + if (!is_stream_mode && num_bytes_read < data_chunk_size) + KALDI_WARN << "Read fewer bytes than specified in the header: " + << num_bytes_read << " < " << data_chunk_size; + char *data_ptr = &(chunk_data_vec[0]); uint32 num_samp = num_bytes_read / block_align; data_.Resize(num_channels, num_samp); for (uint32 i = 0; i < num_samp; i++) { for (uint32 j = 0; j < num_channels; j++) { - switch (bits_per_sample) { - case 8: - data_(j, i) = *data_ptr; - data_ptr++; - break; - case 16: - { - int16 k = *reinterpret_cast(data_ptr); - if (swap) - KALDI_SWAP2(k); - data_(j, i) = k; - data_ptr += 2; - break; - } - case 32: - { - int32 k = *reinterpret_cast(data_ptr); - if (swap) - KALDI_SWAP4(k); - data_(j, i) = k; - data_ptr += 4; - break; - } - default: - KALDI_ERR << "bits per sample is " << bits_per_sample; // already checked this. - } + int16 k = *reinterpret_cast(data_ptr); + if (swap) + KALDI_SWAP2(k); + data_(j, i) = k; + data_ptr += 2; } } }