Skip to content

Commit 91c68a5

Browse files
committed
Update README.md
1 parent 2132962 commit 91c68a5

13 files changed

+381
-77
lines changed

.gitignore

+9
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1+
#common
2+
.vscode
13
build
4+
*.zip
5+
6+
# kaldi
7+
depends/kaldi-5.*
8+
depends/kaldi
9+
10+
# egs
211
egs/gop-compute/steps
312
egs/gop-compute/utils
413
egs/gop-compute/data

README.md

+31-8
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,40 @@
1-
21
# kaldi-gop
3-
This project computes GOP (Goodness of Pronunciation) bases on Kaldi.
2+
This project computes GMM-based GOP (Goodness of Pronunciation) using Kaldi.
43

54
## How to build
6-
1. Download and complile [Kaldi](http://www.kaldi-asr.org). Note that you need to check out the branch 5.1 instead of master, and do not use the "--shared" option.
7-
1. Edit src/CMakeLists.txt to set the variable $KALDI_ROOT.
8-
1. Compile the binary:
95
```
10-
cd src/
11-
mkdir build && cd build
12-
cmake .. && make
6+
./build.sh
137
```
148
## Run the example
159
```
1610
cd egs/gop-compute
1711
./run.sh
12+
```
13+
14+
## Theory
15+
16+
In conventional GMM-HMM based systems, GOP was first proposed by Witt et al. (2000). It is defined as the duration-normalised log posterior of the canonical phone:
17+
18+
$$
19+
GOP(p)=\frac{1}{t_e-t_s+1} \log p(p|\mathbf o)
20+
$$
21+
22+
where $\mathbf o$ is the sequence of input observations, $p$ is the canonical phone, and $t_s$ and $t_e$ are the start and end frame indices.
23+
24+
Assuming $p(q_i)\approx p(q_j)$ for any $q_i, q_j$, we have:
25+
26+
$$
27+
\log p(p|\mathbf o)=\log\frac{p(\mathbf o|p)p(p)}{\sum_{q\in Q} p(\mathbf o|q)p(q)}
28+
\approx\log\frac{p(\mathbf o|p)}{\sum_{q\in Q} p(\mathbf o|q)}
29+
$$
30+
31+
where $Q$ is the whole phone set.
32+
33+
The numerator of the equation is calculated from the forced-alignment result, and the denominator is calculated from Viterbi decoding with an unconstrained phone loop.
34+
35+
## DNN-based implementation
36+
37+
This implementation is GMM-based. For a DNN-based implementation, please see Kaldi's official repository:
38+
> https://github.com/kaldi-asr/kaldi/tree/master/egs/gop
39+
40+
The performance of GOP-DNN should be much better than that of GOP-GMM.

build.sh

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/sh
2+
3+
utils/check_dependencies.sh || exit
4+
cd src/
5+
[ -d build ] || mkdir build
6+
cd build
7+
cmake .. && make

depends/kaldi-makefile.patch

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
--- "Makefile" 2018-02-15 02:58:49.000000000 +0800
2+
+++ Makefile 2018-09-21 13:51:28.577621316 +0800
3+
@@ -6,10 +6,9 @@
4+
5+
6+
SUBDIRS = base matrix util feat tree gmm transform \
7+
- fstext hmm lm decoder lat kws cudamatrix nnet \
8+
- bin fstbin gmmbin fgmmbin featbin \
9+
- nnetbin latbin sgmm2 sgmm2bin nnet2 nnet3 rnnlm chain nnet3bin nnet2bin kwsbin \
10+
- ivector ivectorbin online2 online2bin lmbin chainbin rnnlmbin
11+
+ fstext hmm lm decoder lat cudamatrix \
12+
+ nnet3 chain \
13+
+ ivector online2
14+
15+
MEMTESTDIRS = base matrix util feat tree gmm transform \
16+
fstext hmm lm decoder lat nnet kws chain \

egs/gop-compute/local/compute-gmm-gop.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/sh
1+
#!/bin/bash
22

33
# Copyright 2016-2017 Author: Junbo Zhang
44

egs/gop-compute/path.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
export KALDI_ROOT=~/src/kaldi
1+
export KALDI_ROOT=../../depends/kaldi
22
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
33
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:../../src/build:$PWD:$PATH
44
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1

egs/gop-compute/run.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/bin/sh
1+
#!/bin/bash
22

33
# Copyright 2016-2017 Author: Junbo Zhang <[email protected]>
44

src/CMakeLists.txt

+30-13
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,40 @@
11
cmake_minimum_required (VERSION 2.6)
22
project (kaldi-gop)
33

4-
# BLAS
5-
find_package(BLAS REQUIRED)
4+
######
5+
# Check BLAS
6+
find_package(BLAS)
67
if(NOT BLAS_FOUND)
78
message(FATAL_ERROR "Can't find BLAS. On Ubunbu, you may want to "
89
"install it by:\n sudo apt-get install libatlas-dev")
910
endif()
1011

12+
######
1113
# Build Kaldi
12-
SET(KALDI_ROOT "${CMAKE_SOURCE_DIR}/kaldi-5.1")
13-
SET(BUILD_JOBS 8)
14-
if(NOT EXISTS ${KALDI_ROOT})
15-
execute_process(COMMAND git clone -b 5.1 https://github.com/kaldi-asr/kaldi.git ${KALDI_ROOT})
16-
execute_process(COMMAND extras/check_dependencies.sh WORKING_DIRECTORY ${KALDI_ROOT}/tools)
14+
set(KALDI_VERSION 5.3)
15+
set(BUILD_JOBS 8)
16+
set(DEPENDS_DIR "${CMAKE_SOURCE_DIR}/../depends")
17+
set(KALDI_ROOT "${DEPENDS_DIR}/kaldi-${KALDI_VERSION}")
18+
if(NOT EXISTS "${KALDI_ROOT}/tools/openfst/bin/fstcompile")
19+
if (NOT EXISTS "${DEPENDS_DIR}/${KALDI_VERSION}.zip")
20+
execute_process(COMMAND wget https://github.com/kaldi-asr/kaldi/archive/${KALDI_VERSION}.zip
21+
WORKING_DIRECTORY ${DEPENDS_DIR})
22+
endif()
23+
execute_process(COMMAND unzip -o ${KALDI_VERSION}.zip -d ${DEPENDS_DIR}
24+
WORKING_DIRECTORY ${DEPENDS_DIR})
25+
execute_process(COMMAND ln -s kaldi-${KALDI_VERSION} kaldi
26+
WORKING_DIRECTORY ${DEPENDS_DIR})
1727
execute_process(COMMAND make -j ${BUILD_JOBS} WORKING_DIRECTORY ${KALDI_ROOT}/tools)
28+
execute_process(COMMAND patch kaldi/src/Makefile kaldi-makefile.patch WORKING_DIRECTORY ${DEPENDS_DIR})
29+
endif()
30+
31+
if(NOT EXISTS "${KALDI_ROOT}/src/decoder/kaldi-decoder.a")
1832
execute_process(COMMAND ./configure WORKING_DIRECTORY ${KALDI_ROOT}/src)
1933
execute_process(COMMAND make depend -j ${BUILD_JOBS} WORKING_DIRECTORY ${KALDI_ROOT}/src)
2034
execute_process(COMMAND make -j ${BUILD_JOBS} WORKING_DIRECTORY ${KALDI_ROOT}/src)
2135
endif()
2236

37+
######
2338
# Settings
2439
add_compile_options(-std=c++11)
2540
add_definitions(-DHAVE_ATLAS)
@@ -28,7 +43,8 @@ include_directories("${KALDI_ROOT}/src")
2843
include_directories("${KALDI_ROOT}/tools/openfst/include")
2944
include_directories("${KALDI_ROOT}/tools/ATLAS/include")
3045

31-
# gop
46+
######
47+
# Build libgop.a
3248
include_directories(".")
3349
file(GLOB GOP "gop/*.cc")
3450
add_library(gop ${GOP})
@@ -42,12 +58,13 @@ target_link_libraries(gop ${KALDI_ROOT}/src/matrix/kaldi-matrix.a)
4258
target_link_libraries(gop ${KALDI_ROOT}/src/base/kaldi-base.a)
4359
target_link_libraries(gop ${KALDI_ROOT}/tools/openfst/lib/libfst.a)
4460
target_link_libraries(gop dl.so)
45-
target_link_libraries(gop /usr/lib/libatlas.so.3)
46-
target_link_libraries(gop /usr/lib/libf77blas.so.3)
47-
target_link_libraries(gop /usr/lib/libcblas.so.3)
48-
target_link_libraries(gop /usr/lib/liblapack_atlas.so.3)
61+
find_library (LIBCBLAS libcblas.so.3)
62+
find_library (LIBLAPACK liblapack_atlas.so.3)
63+
target_link_libraries(gop ${LIBCBLAS})
64+
target_link_libraries(gop ${LIBLAPACK})
4965

50-
# compute-gmm-gop
66+
######
67+
# Build compute-gmm-gop
5168
add_executable(compute-gmm-gop gopbin/compute-gmm-gop.cc)
5269
target_link_libraries(compute-gmm-gop gop)
5370
find_package(Threads)

src/gop/gmm-gop.cc

100755100644
+37-32
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@
1919
#include <string>
2020
#include <vector>
2121
#include "base/kaldi-common.h"
22-
#include "util/common-utils.h"
23-
#include "gmm/am-diag-gmm.h"
24-
#include "hmm/transition-model.h"
25-
#include "fstext/fstext-utils.h"
2622
#include "decoder/decoder-wrappers.h"
23+
#include "fstext/fstext-utils.h"
24+
#include "gmm/am-diag-gmm.h"
2725
#include "gmm/decodable-am-diag-gmm.h"
28-
#include "lat/kaldi-lattice.h"
2926
#include "hmm/hmm-utils.h"
27+
#include "hmm/transition-model.h"
28+
#include "lat/kaldi-lattice.h"
29+
#include "util/common-utils.h"
3030
#include "gop/gmm-gop.h"
3131

3232
namespace kaldi {
@@ -35,21 +35,23 @@ typedef typename fst::StdArc Arc;
3535
typedef typename Arc::StateId StateId;
3636
typedef typename Arc::Weight Weight;
3737

38-
void GmmGop::Init(std::string &tree_in_filename,
39-
std::string &model_in_filename,
40-
std::string &lex_in_filename) {
38+
void GmmGop::Init(std::string &tree_in_filename, std::string &model_in_filename,
39+
std::string &lex_in_filename) {
4140
bool binary;
4241
Input ki(model_in_filename, &binary);
4342
tm_.Read(ki.Stream(), binary);
4443
am_.Read(ki.Stream(), binary);
4544
ReadKaldiObject(tree_in_filename, &ctx_dep_);
4645

4746
fst::VectorFst<fst::StdArc> *lex_fst = fst::ReadFstKaldi(lex_in_filename);
48-
std::vector<int32> disambig_syms;
47+
std::vector<int32> disambig_syms;
4948
TrainingGraphCompilerOptions gopts;
5049
gc_ = new TrainingGraphCompiler(tm_, ctx_dep_, lex_fst, disambig_syms, gopts);
5150

5251
for (size_t i = 0; i < tm_.NumTransitionIds(); i++) {
52+
// The transition-ids are only for building the denominator graph. Although
53+
// one pdf-id may have multiple transition-ids, all those transitions-ids
54+
// share the same HMM state (of course).
5355
pdfid_to_tid[tm_.TransitionIdToPdf(i)] = i;
5456
}
5557
}
@@ -61,23 +63,22 @@ BaseFloat GmmGop::Decode(fst::VectorFst<fst::StdArc> &fst,
6163
decode_opts.beam = 500;
6264
FasterDecoder decoder(fst, decode_opts);
6365
decoder.Decode(&decodable);
64-
if (! decoder.ReachedFinal()) {
66+
if (!decoder.ReachedFinal()) {
6567
KALDI_WARN << "Did not successfully decode.";
6668
}
6769
fst::VectorFst<LatticeArc> decoded;
6870
decoder.GetBestPath(&decoded);
6971
std::vector<int32> osymbols;
7072
LatticeWeight weight;
7173
GetLinearSymbolSequence(decoded, align, &osymbols, &weight);
72-
BaseFloat likelihood = -(weight.Value1()+weight.Value2());
74+
BaseFloat likelihood = -(weight.Value1() + weight.Value2());
7375

7476
return likelihood;
7577
}
7678

7779
BaseFloat GmmGop::ComputeGopNumera(DecodableAmDiagGmmScaled &decodable,
7880
std::vector<int32> &align,
79-
MatrixIndexT start_frame,
80-
int32 size) {
81+
MatrixIndexT start_frame, int32 size) {
8182
KALDI_ASSERT(start_frame + size <= align.size());
8283
BaseFloat likelihood = 0;
8384
for (MatrixIndexT frame = start_frame; frame < start_frame + size; frame++) {
@@ -88,7 +89,8 @@ BaseFloat GmmGop::ComputeGopNumera(DecodableAmDiagGmmScaled &decodable,
8889
}
8990

9091
BaseFloat GmmGop::ComputeGopNumeraViterbi(DecodableAmDiagGmmScaled &decodable,
91-
int32 phone_l, int32 phone, int32 phone_r) {
92+
int32 phone_l, int32 phone,
93+
int32 phone_r) {
9294
KALDI_ASSERT(ctx_dep_.ContextWidth() == 3);
9395
KALDI_ASSERT(ctx_dep_.CentralPosition() == 1);
9496
std::vector<int32> phoneseq(3);
@@ -101,7 +103,8 @@ BaseFloat GmmGop::ComputeGopNumeraViterbi(DecodableAmDiagGmmScaled &decodable,
101103
fst.SetStart(cur_state);
102104
for (size_t c = 0; c < tm_.GetTopo().NumPdfClasses(phone); c++) {
103105
int32 pdf_id;
104-
KALDI_ASSERT(ctx_dep_.Compute(phoneseq, c, &pdf_id));
106+
if (!ctx_dep_.Compute(phoneseq, c, &pdf_id))
107+
KALDI_ERR << "Failed to obtain pdf_id.";
105108
int32 tid = pdfid_to_tid[pdf_id];
106109

107110
StateId next_state = fst.AddState();
@@ -137,7 +140,8 @@ BaseFloat GmmGop::ComputeGopDenomin(DecodableAmDiagGmmScaled &decodable,
137140
StateId cur_state = start_state;
138141
for (size_t c = 0; c < pdfclass_num; c++) {
139142
int32 pdf_id;
140-
KALDI_ASSERT(ctx_dep_.Compute(phoneseq, c, &pdf_id));
143+
if (!ctx_dep_.Compute(phoneseq, c, &pdf_id))
144+
KALDI_ERR << "Failed to obtain pdf_id.";
141145
int32 tid = pdfid_to_tid[pdf_id];
142146

143147
StateId next_state = fst.AddState();
@@ -157,11 +161,14 @@ BaseFloat GmmGop::ComputeGopDenomin(DecodableAmDiagGmmScaled &decodable,
157161
}
158162

159163
void GmmGop::GetContextFromSplit(std::vector<std::vector<int32> > split,
160-
int32 index, int32 &phone_l, int32 &phone, int32 &phone_r) {
164+
int32 index, int32 &phone_l, int32 &phone,
165+
int32 &phone_r) {
161166
KALDI_ASSERT(index < split.size());
162-
phone_l = (index > 0) ? tm_.TransitionIdToPhone(split[index-1][0]) : 1;
167+
phone_l = (index > 0) ? tm_.TransitionIdToPhone(split[index - 1][0]) : 1;
163168
phone = tm_.TransitionIdToPhone(split[index][0]);
164-
phone_r = (index < split.size() - 1) ? tm_.TransitionIdToPhone(split[index+1][0]): 1;
169+
phone_r = (index < split.size() - 1)
170+
? tm_.TransitionIdToPhone(split[index + 1][0])
171+
: 1;
165172
}
166173

167174
void GmmGop::Compute(const Matrix<BaseFloat> &feats,
@@ -181,33 +188,31 @@ void GmmGop::Compute(const Matrix<BaseFloat> &feats,
181188
phones_.resize(split.size());
182189
int32 frame_start_idx = 0;
183190
for (MatrixIndexT i = 0; i < split.size(); i++) {
184-
SubMatrix<BaseFloat> feats_in_phone = feats.Range(frame_start_idx, split[i].size(),
185-
0, feats.NumCols());
191+
SubMatrix<BaseFloat> feats_in_phone =
192+
feats.Range(frame_start_idx, split[i].size(), 0, feats.NumCols());
186193
const Matrix<BaseFloat> features(feats_in_phone);
187194
DecodableAmDiagGmmScaled split_decodable(am_, tm_, features, 1.0);
188195

189196
int32 phone, phone_l, phone_r;
190197
GetContextFromSplit(split, i, phone_l, phone, phone_r);
191198

192199
bool use_viterbi_numera = true;
193-
BaseFloat gop_numerator = use_viterbi_numera ?
194-
ComputeGopNumeraViterbi(split_decodable, phone_l, phone, phone_r):
195-
ComputeGopNumera(ali_decodable, align,
196-
frame_start_idx, split[i].size());
197-
BaseFloat gop_denominator = ComputeGopDenomin(split_decodable, phone_l, phone_r);
200+
BaseFloat gop_numerator =
201+
use_viterbi_numera
202+
? ComputeGopNumeraViterbi(split_decodable, phone_l, phone, phone_r)
203+
: ComputeGopNumera(ali_decodable, align, frame_start_idx,
204+
split[i].size());
205+
BaseFloat gop_denominator =
206+
ComputeGopDenomin(split_decodable, phone_l, phone_r);
198207
gop_result_(i) = (gop_numerator - gop_denominator) / split[i].size();
199208
phones_[i] = phone;
200209

201210
frame_start_idx += split[i].size();
202211
}
203212
}
204213

205-
Vector<BaseFloat>& GmmGop::Result() {
206-
return gop_result_;
207-
}
214+
Vector<BaseFloat> &GmmGop::Result() { return gop_result_; }
208215

209-
std::vector<int32>& GmmGop::Phonemes() {
210-
return phones_;
211-
}
216+
std::vector<int32> &GmmGop::Phonemes() { return phones_; }
212217

213218
} // End namespace kaldi

0 commit comments

Comments
 (0)