From 40095d3026bef08904acf831d69fff2a7ed72d8e Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Wed, 14 Dec 2022 15:44:03 +0800
Subject: [PATCH 1/6] fix openfst download error

---
 speechx/cmake/openfst.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/speechx/cmake/openfst.cmake b/speechx/cmake/openfst.cmake
index 9acf530a195..bb4512fc7c4 100644
--- a/speechx/cmake/openfst.cmake
+++ b/speechx/cmake/openfst.cmake
@@ -4,7 +4,7 @@ set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
 set(openfst_BINARY_DIR ${fc_patch}/openfst-build)
 
 ExternalProject_Add(openfst
-  URL               https://github.com/mjansche/openfst/archive/refs/tags/1.7.2.zip
+  URL               https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip
   URL_HASH          SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6
   PREFIX            ${openfst_PREFIX_DIR} 
   SOURCE_DIR        ${openfst_SOURCE_DIR}
@@ -17,4 +17,4 @@ ExternalProject_Add(openfst
   BUILD_COMMAND     make -j 4
 )
 link_directories(${openfst_PREFIX_DIR}/lib)
-include_directories(${openfst_PREFIX_DIR}/include)
\ No newline at end of file
+include_directories(${openfst_PREFIX_DIR}/include)

From f880229c25d04b2c75021a14f55f6a50cc4a657b Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Wed, 14 Dec 2022 15:51:03 +0800
Subject: [PATCH 2/6] add acknowledgments of openfst

---
 speechx/cmake/openfst.cmake | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/speechx/cmake/openfst.cmake b/speechx/cmake/openfst.cmake
index bb4512fc7c4..07c33a74794 100644
--- a/speechx/cmake/openfst.cmake
+++ b/speechx/cmake/openfst.cmake
@@ -3,6 +3,13 @@ set(openfst_PREFIX_DIR ${fc_patch}/openfst)
 set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
 set(openfst_BINARY_DIR ${fc_patch}/openfst-build)
 
+# openfst Acknowledgments:
+#Cyril Allauzen, Michael Riley, Johan Schalkwyk, Wojciech Skut and Mehryar Mohri, 
+#"OpenFst: A General and Efficient Weighted Finite-State Transducer Library", 
+#Proceedings of the Ninth International Conference on Implementation and 
+#Application of Automata, (CIAA 2007), volume 4783 of Lecture Notes in 
+#Computer Science, pages 11-23. Springer, 2007. http://www.openfst.org.
+
 ExternalProject_Add(openfst
   URL               https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip
   URL_HASH          SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6

From a2b5eb19c8513e06bffdaca14d00ca0d8c2eadd2 Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Fri, 16 Dec 2022 11:21:06 +0800
Subject: [PATCH 3/6] refactor directory

---
 speechx/requirement.txt                       |   1 -
 speechx/speechx/CMakeLists.txt                |  51 +-
 speechx/speechx/asr/CMakeLists.txt            |  11 +
 .../speechx/{ => asr}/decoder/CMakeLists.txt  |   0
 speechx/speechx/{ => asr}/decoder/common.h    |   0
 .../decoder/ctc_beam_search_decoder.cc        |   0
 .../decoder/ctc_beam_search_decoder.h         |   0
 .../decoder/ctc_beam_search_decoder_main.cc   |   0
 .../{ => asr}/decoder/ctc_beam_search_opt.h   |   0
 .../asr/decoder/ctc_decoders/.gitignore       |   9 +
 .../decoder/ctc_decoders/COPYING.APACHE2.0    | 201 ++++++
 .../asr/decoder/ctc_decoders/COPYING.LESSER.3 | 165 +++++
 .../speechx/asr/decoder/ctc_decoders/LICENSE  |   8 +
 .../asr/decoder/ctc_decoders/__init__.py      |  13 +
 .../ctc_decoders/ctc_beam_search_decoder.cpp  | 607 ++++++++++++++++++
 .../ctc_decoders/ctc_beam_search_decoder.h    | 175 +++++
 .../ctc_decoders/ctc_greedy_decoder.cpp       |  61 ++
 .../decoder/ctc_decoders/ctc_greedy_decoder.h |  35 +
 .../decoder/ctc_decoders/decoder_utils.cpp    | 193 ++++++
 .../asr/decoder/ctc_decoders/decoder_utils.h  | 111 ++++
 .../asr/decoder/ctc_decoders/decoders.i       |  33 +
 .../asr/decoder/ctc_decoders/path_trie.cpp    | 164 +++++
 .../asr/decoder/ctc_decoders/path_trie.h      |  82 +++
 .../asr/decoder/ctc_decoders/scorer.cpp       | 232 +++++++
 .../speechx/asr/decoder/ctc_decoders/scorer.h | 114 ++++
 .../speechx/asr/decoder/ctc_decoders/setup.py | 138 ++++
 .../speechx/asr/decoder/ctc_decoders/setup.sh |  24 +
 .../decoder/ctc_prefix_beam_search_decoder.cc |   2 +-
 .../decoder/ctc_prefix_beam_search_decoder.h  |   0
 .../ctc_prefix_beam_search_decoder_main.cc    |   0
 .../decoder/ctc_prefix_beam_search_score.h    |   0
 .../{ => asr}/decoder/ctc_tlg_decoder.cc      |   0
 .../{ => asr}/decoder/ctc_tlg_decoder.h       |   0
 .../{ => asr}/decoder/ctc_tlg_decoder_main.cc |   0
 .../speechx/{ => asr}/decoder/decoder_itf.h   |   0
 .../decoder/nnet_logprob_decoder_main.cc      |   0
 speechx/speechx/{ => asr}/decoder/param.h     |   0
 speechx/speechx/{ => asr}/nnet/CMakeLists.txt |   0
 speechx/speechx/{ => asr}/nnet/decodable.cc   |   0
 speechx/speechx/{ => asr}/nnet/decodable.h    |   0
 speechx/speechx/{ => asr}/nnet/ds2_nnet.cc    |   0
 speechx/speechx/{ => asr}/nnet/ds2_nnet.h     |   0
 .../speechx/{ => asr}/nnet/ds2_nnet_main.cc   |   0
 speechx/speechx/{ => asr}/nnet/nnet_itf.h     |   0
 speechx/speechx/{ => asr}/nnet/u2_nnet.cc     |   0
 speechx/speechx/{ => asr}/nnet/u2_nnet.h      |   0
 .../speechx/{ => asr}/nnet/u2_nnet_main.cc    |   0
 .../{ => asr}/recognizer/CMakeLists.txt       |   0
 .../{ => asr}/recognizer/recognizer.cc        |   0
 .../speechx/{ => asr}/recognizer/recognizer.h |   0
 .../{ => asr}/recognizer/recognizer_main.cc   |   0
 .../{ => asr}/recognizer/u2_recognizer.cc     |   0
 .../{ => asr}/recognizer/u2_recognizer.h      |   0
 .../recognizer/u2_recognizer_main.cc          |   0
 .../{protocol => asr/server}/CMakeLists.txt   |   0
 .../server}/websocket/CMakeLists.txt          |   0
 .../server}/websocket/websocket_client.cc     |   0
 .../server}/websocket/websocket_client.h      |   0
 .../websocket/websocket_client_main.cc        |   0
 .../server}/websocket/websocket_server.cc     |   0
 .../server}/websocket/websocket_server.h      |   0
 .../websocket/websocket_server_main.cc        |   0
 speechx/speechx/common/CMakeLists.txt         |  16 +
 .../speechx/{ => common}/base/basic_types.h   |   0
 speechx/speechx/{ => common}/base/common.h    |   0
 speechx/speechx/{ => common}/base/flags.h     |   0
 speechx/speechx/{ => common}/base/log.h       |   0
 speechx/speechx/{ => common}/base/macros.h    |   0
 .../speechx/{ => common}/base/thread_pool.h   |   0
 .../{ => common}/frontend/CMakeLists.txt      |   0
 .../frontend/audio/CMakeLists.txt             |   0
 .../{ => common}/frontend/audio/assembler.cc  |   0
 .../{ => common}/frontend/audio/assembler.h   |   0
 .../frontend/audio/audio_cache.cc             |   0
 .../{ => common}/frontend/audio/audio_cache.h |   0
 .../{ => common}/frontend/audio/cmvn.cc       |   0
 .../{ => common}/frontend/audio/cmvn.h        |   0
 .../frontend/audio/cmvn_json2kaldi_main.cc    |   0
 .../frontend/audio/compute_fbank_main.cc      |   0
 .../audio/compute_linear_spectrogram_main.cc  |   0
 .../{ => common}/frontend/audio/data_cache.h  |   0
 .../{ => common}/frontend/audio/db_norm.cc    |   0
 .../{ => common}/frontend/audio/db_norm.h     |   0
 .../{ => common}/frontend/audio/fbank.cc      |   0
 .../{ => common}/frontend/audio/fbank.h       |   0
 .../frontend/audio/feature_cache.cc           |   0
 .../frontend/audio/feature_cache.h            |   0
 .../frontend/audio/feature_common.h           |   0
 .../frontend/audio/feature_common_inl.h       |   0
 .../frontend/audio/feature_pipeline.cc        |   0
 .../frontend/audio/feature_pipeline.h         |   0
 .../frontend/audio/frontend_itf.h             |   0
 .../frontend/audio/linear_spectrogram.cc      |   0
 .../frontend/audio/linear_spectrogram.h       |   0
 .../{ => common}/frontend/audio/mfcc.cc       |   0
 .../{ => common}/frontend/audio/mfcc.h        |   0
 .../{ => common}/frontend/audio/normalizer.h  |   0
 .../speechx/{ => common}/utils/CMakeLists.txt |   0
 .../speechx/{ => common}/utils/file_utils.cc  |   0
 .../speechx/{ => common}/utils/file_utils.h   |   0
 speechx/speechx/{ => common}/utils/math.cc    |   0
 speechx/speechx/{ => common}/utils/math.h     |   0
 speechx/speechx/decoder/ctc_decoders          |   1 -
 speechx/speechx/frontend/text/CMakeLists.txt  |   0
 speechx/speechx/kaldi/CMakeLists.txt          |   5 +-
 speechx/speechx/third_party/CMakeLists.txt    |   0
 speechx/speechx/third_party/README.md         |   4 -
 107 files changed, 2403 insertions(+), 53 deletions(-)
 delete mode 100644 speechx/requirement.txt
 create mode 100644 speechx/speechx/asr/CMakeLists.txt
 rename speechx/speechx/{ => asr}/decoder/CMakeLists.txt (100%)
 rename speechx/speechx/{ => asr}/decoder/common.h (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder.h (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_decoder_main.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_beam_search_opt.h (100%)
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/.gitignore
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/LICENSE
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/__init__.py
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoders.i
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/scorer.h
 create mode 100644 speechx/speechx/asr/decoder/ctc_decoders/setup.py
 create mode 100755 speechx/speechx/asr/decoder/ctc_decoders/setup.sh
 rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder.cc (99%)
 rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder.h (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_decoder_main.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_prefix_beam_search_score.h (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder.h (100%)
 rename speechx/speechx/{ => asr}/decoder/ctc_tlg_decoder_main.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/decoder_itf.h (100%)
 rename speechx/speechx/{ => asr}/decoder/nnet_logprob_decoder_main.cc (100%)
 rename speechx/speechx/{ => asr}/decoder/param.h (100%)
 rename speechx/speechx/{ => asr}/nnet/CMakeLists.txt (100%)
 rename speechx/speechx/{ => asr}/nnet/decodable.cc (100%)
 rename speechx/speechx/{ => asr}/nnet/decodable.h (100%)
 rename speechx/speechx/{ => asr}/nnet/ds2_nnet.cc (100%)
 rename speechx/speechx/{ => asr}/nnet/ds2_nnet.h (100%)
 rename speechx/speechx/{ => asr}/nnet/ds2_nnet_main.cc (100%)
 rename speechx/speechx/{ => asr}/nnet/nnet_itf.h (100%)
 rename speechx/speechx/{ => asr}/nnet/u2_nnet.cc (100%)
 rename speechx/speechx/{ => asr}/nnet/u2_nnet.h (100%)
 rename speechx/speechx/{ => asr}/nnet/u2_nnet_main.cc (100%)
 rename speechx/speechx/{ => asr}/recognizer/CMakeLists.txt (100%)
 rename speechx/speechx/{ => asr}/recognizer/recognizer.cc (100%)
 rename speechx/speechx/{ => asr}/recognizer/recognizer.h (100%)
 rename speechx/speechx/{ => asr}/recognizer/recognizer_main.cc (100%)
 rename speechx/speechx/{ => asr}/recognizer/u2_recognizer.cc (100%)
 rename speechx/speechx/{ => asr}/recognizer/u2_recognizer.h (100%)
 rename speechx/speechx/{ => asr}/recognizer/u2_recognizer_main.cc (100%)
 rename speechx/speechx/{protocol => asr/server}/CMakeLists.txt (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/CMakeLists.txt (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client.cc (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client.h (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_client_main.cc (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server.cc (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server.h (100%)
 rename speechx/speechx/{protocol => asr/server}/websocket/websocket_server_main.cc (100%)
 create mode 100644 speechx/speechx/common/CMakeLists.txt
 rename speechx/speechx/{ => common}/base/basic_types.h (100%)
 rename speechx/speechx/{ => common}/base/common.h (100%)
 rename speechx/speechx/{ => common}/base/flags.h (100%)
 rename speechx/speechx/{ => common}/base/log.h (100%)
 rename speechx/speechx/{ => common}/base/macros.h (100%)
 rename speechx/speechx/{ => common}/base/thread_pool.h (100%)
 rename speechx/speechx/{ => common}/frontend/CMakeLists.txt (100%)
 rename speechx/speechx/{ => common}/frontend/audio/CMakeLists.txt (100%)
 rename speechx/speechx/{ => common}/frontend/audio/assembler.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/assembler.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/audio_cache.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/audio_cache.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/cmvn.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/cmvn.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/cmvn_json2kaldi_main.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/compute_fbank_main.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/compute_linear_spectrogram_main.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/data_cache.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/db_norm.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/db_norm.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/fbank.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/fbank.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_cache.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_cache.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_common.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_common_inl.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_pipeline.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/feature_pipeline.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/frontend_itf.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/linear_spectrogram.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/linear_spectrogram.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/mfcc.cc (100%)
 rename speechx/speechx/{ => common}/frontend/audio/mfcc.h (100%)
 rename speechx/speechx/{ => common}/frontend/audio/normalizer.h (100%)
 rename speechx/speechx/{ => common}/utils/CMakeLists.txt (100%)
 rename speechx/speechx/{ => common}/utils/file_utils.cc (100%)
 rename speechx/speechx/{ => common}/utils/file_utils.h (100%)
 rename speechx/speechx/{ => common}/utils/math.cc (100%)
 rename speechx/speechx/{ => common}/utils/math.h (100%)
 delete mode 120000 speechx/speechx/decoder/ctc_decoders
 delete mode 100644 speechx/speechx/frontend/text/CMakeLists.txt
 delete mode 100644 speechx/speechx/third_party/CMakeLists.txt
 delete mode 100644 speechx/speechx/third_party/README.md

diff --git a/speechx/requirement.txt b/speechx/requirement.txt
deleted file mode 100644
index 6a6db09603f..00000000000
--- a/speechx/requirement.txt
+++ /dev/null
@@ -1 +0,0 @@
-paddlepaddle>=2.4rc
diff --git a/speechx/speechx/CMakeLists.txt b/speechx/speechx/CMakeLists.txt
index 60c183472ba..b522e158c81 100644
--- a/speechx/speechx/CMakeLists.txt
+++ b/speechx/speechx/CMakeLists.txt
@@ -2,50 +2,11 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
 
 project(speechx LANGUAGES CXX)
 
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/kaldi
-)
-add_subdirectory(kaldi)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/utils
-)
-add_subdirectory(utils)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/frontend
-)
-add_subdirectory(frontend)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/nnet
-)
-add_subdirectory(nnet)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)
 
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/decoder
-)
-add_subdirectory(decoder)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/recognizer
-)
-add_subdirectory(recognizer)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/protocol
-)
-add_subdirectory(protocol)
-
-include_directories(
-${CMAKE_CURRENT_SOURCE_DIR}
-${CMAKE_CURRENT_SOURCE_DIR}/codelab
-)
+add_subdirectory(asr)
+add_subdirectory(common)
+add_subdirectory(kaldi)
 add_subdirectory(codelab)
diff --git a/speechx/speechx/asr/CMakeLists.txt b/speechx/speechx/asr/CMakeLists.txt
new file mode 100644
index 00000000000..ff4cdecbe38
--- /dev/null
+++ b/speechx/speechx/asr/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
+
+project(ASR LANGUAGES CXX)
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/server)
+
+add_subdirectory(decoder)
+add_subdirectory(recognizer)
+add_subdirectory(nnet)
+add_subdirectory(server)
diff --git a/speechx/speechx/decoder/CMakeLists.txt b/speechx/speechx/asr/decoder/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/decoder/CMakeLists.txt
rename to speechx/speechx/asr/decoder/CMakeLists.txt
diff --git a/speechx/speechx/decoder/common.h b/speechx/speechx/asr/decoder/common.h
similarity index 100%
rename from speechx/speechx/decoder/common.h
rename to speechx/speechx/asr/decoder/common.h
diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder.cc b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
similarity index 100%
rename from speechx/speechx/decoder/ctc_beam_search_decoder.cc
rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder.cc
diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
similarity index 100%
rename from speechx/speechx/decoder/ctc_beam_search_decoder.h
rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder.h
diff --git a/speechx/speechx/decoder/ctc_beam_search_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
similarity index 100%
rename from speechx/speechx/decoder/ctc_beam_search_decoder_main.cc
rename to speechx/speechx/asr/decoder/ctc_beam_search_decoder_main.cc
diff --git a/speechx/speechx/decoder/ctc_beam_search_opt.h b/speechx/speechx/asr/decoder/ctc_beam_search_opt.h
similarity index 100%
rename from speechx/speechx/decoder/ctc_beam_search_opt.h
rename to speechx/speechx/asr/decoder/ctc_beam_search_opt.h
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/.gitignore b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
new file mode 100644
index 00000000000..0b1046ae8a4
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/.gitignore
@@ -0,0 +1,9 @@
+ThreadPool/
+build/
+dist/
+kenlm/
+openfst-1.6.3/
+openfst-1.6.3.tar.gz
+swig_decoders.egg-info/
+decoders_wrap.cxx
+swig_decoders.py
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0 b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
new file mode 100644
index 00000000000..261eeb9e9f8
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3 b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
new file mode 100644
index 00000000000..cca7fc278f5
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
@@ -0,0 +1,165 @@
+		   GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+  This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+  0. Additional Definitions.
+
+  As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+  "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+  An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+  A "Combined Work" is a work produced by combining or linking an
+Application with the Library.  The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+  The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+  The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+  1. Exception to Section 3 of the GNU GPL.
+
+  You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+  2. Conveying Modified Versions.
+
+  If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+   a) under this License, provided that you make a good faith effort to
+   ensure that, in the event an Application does not supply the
+   function or data, the facility still operates, and performs
+   whatever part of its purpose remains meaningful, or
+
+   b) under the GNU GPL, with none of the additional permissions of
+   this License applicable to that copy.
+
+  3. Object Code Incorporating Material from Library Header Files.
+
+  The object code form of an Application may incorporate material from
+a header file that is part of the Library.  You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+   a) Give prominent notice with each copy of the object code that the
+   Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the object code with a copy of the GNU GPL and this license
+   document.
+
+  4. Combined Works.
+
+  You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+   a) Give prominent notice with each copy of the Combined Work that
+   the Library is used in it and that the Library and its use are
+   covered by this License.
+
+   b) Accompany the Combined Work with a copy of the GNU GPL and this license
+   document.
+
+   c) For a Combined Work that displays copyright notices during
+   execution, include the copyright notice for the Library among
+   these notices, as well as a reference directing the user to the
+   copies of the GNU GPL and this license document.
+
+   d) Do one of the following:
+
+       0) Convey the Minimal Corresponding Source under the terms of this
+       License, and the Corresponding Application Code in a form
+       suitable for, and under terms that permit, the user to
+       recombine or relink the Application with a modified version of
+       the Linked Version to produce a modified Combined Work, in the
+       manner specified by section 6 of the GNU GPL for conveying
+       Corresponding Source.
+
+       1) Use a suitable shared library mechanism for linking with the
+       Library.  A suitable mechanism is one that (a) uses at run time
+       a copy of the Library already present on the user's computer
+       system, and (b) will operate properly with a modified version
+       of the Library that is interface-compatible with the Linked
+       Version.
+
+   e) Provide Installation Information, but only if you would otherwise
+   be required to provide such information under section 6 of the
+   GNU GPL, and only to the extent that such information is
+   necessary to install and execute a modified version of the
+   Combined Work produced by recombining or relinking the
+   Application with a modified version of the Linked Version. (If
+   you use option 4d0, the Installation Information must accompany
+   the Minimal Corresponding Source and Corresponding Application
+   Code. If you use option 4d1, you must provide the Installation
+   Information in the manner specified by section 6 of the GNU GPL
+   for conveying Corresponding Source.)
+
+  5. Combined Libraries.
+
+  You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+   a) Accompany the combined library with a copy of the same work based
+   on the Library, uncombined with any other library facilities,
+   conveyed under the terms of this License.
+
+   b) Give prominent notice with the combined library that part of it
+   is a work based on the Library, and explaining where to find the
+   accompanying uncombined form of the same work.
+
+  6. Revised Versions of the GNU Lesser General Public License.
+
+  The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+  Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+  If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/LICENSE b/speechx/speechx/asr/decoder/ctc_decoders/LICENSE
new file mode 100644
index 00000000000..ad947f8d756
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/LICENSE
@@ -0,0 +1,8 @@
+Most of the code here is licensed under the Apache License 2.0.  
+There are exceptions that have their own licenses, listed below.  
+
+score.h and score.cpp are under the LGPL license. 
+The two files include the header files from KenLM project.
+
+For the rest:
+The default license of paddlespeech-ctcdecoders is Apache License 2.0.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/__init__.py b/speechx/speechx/asr/decoder/ctc_decoders/__init__.py
new file mode 100644
index 00000000000..185a92b8d94
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
new file mode 100644
index 00000000000..ebea5c222a3
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.cpp
@@ -0,0 +1,607 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ctc_beam_search_decoder.h"
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <limits>
+#include <map>
+#include <utility>
+
+#include "ThreadPool.h"
+#include "fst/fstlib.h"
+
+#include "decoder_utils.h"
+#include "path_trie.h"
+
+using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
+
+// Full-utterance CTC prefix beam search (see ctc_beam_search_decoder.h for the
+// parameter contract); the returned pairs come from get_beam_search_result().
+std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    // a zero beam would make `prefixes[num_prefixes - 1]` below wrap around
+    VALID_CHECK_GT(beam_size, 0, "beam_size must be greater than 0!");
+    // dimension check: one probability per vocabulary entry in every frame
+    size_t num_time_steps = probs_seq.size();
+    for (size_t i = 0; i < num_time_steps; ++i) {
+        VALID_CHECK_EQ(probs_seq[i].size(),
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+    // assign space id
+    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = it - vocabulary.begin();
+    // no space token in the vocabulary: fall back to the sentinel id -2
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+    // the search starts from the trie root as its only prefix
+    PathTrie root;
+    root.score = root.log_prob_b_prev = 0.0;
+    std::vector<PathTrie *> prefixes;
+    prefixes.push_back(&root);
+
+    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
+        auto fst_dict =
+            static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
+        fst::StdVectorFst *dict_ptr = fst_dict->Copy(true);
+        root.set_dictionary(dict_ptr);
+        auto matcher = std::make_shared<FSTMATCH>(*dict_ptr, fst::MATCH_INPUT);
+        root.set_matcher(matcher);
+    }
+
+    // prefix search over time
+    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
+        auto &prob = probs_seq[time_step];
+        // pruning threshold; only tightened below when a scorer is present
+        float min_cutoff = -NUM_FLT_INF;
+        bool full_beam = false;
+        if (ext_scorer != nullptr) {
+            size_t num_prefixes = std::min(prefixes.size(), beam_size);
+            std::sort(prefixes.begin(),
+                      prefixes.begin() + num_prefixes,
+                      prefix_compare);
+            min_cutoff = prefixes[num_prefixes - 1]->score +
+                         std::log(prob[blank_id]) -
+                         std::max(0.0, ext_scorer->beta);
+            full_beam = (num_prefixes == beam_size);
+        }
+
+        std::vector<std::pair<size_t, float>> log_prob_idx =
+            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
+        // loop over the characters returned by get_pruned_log_probs
+        for (size_t index = 0; index < log_prob_idx.size(); index++) {
+            auto c = log_prob_idx[index].first;
+            auto log_prob_c = log_prob_idx[index].second;
+
+            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
+                auto prefix = prefixes[i];
+                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
+                    break;
+                }
+                // blank: accumulate into log_prob_b_cur, prefix is not extended
+                if (c == blank_id) {
+                    prefix->log_prob_b_cur = log_sum_exp(
+                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
+                    continue;
+                }
+                // repeated character: accumulate into the same prefix
+                if (c == prefix->character) {
+                    prefix->log_prob_nb_cur =
+                        log_sum_exp(prefix->log_prob_nb_cur,
+                                    log_prob_c + prefix->log_prob_nb_prev);
+                }
+                // extend the prefix by c (get_path_trie may return nullptr)
+                auto prefix_new = prefix->get_path_trie(c);
+
+                if (prefix_new != nullptr) {
+                    float log_p = -NUM_FLT_INF;
+
+                    if (c == prefix->character &&
+                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
+                        log_p = log_prob_c + prefix->log_prob_b_prev;
+                    } else if (c != prefix->character) {
+                        log_p = log_prob_c + prefix->score;
+                    }
+
+                    // language model scoring
+                    if (ext_scorer != nullptr &&
+                        (c == space_id || ext_scorer->is_character_based())) {
+                        PathTrie *prefix_to_score = nullptr;
+                        // skip scoring the space
+                        if (ext_scorer->is_character_based()) {
+                            prefix_to_score = prefix_new;
+                        } else {
+                            prefix_to_score = prefix;
+                        }
+
+                        float score = 0.0;
+                        std::vector<std::string> ngram;
+                        ngram = ext_scorer->make_ngram(prefix_to_score);
+                        score = ext_scorer->get_log_cond_prob(ngram) *
+                                ext_scorer->alpha;
+                        log_p += score;
+                        log_p += ext_scorer->beta;
+                    }
+                    prefix_new->log_prob_nb_cur =
+                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
+                }
+            }  // end of loop over prefix
+        }      // end of loop over vocabulary
+
+        prefixes.clear();
+        // update log probs
+        root.iterate_to_vec(prefixes);
+
+        // only preserve top beam_size prefixes
+        if (prefixes.size() >= beam_size) {
+            std::nth_element(prefixes.begin(),
+                             prefixes.begin() + beam_size,
+                             prefixes.end(),
+                             prefix_compare);
+            for (size_t i = beam_size; i < prefixes.size(); ++i) {
+                prefixes[i]->remove();
+            }
+        }
+    }  // end of loop over time
+
+    // score the last word of each prefix that doesn't end with space
+    if (ext_scorer != nullptr && !ext_scorer->is_character_based()) {
+        for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
+            auto prefix = prefixes[i];
+            if (!prefix->is_empty() && prefix->character != space_id) {
+                float score = 0.0;
+                std::vector<std::string> ngram = ext_scorer->make_ngram(prefix);
+                score =
+                    ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
+                score += ext_scorer->beta;
+                prefix->score += score;
+            }
+        }
+    }
+
+    size_t num_prefixes = std::min(prefixes.size(), beam_size);
+    std::sort(
+        prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
+
+    // compute approximate ctc score as the return score, without affecting the
+    // return order of decoding result. To delete when decoder gets stable.
+    for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
+        double approx_ctc = prefixes[i]->score;
+        if (ext_scorer != nullptr) {
+            std::vector<int> output;
+            prefixes[i]->get_path_vec(output);
+            auto prefix_length = output.size();
+            auto words = ext_scorer->split_labels(output);
+            // remove word insert
+            approx_ctc = approx_ctc - prefix_length * ext_scorer->beta;
+            // remove language model weight:
+            approx_ctc -=
+                (ext_scorer->get_sent_log_prob(words)) * ext_scorer->alpha;
+        }
+        prefixes[i]->approx_ctc = approx_ctc;
+    }
+
+    return get_beam_search_result(prefixes, vocabulary, beam_size);
+}
+
+
+// Decodes every sample of probs_split with ctc_beam_search_decoding() on a
+// pool of num_processes threads; results keep the order of the input batch.
+std::vector<std::vector<std::pair<double, std::string>>>
+ctc_beam_search_decoding_batch(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    using NBest = std::vector<std::pair<double, std::string>>;
+    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
+    ThreadPool workers(num_processes);
+
+    // one decoding task per sample; each future yields that sample's results
+    std::vector<std::future<NBest>> pending;
+    for (const auto &sample_probs : probs_split) {
+        pending.emplace_back(workers.enqueue(ctc_beam_search_decoding,
+                                             sample_probs,
+                                             vocabulary,
+                                             beam_size,
+                                             cutoff_prob,
+                                             cutoff_top_n,
+                                             ext_scorer,
+                                             blank_id));
+    }
+
+    // collect in submission order; get() blocks until the task has finished
+    std::vector<NBest> batch_results;
+    for (auto &job : pending) {
+        batch_results.emplace_back(job.get());
+    }
+    return batch_results;
+}
+
+// Attach the scorer's dictionary FST (via Copy(true)) and an input-side
+// matcher to root; a null or character-based scorer leaves root untouched.
+void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer) {
+    if (ext_scorer == nullptr || ext_scorer->is_character_based()) return;
+    auto *src_fst = static_cast<fst::StdVectorFst *>(ext_scorer->dictionary);
+    fst::StdVectorFst *dict_copy = src_fst->Copy(true);
+    root->set_dictionary(dict_copy);
+    auto in_matcher = std::make_shared<FSTMATCH>(*dict_copy, fst::MATCH_INPUT);
+    root->set_matcher(in_matcher);
+}
+// Advances the beam search held in root/prefixes by the frames of probs_seq.
+void ctc_beam_search_decode_chunk(
+    PathTrie *root,
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id) {
+    // dimension check: one probability per vocabulary entry in every frame
+    size_t num_time_steps = probs_seq.size();
+    for (size_t i = 0; i < num_time_steps; ++i) {
+        VALID_CHECK_EQ(probs_seq[i].size(),
+                       // vocabulary.size() + 1,
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+    // assign space id
+    auto it = std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = it - vocabulary.begin();
+    // no space token in the vocabulary: fall back to the sentinel id -2
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+    // root and prefixes are supplied by the caller, so no trie is set up here;
+    // presumably they carry the state of earlier chunks - confirm with caller
+    // prefix search over time
+    for (size_t time_step = 0; time_step < num_time_steps; ++time_step) {
+        auto &prob = probs_seq[time_step];
+        // NOTE(review): an empty prefixes vector would wrap num_prefixes - 1
+        float min_cutoff = -NUM_FLT_INF;
+        bool full_beam = false;
+        if (ext_scorer != nullptr) {
+            size_t num_prefixes = std::min(prefixes.size(), beam_size);
+            std::sort(prefixes.begin(),
+                      prefixes.begin() + num_prefixes,
+                      prefix_compare);
+            min_cutoff = prefixes[num_prefixes - 1]->score +
+                         std::log(prob[blank_id]) -
+                         std::max(0.0, ext_scorer->beta);
+            full_beam = (num_prefixes == beam_size);
+        }
+
+        std::vector<std::pair<size_t, float>> log_prob_idx =
+            get_pruned_log_probs(prob, cutoff_prob, cutoff_top_n);
+        // loop over the characters returned by get_pruned_log_probs
+        for (size_t index = 0; index < log_prob_idx.size(); index++) {
+            auto c = log_prob_idx[index].first;
+            auto log_prob_c = log_prob_idx[index].second;
+
+            for (size_t i = 0; i < prefixes.size() && i < beam_size; ++i) {
+                auto prefix = prefixes[i];
+                if (full_beam && log_prob_c + prefix->score < min_cutoff) {
+                    break;
+                }
+                // blank: accumulate into log_prob_b_cur, prefix is not extended
+                if (c == blank_id) {
+                    prefix->log_prob_b_cur = log_sum_exp(
+                        prefix->log_prob_b_cur, log_prob_c + prefix->score);
+                    continue;
+                }
+                // repeated character: accumulate into the same prefix
+                if (c == prefix->character) {
+                    prefix->log_prob_nb_cur =
+                        log_sum_exp(prefix->log_prob_nb_cur,
+                                    log_prob_c + prefix->log_prob_nb_prev);
+                }
+                // extend the prefix by c (get_path_trie may return nullptr)
+                auto prefix_new = prefix->get_path_trie(c);
+
+                if (prefix_new != nullptr) {
+                    float log_p = -NUM_FLT_INF;
+
+                    if (c == prefix->character &&
+                        prefix->log_prob_b_prev > -NUM_FLT_INF) {
+                        log_p = log_prob_c + prefix->log_prob_b_prev;
+                    } else if (c != prefix->character) {
+                        log_p = log_prob_c + prefix->score;
+                    }
+
+                    // language model scoring
+                    if (ext_scorer != nullptr &&
+                        (c == space_id || ext_scorer->is_character_based())) {
+                        PathTrie *prefix_to_score = nullptr;
+                        // skip scoring the space
+                        if (ext_scorer->is_character_based()) {
+                            prefix_to_score = prefix_new;
+                        } else {
+                            prefix_to_score = prefix;
+                        }
+
+                        float score = 0.0;
+                        std::vector<std::string> ngram;
+                        ngram = ext_scorer->make_ngram(prefix_to_score);
+                        score = ext_scorer->get_log_cond_prob(ngram) *
+                                ext_scorer->alpha;
+                        log_p += score;
+                        log_p += ext_scorer->beta;
+                    }
+                    prefix_new->log_prob_nb_cur =
+                        log_sum_exp(prefix_new->log_prob_nb_cur, log_p);
+                }
+            }  // end of loop over prefix
+        }      // end of loop over vocabulary
+
+        prefixes.clear();
+        // update log probs
+
+        root->iterate_to_vec(prefixes);
+
+        // only preserve top beam_size prefixes
+        if (prefixes.size() >= beam_size) {
+            std::nth_element(prefixes.begin(),
+                             prefixes.begin() + beam_size,
+                             prefixes.end(),
+                             prefix_compare);
+            for (size_t i = beam_size; i < prefixes.size(); ++i) {
+                prefixes[i]->remove();
+            }
+        }
+    }  // end of loop over time
+
+    return;
+}
+
+// Builds the (score, string) results for the current prefixes. The last-word
+// score added for the result is subtracted again before returning.
+std::vector<std::pair<double, std::string>> get_decode_result(
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    Scorer *ext_scorer) {
+    // index of the space token, or -2 when the vocabulary has none
+    const auto space_pos =
+        std::find(vocabulary.begin(), vocabulary.end(), kSPACE);
+    int space_id = space_pos - vocabulary.begin();
+    if ((size_t)space_id >= vocabulary.size()) {
+        space_id = -2;
+    }
+    // Adds (add == true) or takes back (add == false) the language model
+    // score of the last word of every kept prefix that does not end with a
+    // space. Only a word-based scorer triggers any work.
+    auto adjust_last_word = [&](bool add) {
+        if (ext_scorer == nullptr || ext_scorer->is_character_based()) {
+            return;
+        }
+        for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
+            PathTrie *node = prefixes[i];
+            if (node->is_empty() || node->character == space_id) {
+                continue;
+            }
+            std::vector<std::string> ngram = ext_scorer->make_ngram(node);
+            float word_score =
+                ext_scorer->get_log_cond_prob(ngram) * ext_scorer->alpha;
+            word_score += ext_scorer->beta;
+            if (add) {
+                node->score += word_score;
+            } else {
+                node->score -= word_score;
+            }
+        }
+    };
+
+    // score the last word of each prefix that doesn't end with space
+    adjust_last_word(true);
+    const size_t num_kept = std::min(prefixes.size(), beam_size);
+    std::sort(prefixes.begin(), prefixes.begin() + num_kept, prefix_compare);
+
+    // compute approximate ctc score as the return score, without affecting the
+    // return order of decoding result. To delete when decoder gets stable.
+    for (size_t rank = 0; rank < num_kept; ++rank) {
+        PathTrie *node = prefixes[rank];
+        double ctc_only = node->score;
+        if (ext_scorer != nullptr) {
+            std::vector<int> labels;
+            node->get_path_vec(labels);
+            auto label_count = labels.size();
+            auto words = ext_scorer->split_labels(labels);
+            // remove word insert
+            ctc_only -= label_count * ext_scorer->beta;
+            // remove language model weight:
+            ctc_only -=
+                ext_scorer->get_sent_log_prob(words) * ext_scorer->alpha;
+        }
+        node->approx_ctc = ctc_only;
+    }
+
+    std::vector<std::pair<double, std::string>> res =
+        get_beam_search_result(prefixes, vocabulary, beam_size);
+
+    // pay back the last word of each prefix (for decoding by chunk)
+    adjust_last_word(false);
+    return res;
+}
+
+// Destroys the storage object held by `storage` and leaves the pointer empty.
+void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage) {
+    storage.reset();
+}
+
+// No hand-written teardown; member objects run their own destructors.
+CtcBeamSearchDecoderBatch::~CtcBeamSearchDecoderBatch() = default;
+
+// Validates the settings, then builds one decoder storage per batch entry.
+CtcBeamSearchDecoderBatch::CtcBeamSearchDecoderBatch(
+    const std::vector<std::string> &vocabulary,
+    size_t batch_size,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id)
+    : batch_size(batch_size),
+      beam_size(beam_size),
+      num_processes(num_processes),
+      cutoff_prob(cutoff_prob),
+      cutoff_top_n(cutoff_top_n),
+      ext_scorer(ext_scorer),
+      blank_id(blank_id) {
+    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+    this->vocabulary = vocabulary;
+    for (size_t slot = 0; slot < batch_size; ++slot) {
+        std::unique_ptr<CtcBeamSearchDecoderStorage> fresh(
+            new CtcBeamSearchDecoderStorage());
+        ctc_beam_search_decode_chunk_begin(fresh->root, ext_scorer);
+        this->decoder_storage_vector.push_back(std::move(fresh));
+    }
+}
+
+/**
+ * Decode one more chunk for every stream whose has_value entry is "true".
+ * Input
+ * probs_split: shape [B, T, D]; has_value: at least batch_size flags
+ */
+void CtcBeamSearchDecoderBatch::next(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &has_value) {
+    VALID_CHECK_GT(num_processes, 0, "num_processes must be nonnegative!");
+    // size the thread pool by the number of streams that carry data
+    size_t num_has_value = 0;
+    for (size_t i = 0; i < has_value.size(); i++)
+        if (has_value[i] == "true") num_has_value += 1;
+    ThreadPool pool(std::min(num_processes, num_has_value));
+    size_t probs_num = probs_split.size();
+    VALID_CHECK_EQ(this->batch_size,
+                   probs_num,
+                   "The batch size of the current input data should be same "
+                   "with the input data before");
+
+    // enqueue the tasks of decoding
+    std::vector<std::future<void>> res;
+    for (size_t i = 0; i < batch_size; ++i) {
+        if (has_value[i] == "true") {
+            res.emplace_back(pool.enqueue(
+                ctc_beam_search_decode_chunk,
+                std::ref(this->decoder_storage_vector[i]->root),
+                std::ref(this->decoder_storage_vector[i]->prefixes),
+                probs_split[i],
+                this->vocabulary,
+                this->beam_size,
+                this->cutoff_prob,
+                this->cutoff_top_n,
+                this->ext_scorer,
+                this->blank_id));
+        }
+    }
+
+    // one future per enqueued stream, which can be fewer than batch_size
+    for (auto &pending : res) {
+        pending.get();
+    }
+}
+
+/**
+ * Run get_decode_result() for every stream on a thread pool.
+ * Return
+ * batch_result: shape[B, beam_size,(-approx_ctc score, string)]
+ */
+std::vector<std::vector<std::pair<double, std::string>>>
+CtcBeamSearchDecoderBatch::decode() {
+    using NBest = std::vector<std::pair<double, std::string>>;
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+    ThreadPool workers(this->num_processes);
+    // schedule one get_decode_result() call per stream
+    std::vector<std::future<NBest>> pending;
+    for (size_t b = 0; b < this->batch_size; ++b) {
+        auto &stream_prefixes = this->decoder_storage_vector[b]->prefixes;
+        pending.emplace_back(workers.enqueue(get_decode_result,
+                                             std::ref(stream_prefixes),
+                                             this->vocabulary,
+                                             this->beam_size,
+                                             this->ext_scorer));
+    }
+
+    // wait for the streams in index order
+    std::vector<NBest> batch_results;
+    for (size_t b = 0; b < this->batch_size; ++b)
+        batch_results.emplace_back(pending[b].get());
+    return batch_results;
+}
+
+
+/**
+ * reset the state of ctcBeamSearchDecoderBatch: take over the new settings,
+ * drop every existing storage and create batch_size fresh ones
+ */
+void CtcBeamSearchDecoderBatch::reset_state(size_t batch_size,
+                                            size_t beam_size,
+                                            size_t num_processes,
+                                            double cutoff_prob,
+                                            size_t cutoff_top_n) {
+    this->batch_size = batch_size;
+    this->beam_size = beam_size;
+    this->num_processes = num_processes;
+    this->cutoff_prob = cutoff_prob;
+    this->cutoff_top_n = cutoff_top_n;
+
+    VALID_CHECK_GT(this->beam_size, 0, "beam_size must be greater than 0!");
+    VALID_CHECK_GT(
+        this->num_processes, 0, "num_processes must be nonnegative!");
+    // release the old storages on the pool, one task per storage
+    ThreadPool workers(this->num_processes);
+    std::vector<std::future<void>> pending;
+    for (auto &old_storage : this->decoder_storage_vector) {
+        pending.emplace_back(
+            workers.enqueue(free_storage, std::ref(old_storage)));
+    }
+    for (auto &job : pending) {
+        job.get();
+    }
+    // swap with an empty temporary to discard the emptied slots
+    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>().swap(
+        this->decoder_storage_vector);
+
+    // rebuild: one storage per stream, root prepared with the scorer
+    for (size_t slot = 0; slot < this->batch_size; ++slot) {
+        std::unique_ptr<CtcBeamSearchDecoderStorage> fresh(
+            new CtcBeamSearchDecoderStorage());
+        ctc_beam_search_decode_chunk_begin(fresh->root, this->ext_scorer);
+        this->decoder_storage_vector.push_back(std::move(fresh));
+    }
+}
\ No newline at end of file
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
new file mode 100644
index 00000000000..92d2b855fc3
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_beam_search_decoder.h
@@ -0,0 +1,175 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CTC_BEAM_SEARCH_DECODER_H_
+#define CTC_BEAM_SEARCH_DECODER_H_
+
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "scorer.h"
+
+/* CTC Beam Search Decoder
+
+ * Parameters:
+ *     probs_seq: 2-D vector that each element is a vector of probabilities
+ *               over vocabulary of one time step.
+ *     vocabulary: A vector of vocabulary.
+ *     beam_size: The width of beam search.
+ *     cutoff_prob: Cutoff probability for pruning.
+ *     cutoff_top_n: Cutoff number for pruning.
+ *     ext_scorer: External scorer to evaluate a prefix, which consists of
+ *                 n-gram language model scoring and word insertion term.
+ *                 Default null, decoding the input sample without scorer.
+ * Return:
+ *     A vector in which each element is a pair of score and decoding result,
+ *     in descending order.
+*/
+std::vector<std::pair<double, std::string>> ctc_beam_search_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob = 1.0,
+    size_t cutoff_top_n = 40,
+    Scorer *ext_scorer = nullptr,
+    size_t blank_id = 0);
+
+
+/* CTC Beam Search Decoder for batch data
+
+ * Parameters:
+ *     probs_split: 3-D vector in which each element is a 2-D vector that can
+ *                  be used by ctc_beam_search_decoding().
+ *     vocabulary: A vector of vocabulary.
+ *     beam_size: The width of beam search.
+ *     num_processes: Number of threads for beam search.
+ *     cutoff_prob: Cutoff probability for pruning.
+ *     cutoff_top_n: Cutoff number for pruning.
+ *     ext_scorer: External scorer to evaluate a prefix, which consists of
+ *                 n-gram language model scoring and word insertion term.
+ *                 Default null, decoding the input sample without scorer.
+ * Return:
+ *     A 2-D vector that each element is a vector of beam search decoding
+ *     result for one audio sample.
+*/
+std::vector<std::vector<std::pair<double, std::string>>>
+ctc_beam_search_decoding_batch(
+    const std::vector<std::vector<std::vector<double>>> &probs_split,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    size_t num_processes,
+    double cutoff_prob = 1.0,
+    size_t cutoff_top_n = 40,
+    Scorer *ext_scorer = nullptr,
+    size_t blank_id = 0);
+
+/**
+ * Store the root and prefixes for decoder. The storage owns `root` (deleted
+ * in the destructor), so copying is disabled to rule out a double delete.
+ */
+class CtcBeamSearchDecoderStorage {
+  public:
+    PathTrie *root = nullptr;
+    std::vector<PathTrie *> prefixes;
+
+    CtcBeamSearchDecoderStorage() {
+        // init prefixes' root
+        this->root = new PathTrie();
+        // scores are in log scale: the root has prob 1.0, i.e. log prob 0.0
+        this->root->log_prob_b_prev = 0.0;
+        this->root->score = root->log_prob_b_prev;
+        this->prefixes.push_back(root);
+    }
+
+    CtcBeamSearchDecoderStorage(const CtcBeamSearchDecoderStorage &) = delete;
+    CtcBeamSearchDecoderStorage &operator=(
+        const CtcBeamSearchDecoderStorage &) = delete;
+
+    ~CtcBeamSearchDecoderStorage() {
+        delete root;  // deleting a null pointer is a no-op
+        root = nullptr;
+    }
+};
+
+/**
+ * CTC beam search decoder for batch_size >= 1, one storage per batch entry.
+ */
+class CtcBeamSearchDecoderBatch {
+  public:
+    CtcBeamSearchDecoderBatch(const std::vector<std::string> &vocabulary,
+                              size_t batch_size,
+                              size_t beam_size,
+                              size_t num_processes,
+                              double cutoff_prob,
+                              size_t cutoff_top_n,
+                              Scorer *ext_scorer,
+                              size_t blank_id);
+
+    ~CtcBeamSearchDecoderBatch();
+    void next(const std::vector<std::vector<std::vector<double>>> &probs_split,
+              const std::vector<std::string> &has_value);
+
+    std::vector<std::vector<std::pair<double, std::string>>> decode();
+
+    void reset_state(size_t batch_size,
+                     size_t beam_size,
+                     size_t num_processes,
+                     double cutoff_prob,
+                     size_t cutoff_top_n);
+
+  private:
+    std::vector<std::string> vocabulary;
+    size_t batch_size;     // number of storages created by reset_state()
+    size_t beam_size;      // checked to be > 0 in reset_state()
+    size_t num_processes;  // thread pool size, checked to be > 0
+    double cutoff_prob;
+    size_t cutoff_top_n;
+    Scorer *ext_scorer;  // handed to ctc_beam_search_decode_chunk_begin()
+    size_t blank_id;
+    std::vector<std::unique_ptr<CtcBeamSearchDecoderStorage>>
+        decoder_storage_vector;
+};
+
+/**
+ * function for chunk decoding
+ */
+void ctc_beam_search_decode_chunk(
+    PathTrie *root,
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    double cutoff_prob,
+    size_t cutoff_top_n,
+    Scorer *ext_scorer,
+    size_t blank_id);
+
+std::vector<std::pair<double, std::string>> get_decode_result(
+    std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size,
+    Scorer *ext_scorer);
+
+/**
+ * free the CtcBeamSearchDecoderStorage
+ */
+void free_storage(std::unique_ptr<CtcBeamSearchDecoderStorage> &storage);
+
+/**
+ * initialize the root
+ */
+void ctc_beam_search_decode_chunk_begin(PathTrie *root, Scorer *ext_scorer);
+
+#endif  // CTC_BEAM_SEARCH_DECODER_H_
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
new file mode 100644
index 00000000000..6aa3c99647d
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.cpp
@@ -0,0 +1,61 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ctc_greedy_decoder.h"
+#include "decoder_utils.h"
+
+// Best-path CTC decoding: argmax per step, merge repeats, drop blanks.
+std::string ctc_greedy_decoding(
+    const std::vector<std::vector<double>> &probs_seq,
+    const std::vector<std::string> &vocabulary,
+    size_t blank_id) {
+    // dimension check
+    const size_t step_count = probs_seq.size();
+    for (size_t t = 0; t < step_count; ++t) {
+        VALID_CHECK_EQ(probs_seq[t].size(),
+                       vocabulary.size(),
+                       "The shape of probs_seq does not match with "
+                       "the shape of the vocabulary");
+    }
+
+    // best id of every step, kept to compare a step with the previous one
+    std::vector<size_t> best_ids(step_count, 0);
+    std::vector<size_t> merged_ids;
+    for (size_t t = 0; t < step_count; ++t) {
+        const std::vector<double> &step_probs = probs_seq[t];
+        // a value only wins when it is strictly larger than the best so far,
+        // which starts at 0.0; otherwise id 0 is kept
+        double best_prob = 0.0;
+        size_t best_id = 0;
+        for (size_t k = 0; k < step_probs.size(); ++k) {
+            if (step_probs[k] > best_prob) {
+                best_prob = step_probs[k];
+                best_id = k;
+            }
+        }
+        best_ids[t] = best_id;
+        // merge consecutive repeats
+        if (t == 0 || best_ids[t] != best_ids[t - 1]) {
+            merged_ids.push_back(best_id);
+        }
+    }
+
+    std::string best_path_result;
+    for (size_t id : merged_ids) {
+        if (id == blank_id) continue;
+        const std::string &token = vocabulary[id];
+        best_path_result += (token == kSPACE) ? tSPACE : token;
+    }
+    return best_path_result;
+}
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
new file mode 100644
index 00000000000..4451600d629
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/ctc_greedy_decoder.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef CTC_GREEDY_DECODER_H
+#define CTC_GREEDY_DECODER_H
+
+#include <string>
+#include <vector>
+
+/* CTC Greedy (Best Path) Decoder
+ *
+ * Parameters:
+ *     probs_seq: 2-D vector that each element is a vector of probabilities
+ *               over vocabulary of one time step.
+ *     vocabulary: A vector of vocabulary.
+ * Return:
+ *     The decoding result in string
+ */
+std::string ctc_greedy_decoding(
+    const std::vector<std::vector<double>>& probs_seq,
+    const std::vector<std::string>& vocabulary,
+    size_t blank_id);
+
+#endif  // CTC_GREEDY_DECODER_H
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
new file mode 100644
index 00000000000..c7ef65428e1
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.cpp
@@ -0,0 +1,193 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "decoder_utils.h"
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+// Return (index, log prob) pairs of the most probable entries: taken until
+// their cumulative prob reaches cutoff_prob, at most cutoff_top_n of them.
+std::vector<std::pair<size_t, float>> get_pruned_log_probs(
+    const std::vector<double> &prob_step,
+    double cutoff_prob,
+    size_t cutoff_top_n) {
+    std::vector<std::pair<int, double>> prob_idx;
+    for (size_t i = 0; i < prob_step.size(); ++i) {
+        prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
+    }
+    size_t cutoff_len = prob_step.size();
+    if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
+        std::sort(prob_idx.begin(),
+                  prob_idx.end(),
+                  pair_comp_second_rev<int, double>);
+        if (cutoff_prob < 1.0) {
+            double cum_prob = 0.0;
+            for (cutoff_len = 0; cutoff_len < prob_idx.size();) {
+                cum_prob += prob_idx[cutoff_len++].second;
+                if (cum_prob >= cutoff_prob || cutoff_len >= cutoff_top_n)
+                    break;
+            }
+        } else {
+            cutoff_len = std::max<size_t>(cutoff_top_n, 1);  // top-n only
+        }
+        prob_idx.resize(cutoff_len);
+    }
+    std::vector<std::pair<size_t, float>> log_prob_idx;
+    for (size_t i = 0; i < cutoff_len; ++i) {
+        log_prob_idx.push_back(std::pair<int, float>(
+            prob_idx[i].first, log(prob_idx[i].second + NUM_FLT_MIN)));
+    }
+    return log_prob_idx;
+}
+
+
+// Convert the first beam_size prefixes into (score, text) pairs. The prefixes
+// are ordered with prefix_compare and every path is spelled out through the
+// vocabulary, with kSPACE entries written as tSPACE.
+std::vector<std::pair<double, std::string>> get_beam_search_result(
+    const std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size) {
+    // only the first beam_size prefixes are considered
+    const size_t kept = std::min(beam_size, prefixes.size());
+    std::vector<PathTrie *> space_prefixes(prefixes.begin(),
+                                           prefixes.begin() + kept);
+    std::sort(space_prefixes.begin(), space_prefixes.end(), prefix_compare);
+
+    std::vector<std::pair<double, std::string>> output_vecs;
+    for (PathTrie *prefix : space_prefixes) {
+        std::vector<int> path;
+        prefix->get_path_vec(path);
+        // convert index to string
+        std::string text;
+        for (int id : path) {
+            const std::string &token = vocabulary[id];
+            text += (token == kSPACE) ? tSPACE : token;
+        }
+        // the reported score is the negated approx_ctc of the prefix, not
+        // the score used for sorting
+        output_vecs.emplace_back(-prefix->approx_ctc, text);
+    }
+
+    return output_vecs;
+}
+
+size_t get_utf8_str_len(const std::string &str) {
+    // Every byte that is not a UTF-8 continuation byte (10xxxxxx) starts a
+    // new character, so counting those bytes gives the character count.
+    const auto starts_char = [](char c) { return (c & 0xc0) != 0x80; };
+    return static_cast<size_t>(
+        std::count_if(str.begin(), str.end(), starts_char));
+}
+
+// Split `str` into its UTF-8 characters, one string per character. The last
+// piece is always appended, so an empty input yields one empty string.
+std::vector<std::string> split_utf8_str(const std::string &str) {
+    std::vector<std::string> pieces;
+    std::string current;
+
+    for (const char byte : str) {
+        const bool is_continuation = (byte & 0xc0) == 0x80;
+        // a new character starts here: flush the finished one
+        if (!is_continuation && !current.empty()) {
+            pieces.push_back(current);
+            current.clear();
+        }
+        current.push_back(byte);
+    }
+    pieces.push_back(current);
+    return pieces;
+}
+
+// Split `s` on `delim`, dropping empty pieces. An empty `delim` can never
+// advance the search (it used to loop forever), so `s` is returned whole.
+std::vector<std::string> split_str(const std::string &s,
+                                   const std::string &delim) {
+    std::vector<std::string> result;
+    std::size_t start = 0, delim_len = delim.size();
+    while (delim_len > 0) {
+        const std::size_t end = s.find(delim, start);
+        if (end == std::string::npos) break;
+        if (end > start) {
+            result.push_back(s.substr(start, end - start));
+        }
+        start = end + delim_len;
+    }
+    if (start < s.size()) {
+        result.push_back(s.substr(start));
+    }
+    return result;
+}
+
+// Ordering used when sorting prefixes: a higher score comes first and, for
+// equal scores, the smaller character id comes first. Prefixes with equal
+// score and character compare as equivalent (false in both directions).
+bool prefix_compare(const PathTrie *x, const PathTrie *y) {
+    // different scores: the higher one sorts first
+    if (x->score != y->score) {
+        return x->score > y->score;
+    }
+    // same score: fall back to the character id
+    return x->character < y->character;
+}
+
+void add_word_to_fst(const std::vector<int> &word,
+                     fst::StdVectorFst *dictionary) {
+    if (dictionary->NumStates() == 0) {
+        fst::StdVectorFst::StateId start = dictionary->AddState();
+        assert(start == 0);
+        dictionary->SetStart(start);
+    }
+    fst::StdVectorFst::StateId src = dictionary->Start();
+    for (auto c : word) {
+        const fst::StdVectorFst::StateId dst = dictionary->AddState();
+        dictionary->AddArc(src, fst::StdArc(c, c, 0, dst));
+        src = dst;
+    }
+    // an empty word adds no arc: skip SetFinal, which used to read an unset dst
+    if (!word.empty()) dictionary->SetFinal(src, fst::StdArc::Weight::One());
+}
+
+// Map every UTF-8 character of `word` to its id in `char_map` (a plain space
+// maps to SPACE_ID) and add the id sequence to `dictionary`. Returns false,
+// without touching the dictionary, when a character is missing in char_map.
+bool add_word_to_dictionary(
+    const std::string &word,
+    const std::unordered_map<std::string, int> &char_map,
+    bool add_space,
+    int SPACE_ID,
+    fst::StdVectorFst *dictionary) {
+    std::vector<int> ids;
+    for (const std::string &symbol : split_utf8_str(word)) {
+        if (symbol == " ") {
+            ids.push_back(SPACE_ID);
+            continue;
+        }
+        const auto entry = char_map.find(symbol);
+        if (entry == char_map.end()) {
+            return false;  // return without adding
+        }
+        ids.push_back(entry->second);
+    }
+
+    // optionally terminate the word with a space
+    if (add_space) {
+        ids.push_back(SPACE_ID);
+    }
+
+    add_word_to_fst(ids, dictionary);
+    return true;  // return with successful adding
+}
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
new file mode 100644
index 00000000000..0987415529a
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoder_utils.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef DECODER_UTILS_H_
+#define DECODER_UTILS_H_
+
+#include <string>
+#include <utility>
+#include "fst/log.h"
+#include "path_trie.h"
+
+const std::string kSPACE = "<space>";
+const std::string tSPACE = " ";
+const float NUM_FLT_INF = std::numeric_limits<float>::max();
+const float NUM_FLT_MIN = std::numeric_limits<float>::min();
+
+// Validation helper behind VALID_CHECK: does nothing when `x` holds, otherwise
+// prints "[file:line] " to std::cout and logs the failed expression as FATAL.
+inline void check(
+    bool x, const char *expr, const char *file, int line, const char *err) {
+    if (x) return;
+    std::cout << "[" << file << ":" << line << "] ";
+    LOG(FATAL) << "\"" << expr << "\" check failed. " << err;
+}
+
+#define VALID_CHECK(x, info) \
+    check(static_cast<bool>(x), #x, __FILE__, __LINE__, info)
+#define VALID_CHECK_EQ(x, y, info) VALID_CHECK((x) == (y), info)
+#define VALID_CHECK_GT(x, y, info) VALID_CHECK((x) > (y), info)
+#define VALID_CHECK_LT(x, y, info) VALID_CHECK((x) < (y), info)
+
+
+// Comparator for descending order by the first element of a pair
+template <typename T1, typename T2>
+bool pair_comp_first_rev(const std::pair<T1, T2> &a,
+                         const std::pair<T1, T2> &b) {
+    return a.first > b.first;
+}
+
+// Comparator for descending order by the second element of a pair
+template <typename T1, typename T2>
+bool pair_comp_second_rev(const std::pair<T1, T2> &a,
+                          const std::pair<T1, T2> &b) {
+    return a.second > b.second;
+}
+
+// Sum of two probabilities in log scale; args <= -max() count as log(0)
+template <typename T>
+T log_sum_exp(const T &x, const T &y) {
+    static T lowest = -std::numeric_limits<T>::max();
+    if (x <= lowest) return y;
+    if (y <= lowest) return x;
+    const T peak = (x < y) ? y : x;  // shift by the larger value before exp
+    return std::log(std::exp(x - peak) + std::exp(y - peak)) + peak;
+}
+
+// Get pruned probability vector for each time step's beam search
+std::vector<std::pair<size_t, float>> get_pruned_log_probs(
+    const std::vector<double> &prob_step,
+    double cutoff_prob,
+    size_t cutoff_top_n);
+
+// Get beam search result from prefixes in trie tree
+std::vector<std::pair<double, std::string>> get_beam_search_result(
+    const std::vector<PathTrie *> &prefixes,
+    const std::vector<std::string> &vocabulary,
+    size_t beam_size);
+
+// Comparison function for sorting prefixes
+bool prefix_compare(const PathTrie *x, const PathTrie *y);
+
+/* Get length of utf8 encoding string
+ * See: http://stackoverflow.com/a/4063229
+ */
+size_t get_utf8_str_len(const std::string &str);
+
+/* Split a string into a list of strings on a given string
+ * delimiter. NB: delimiters on beginning / end of string are
+ * trimmed. Eg, "FooBarFoo" split on "Foo" returns ["Bar"].
+ */
+std::vector<std::string> split_str(const std::string &s,
+                                   const std::string &delim);
+
+/* Splits string into vector of strings representing
+ * UTF-8 characters (not same as chars)
+ */
+std::vector<std::string> split_utf8_str(const std::string &str);
+
+// Add a word, given as a sequence of indices, to the FST dictionary
+void add_word_to_fst(const std::vector<int> &word,
+                     fst::StdVectorFst *dictionary);
+
+// Add a word in string to dictionary
+bool add_word_to_dictionary(
+    const std::string &word,
+    const std::unordered_map<std::string, int> &char_map,
+    bool add_space,
+    int SPACE_ID,
+    fst::StdVectorFst *dictionary);
+#endif  // DECODER_UTILS_H_
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoders.i b/speechx/speechx/asr/decoder/ctc_decoders/decoders.i
new file mode 100644
index 00000000000..8fe3b279f59
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/decoders.i
@@ -0,0 +1,33 @@
+%module paddlespeech_ctcdecoders
+%{
+#include "scorer.h"
+#include "ctc_greedy_decoder.h"
+#include "ctc_beam_search_decoder.h"
+#include "decoder_utils.h"
+%}
+
+%include "std_vector.i"
+%include "std_pair.i"
+%include "std_string.i"
+%import "decoder_utils.h"
+
+namespace std {
+    %template(DoubleVector) std::vector<double>;
+    %template(IntVector) std::vector<int>;
+    %template(StringVector) std::vector<std::string>;
+    %template(VectorOfStructVector) std::vector<std::vector<double> >;
+    %template(FloatVector) std::vector<float>;
+    %template(Pair) std::pair<float, std::string>;
+    %template(PairFloatStringVector)  std::vector<std::pair<float, std::string> >;
+    %template(PairDoubleStringVector) std::vector<std::pair<double, std::string> >;
+    %template(PairDoubleStringVector2) std::vector<std::vector<std::pair<double, std::string> > >;
+    %template(DoubleVector3) std::vector<std::vector<std::vector<double> > >;
+}
+
+%template(IntDoublePairCompSecondRev) pair_comp_second_rev<int, double>;
+%template(StringDoublePairCompSecondRev) pair_comp_second_rev<std::string, double>;
+%template(DoubleStringPairCompFirstRev) pair_comp_first_rev<double, std::string>;
+
+%include "scorer.h"
+%include "ctc_greedy_decoder.h"
+%include "ctc_beam_search_decoder.h"
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
new file mode 100644
index 00000000000..777ca05201d
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.cpp
@@ -0,0 +1,164 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "path_trie.h"
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "decoder_utils.h"
+
+// Create a node with all log probs and the score at -NUM_FLT_INF, no parent.
+PathTrie::PathTrie() {
+    log_prob_b_prev = -NUM_FLT_INF;
+    log_prob_nb_prev = -NUM_FLT_INF;
+    log_prob_b_cur = -NUM_FLT_INF;
+    log_prob_nb_cur = -NUM_FLT_INF;
+    score = -NUM_FLT_INF;
+    approx_ctc = 0.0f;  // was left uninitialized, yet it is read for results
+
+    ROOT_ = -1;
+    character = ROOT_;
+    exists_ = true;
+    parent = nullptr;
+    dictionary_ = nullptr;
+    dictionary_state_ = 0;
+    has_dictionary_ = false;
+    matcher_ = nullptr;
+}
+
+PathTrie::~PathTrie() {
+    for (auto& child : children_) {  // by reference so the reset sticks
+        delete child.second;
+        child.second = nullptr;
+    }
+}
+
+// Return the child for `new_char`, creating it when missing. A child that was
+// marked removed is revived with its log probs reset. With a dictionary,
+// nullptr is returned when the matcher finds no arc for `new_char + 1`.
+PathTrie* PathTrie::get_path_trie(int new_char, bool reset) {
+    const auto child = std::find_if(
+        children_.begin(),
+        children_.end(),
+        [new_char](const std::pair<int, PathTrie*>& entry) {
+            return entry.first == new_char;
+        });
+    if (child != children_.end()) {
+        PathTrie* node = child->second;
+        if (!node->exists_) {
+            node->exists_ = true;
+            node->log_prob_b_prev = -NUM_FLT_INF;
+            node->log_prob_nb_prev = -NUM_FLT_INF;
+            node->log_prob_b_cur = -NUM_FLT_INF;
+            node->log_prob_nb_cur = -NUM_FLT_INF;
+        }
+        return node;
+    }
+    if (!has_dictionary_) {
+        PathTrie* new_path = new PathTrie;
+        new_path->character = new_char;
+        new_path->parent = this;
+        children_.push_back(std::make_pair(new_char, new_path));
+        return new_path;
+    }
+    matcher_->SetState(dictionary_state_);
+    if (!matcher_->Find(new_char + 1)) {
+        // Adding this character causes word outside dictionary
+        auto FSTZERO = fst::TropicalWeight::Zero();
+        auto final_weight = dictionary_->Final(dictionary_state_);
+        bool is_final = (final_weight != FSTZERO);
+        if (is_final && reset) {
+            dictionary_state_ = dictionary_->Start();
+        }
+        return nullptr;
+    }
+    PathTrie* new_path = new PathTrie;
+    new_path->character = new_char;
+    new_path->parent = this;
+    new_path->dictionary_ = dictionary_;
+    new_path->dictionary_state_ = matcher_->Value().nextstate;
+    new_path->has_dictionary_ = true;
+    new_path->matcher_ = matcher_;
+    children_.push_back(std::make_pair(new_char, new_path));
+    return new_path;
+}
+
+PathTrie* PathTrie::get_path_vec(std::vector<int>& output) {
+    return get_path_vec(output, ROOT_);  // no extra stop character
+}
+
+PathTrie* PathTrie::get_path_vec(std::vector<int>& output,
+                                 int stop,
+                                 size_t max_steps) {
+    if (character == stop || character == ROOT_ || output.size() == max_steps) {
+        std::reverse(output.begin(), output.end());  // collected leaf to root
+        return this;  // node at which the walk stopped
+    } else {
+        output.push_back(character);
+        return parent->get_path_vec(output, stop, max_steps);  // walk upwards
+    }
+}
+
+// Pre-order walk: commit this step's log probs on every existing node and
+// append the node to `output`; removed nodes are skipped but still descended.
+void PathTrie::iterate_to_vec(std::vector<PathTrie*>& output) {
+    if (exists_) {
+        log_prob_b_prev = log_prob_b_cur;
+        log_prob_nb_prev = log_prob_nb_cur;
+        log_prob_b_cur = -NUM_FLT_INF;
+        log_prob_nb_cur = -NUM_FLT_INF;
+        score = log_sum_exp(log_prob_b_prev, log_prob_nb_prev);
+        output.push_back(this);
+    }
+    for (size_t i = 0; i < children_.size(); ++i) {
+        children_[i].second->iterate_to_vec(output);
+    }
+}
+
+// Mark this node as removed. A node without children is also unlinked from
+// its parent and deleted; a parent left without children that was itself
+// marked removed is removed the same way.
+void PathTrie::remove() {
+    exists_ = false;
+    if (!children_.empty()) return;  // kept while it still has children
+    if (parent != nullptr) {
+        auto& siblings = parent->children_;
+        for (auto it = siblings.begin(); it != siblings.end(); ++it) {
+            if (it->first == character) {
+                siblings.erase(it);
+                break;
+            }
+        }
+        if (siblings.empty() && !parent->exists_) {
+            parent->remove();
+        }
+    }
+    delete this;
+}
+
+
+void PathTrie::set_dictionary(fst::StdVectorFst* dictionary) {
+    dictionary_ = dictionary;  // the pointer is stored, the FST is not copied
+    dictionary_state_ = dictionary->Start();  // begin at the FST start state
+    has_dictionary_ = true;  // get_path_trie() now consults the matcher
+}
+
+using FSTMATCH = fst::SortedMatcher<fst::StdVectorFst>;
+void PathTrie::set_matcher(std::shared_ptr<FSTMATCH> matcher) {
+    matcher_ = matcher;  // shared with the children made in get_path_trie()
+}
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
new file mode 100644
index 00000000000..5193e0a47e6
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/path_trie.h
@@ -0,0 +1,82 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "COPYING.APACHE2.0");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef PATH_TRIE_H
+#define PATH_TRIE_H
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "fst/fstlib.h"
+
+/* Trie tree for prefix storing and manipulating, with a dictionary in
+ * finite-state transducer for spelling correction.
+ */
+class PathTrie {
+  public:
+    PathTrie();
+    ~PathTrie();
+
+    // get new prefix after appending new char
+    PathTrie* get_path_trie(int new_char, bool reset = true);
+
+    // get the prefix in index from root to current node
+    PathTrie* get_path_vec(std::vector<int>& output);
+
+    // get the prefix in index from some stop node to current node
+    PathTrie* get_path_vec(
+        std::vector<int>& output,
+        int stop,
+        size_t max_steps = std::numeric_limits<size_t>::max());
+
+    // update log probs and collect the existing nodes into output
+    void iterate_to_vec(std::vector<PathTrie*>& output);
+
+    // set dictionary for FST
+    void set_dictionary(fst::StdVectorFst* dictionary);
+
+    void set_matcher(std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>>);
+
+    bool is_empty() { return ROOT_ == character; }
+
+    // remove current path from root
+    void remove();
+
+    float log_prob_b_prev;
+    float log_prob_nb_prev;
+    float log_prob_b_cur;
+    float log_prob_nb_cur;
+    float score;
+    float approx_ctc;  // negated to form the score in get_beam_search_result()
+    int character;
+    PathTrie* parent;
+
+  private:
+    int ROOT_;     // character value that marks the root (-1)
+    bool exists_;  // set to false by remove()
+    bool has_dictionary_;
+
+    std::vector<std::pair<int, PathTrie*>> children_;  // (character, child)
+
+    // pointer to dictionary of FST
+    fst::StdVectorFst* dictionary_;
+    fst::StdVectorFst::StateId dictionary_state_;
+    // matcher used to find arcs in the FST
+    std::shared_ptr<fst::SortedMatcher<fst::StdVectorFst>> matcher_;
+};
+
+#endif  // PATH_TRIE_H
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
new file mode 100644
index 00000000000..6e7f68cf6ba
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.cpp
@@ -0,0 +1,232 @@
+// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
+// "COPYING.LESSER.3");
+
+#include "scorer.h"
+
+#include <unistd.h>
+#include <iostream>
+
+#include "lm/config.hh"
+#include "lm/model.hh"
+#include "lm/state.hh"
+
+#include "decoder_utils.h"
+
+using namespace lm::ngram;
+// If your platform is Windows, you need to add this define.
+#define    F_OK    0
+Scorer::Scorer(double alpha,
+               double beta,
+               const std::string& lm_path,
+               const std::vector<std::string>& vocab_list)
+    : alpha(alpha),
+      beta(beta),
+      dictionary(nullptr),
+      language_model_(nullptr),
+      is_character_based_(true),
+      max_order_(0),
+      dict_size_(0),
+      SPACE_ID_(-1) {
+    // Every member starts from a neutral default; setup() then loads the
+    // language model, builds the char map and, for a word-based model,
+    // fills the FST dictionary.
+    setup(lm_path, vocab_list);
+}
+
+Scorer::~Scorer() {
+    // Both members are stored as void*, so each is cast back to its
+    // concrete type before deletion. Deleting a null pointer is a no-op,
+    // which covers a scorer whose model or dictionary was never created
+    // (e.g. a character-based scorer never fills the dictionary).
+    delete static_cast<lm::base::Model*>(language_model_);
+    delete static_cast<fst::StdVectorFst*>(dictionary);
+}
+
+void Scorer::setup(const std::string& lm_path,
+                   const std::vector<std::string>& vocab_list) {
+    // load the language model; this also decides is_character_based_
+    load_lm(lm_path);
+    // map every vocabulary char to its FST label and locate the space id
+    set_char_map(vocab_list);
+    // only a word-based model gets an FST dictionary
+    if (!is_character_based()) {
+        fill_dictionary(true);
+    }
+}
+
+void Scorer::load_lm(const std::string& lm_path) {
+    const char* filename = lm_path.c_str();
+    VALID_CHECK_EQ(access(filename, F_OK), 0, "Invalid language model path");
+
+    RetriveStrEnumerateVocab enumerate;
+    lm::ngram::Config config;
+    config.enumerate_vocab = &enumerate;
+    language_model_ = lm::ngram::LoadVirtual(filename, config);
+    max_order_ = static_cast<lm::base::Model*>(language_model_)->Order();
+    vocabulary_ = enumerate.vocabulary;
+    // One multi-character entry (special tokens aside) makes it word based.
+    for (const std::string& token : vocabulary_) {
+        if (!is_character_based_) break;  // already decided, stop scanning
+        if (token == UNK_TOKEN || token == START_TOKEN) continue;
+        if (token == END_TOKEN) continue;
+        if (get_utf8_str_len(token) > 1) is_character_based_ = false;
+    }
+}
+
+double Scorer::get_log_cond_prob(const std::vector<std::string>& words) {
+    lm::base::Model* model = static_cast<lm::base::Model*>(language_model_);
+    double cond_prob = 0.0;  // defined result (0.0) when `words` is empty
+    lm::ngram::State state, tmp_state, out_state;
+    // start from the null context to avoid inserting <s> at the beginning
+    model->NullContextWrite(&state);
+    for (size_t i = 0; i < words.size(); ++i) {
+        lm::WordIndex word_index = model->BaseVocabulary().Index(words[i]);
+        // index 0 is treated as out-of-vocabulary: give up with OOV_SCORE
+        if (word_index == 0) {
+            return OOV_SCORE;
+        }
+        cond_prob = model->BaseScore(&state, word_index, &out_state);
+        tmp_state = state;
+        state = out_state;
+        out_state = tmp_state;
+    }
+    // log10 score of the last word, given the words scored before it
+    return cond_prob;
+}
+
+double Scorer::get_sent_log_prob(const std::vector<std::string>& words) {
+    // Left-pad with <s> so the first real word is scored with a full
+    // context: max_order_ - 1 markers normally, max_order_ of them when
+    // there are no words at all.
+    const size_t padding = words.empty() ? max_order_ : max_order_ - 1;
+
+    std::vector<std::string> sentence;
+    while (sentence.size() < padding) {
+        sentence.push_back(START_TOKEN);
+    }
+    sentence.insert(sentence.end(), words.begin(), words.end());
+    // </s> always closes the sentence.
+    sentence.push_back(END_TOKEN);
+    return get_log_prob(sentence);
+}
+
+double Scorer::get_log_prob(const std::vector<std::string>& words) {
+    assert(words.size() > max_order_);  // compiled out under NDEBUG
+    double score = 0.0;
+    for (size_t i = 0; i + max_order_ <= words.size(); ++i) {  // cannot wrap
+        std::vector<std::string> ngram(words.begin() + i,
+                                       words.begin() + i + max_order_);
+        score += get_log_cond_prob(ngram);
+    }
+    return score;  // sum over every max_order_-sized window of `words`
+}
+
+void Scorer::reset_params(float alpha, float beta) {
+    this->alpha = alpha;  // language model weight (float widened to double)
+    this->beta = beta;    // word insertion weight
+}
+
+std::string Scorer::vec2str(const std::vector<int>& input) {
+    std::string joined;  // char_list_ entries of all labels, in input order
+    for (const int label : input) {
+        joined.append(char_list_[label]);
+    }
+    return joined;
+}
+
+std::vector<std::string> Scorer::split_labels(const std::vector<int>& labels) {
+    // Nothing to split.
+    if (labels.empty()) {
+        return {};
+    }
+
+    // A character-based model is split with split_utf8_str(); a word-based
+    // one is split on the single-space separator.
+    const std::string text = vec2str(labels);
+    if (is_character_based_) return split_utf8_str(text);
+    return split_str(text, " ");
+}
+
+void Scorer::set_char_map(const std::vector<std::string>& char_list) {
+    char_list_ = char_list;
+    char_map_.clear();
+
+    // Build the char -> label map of the spelling-correction FST, noting
+    // the position of the space symbol on the way.
+    // Labels are the vocabulary index plus one: state 0 is the initial
+    // state of the FST, so a label of 0 would conflict with it and give
+    // wrong decoding results.
+    for (size_t idx = 0; idx < char_list_.size(); ++idx) {
+        const std::string& symbol = char_list_[idx];
+        if (symbol == kSPACE) SPACE_ID_ = idx;
+        char_map_[symbol] = idx + 1;
+    }
+}
+
+std::vector<std::string> Scorer::make_ngram(PathTrie* prefix) {
+    // Walks from `prefix` towards the root, collecting up to max_order_
+    // tokens (newest first), then reverses them into reading order.
+    std::vector<std::string> ngram;
+    PathTrie* current_node = prefix;
+
+    for (size_t order = 0; order < max_order_; ++order) {
+        std::vector<int> prefix_vec;
+        PathTrie* new_node = nullptr;
+
+        if (is_character_based_) {
+            // at most one step back per token
+            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_, 1);
+            current_node = new_node;
+        } else {
+            // collect labels using the space id as the stop label
+            new_node = current_node->get_path_vec(prefix_vec, SPACE_ID_);
+            current_node = new_node->parent;  // Skipping spaces
+        }
+
+        ngram.push_back(vec2str(prefix_vec));  // reconstruct the token
+        if (new_node->character == -1) {
+            // Reached the node whose character is -1 before collecting
+            // max_order_ tokens: fill the remaining slots with <s>.
+            ngram.insert(ngram.end(), max_order_ - order - 1, START_TOKEN);
+            break;
+        }
+    }
+    std::reverse(ngram.begin(), ngram.end());
+    return ngram;
+}
+
+void Scorer::fill_dictionary(bool add_space) {
+    fst::StdVectorFst raw_fst;
+    // Insert every vocabulary entry into the FST (chars converted through
+    // char_map_), counting the entries that were actually added.
+    int accepted = 0;
+    for (const std::string& entry : vocabulary_) {
+        if (add_word_to_dictionary(
+                entry, char_map_, add_space, SPACE_ID_ + 1, &raw_fst)) {
+            ++accepted;
+        }
+    }
+    dict_size_ = accepted;
+
+    /* Step 1: strip "epsilon" transitions, i.e. transitions that can be
+     * taken without consuming any input. This is required before the FST
+     * can be made deterministic, but can greatly increase its size.
+     */
+    fst::RmEpsilon(&raw_fst);
+
+    /* Step 2: determinize, so that any input string leads to at most one
+     * state. Users of the dictionary rely on this; otherwise they would
+     * have to check several transitions per state.
+     */
+    fst::StdVectorFst* compact_fst = new fst::StdVectorFst;
+    fst::Determinize(raw_fst, compact_fst);
+
+    /* Step 3: minimize to the simplest equivalent FST. Not required, but
+     * it lowers the memory usage of the dictionary.
+     */
+    fst::Minimize(compact_fst);
+
+    // The result is handed to the public `dictionary` member, which the
+    // destructor deletes.
+    this->dictionary = compact_fst;
+}
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/scorer.h b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
new file mode 100644
index 00000000000..08e109b78e3
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/scorer.h
@@ -0,0 +1,114 @@
+// Licensed under GNU Lesser General Public License v3 (LGPLv3) (LGPL-3) (the
+// "COPYING.LESSER.3");
+
+#ifndef SCORER_H_
+#define SCORER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "lm/enumerate_vocab.hh"
+#include "lm/virtual_interface.hh"
+#include "lm/word_index.hh"
+
+#include "path_trie.h"
+
+const double OOV_SCORE = -1000.0;
+const std::string START_TOKEN = "<s>";
+const std::string UNK_TOKEN = "<unk>";
+const std::string END_TOKEN = "</s>";
+
+// Callback that collects the vocabulary entries of the language model.
+class RetriveStrEnumerateVocab : public lm::EnumerateVocab {
+  public:
+    RetriveStrEnumerateVocab() {}
+
+    void Add(lm::WordIndex /*index*/, const StringPiece &str) override {
+        vocabulary.emplace_back(str.data(), str.length());
+    }
+
+    std::vector<std::string> vocabulary;
+};
+
+/* External scorer to query score for n-gram or sentence, including language
+ * model scoring and word insertion.
+ *
+ * Example:
+ *     Scorer scorer(alpha, beta, "path_of_language_model", vocabulary);
+ *     scorer.get_log_cond_prob({ "WORD1", "WORD2", "WORD3" });
+ *     scorer.get_sent_log_prob({ "WORD1", "WORD2", "WORD3" });
+ */
+class Scorer {
+  public:
+    Scorer(double alpha,
+           double beta,
+           const std::string &lm_path,
+           const std::vector<std::string> &vocabulary);
+    ~Scorer();  // frees language_model_ and dictionary
+
+    double get_log_cond_prob(const std::vector<std::string> &words);
+
+    double get_sent_log_prob(const std::vector<std::string> &words);
+
+    // return the max order
+    size_t get_max_order() const { return max_order_; }
+
+    // return the dictionary size of language model
+    size_t get_dict_size() const { return dict_size_; }
+
+    // return true if the language model is character based
+    bool is_character_based() const { return is_character_based_; }
+
+    // reset params alpha & beta
+    void reset_params(float alpha, float beta);
+
+    // make ngram for a given prefix
+    std::vector<std::string> make_ngram(PathTrie *prefix);
+
+    // transform the labels in index to the vector of words (word based lm) or
+    // the vector of characters (character based lm)
+    std::vector<std::string> split_labels(const std::vector<int> &labels);
+
+    // language model weight
+    double alpha;
+    // word insertion weight
+    double beta;
+
+    // pointer to the FST dictionary (fst::StdVectorFst), deleted by ~Scorer
+    void *dictionary;
+
+  protected:
+    // necessary setup: load language model, set char map, fill FST's dictionary
+    void setup(const std::string &lm_path,
+               const std::vector<std::string> &vocab_list);
+
+    // load language model from given path
+    void load_lm(const std::string &lm_path);
+
+    // fill dictionary for FST
+    void fill_dictionary(bool add_space);
+
+    // set char map
+    void set_char_map(const std::vector<std::string> &char_list);
+
+    double get_log_prob(const std::vector<std::string> &words);
+
+    // translate the vector in index to string
+    std::string vec2str(const std::vector<int> &input);
+
+  private:
+    void *language_model_;  // a lm::base::Model, deleted by ~Scorer
+    bool is_character_based_;
+    size_t max_order_;
+    size_t dict_size_;
+
+    int SPACE_ID_;  // index of the space symbol in char_list_, -1 if absent
+    std::vector<std::string> char_list_;
+    std::unordered_map<std::string, int> char_map_;  // char -> index + 1
+
+    std::vector<std::string> vocabulary_;
+};
+
+#endif  // SCORER_H_
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/setup.py b/speechx/speechx/asr/decoder/ctc_decoders/setup.py
new file mode 100644
index 00000000000..9a8b292a07b
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/setup.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Script to build and install decoder package."""
+import argparse
+import glob
+import multiprocessing.pool
+import os
+import platform
+import sys
+
+from setuptools import distutils
+from setuptools import Extension
+from setuptools import setup
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--num_processes",
+    default=1,
+    type=int,
+    help="Number of cpu processes to build package. (default: %(default)d)")
+args = parser.parse_known_args()
+
+# reconstruct sys.argv to pass to setup below
+sys.argv = [sys.argv[0]] + args[1]
+
+
+# monkey-patch for parallel compilation
+# See: https://stackoverflow.com/a/13176803
+def parallelCCompile(self,
+                     sources,
+                     output_dir=None,
+                     macros=None,
+                     include_dirs=None,
+                     debug=0,
+                     extra_preargs=None,
+                     extra_postargs=None,
+                     depends=None):
+    # those lines are copied from distutils.ccompiler.CCompiler directly
+    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
+        output_dir, macros, include_dirs, sources, depends, extra_postargs)
+    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
+
+    # compile one object; objects without a `build` entry are skipped
+    def _single_compile(obj):
+        try:
+            src, ext = build[obj]
+        except KeyError:
+            return
+        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
+
+    # list() drains the lazy imap; the `with` block then releases the workers
+    with multiprocessing.pool.ThreadPool(args[0].num_processes) as pool:
+        list(pool.imap(_single_compile, objects))
+    return objects
+
+
+def compile_test(header, library):  # True if g++ builds with header+library
+    dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
+    command = "bash -c \"g++ -include " + header \
+        + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
+        + dummy_path + " >/dev/null 2>/dev/null && rm " \
+        + dummy_path + " 2>/dev/null\""
+    return os.system(command) == 0  # exit status 0 <=> compile, link and rm ok
+
+
+# hack compile to support parallel compiling
+distutils.ccompiler.CCompiler.compile = parallelCCompile
+
+FILES = glob.glob('kenlm/util/*.cc') \
+    + glob.glob('kenlm/lm/*.cc') \
+    + glob.glob('kenlm/util/double-conversion/*.cc')
+
+FILES += glob.glob('openfst-1.6.3/src/lib/*.cc')
+
+# yapf: disable
+FILES = [
+    fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')
+                               or fn.endswith('unittest.cc'))
+]
+# yapf: enable
+LIBS = ['stdc++']
+if platform.system() != 'Darwin':
+    LIBS.append('rt')
+if platform.system() == 'Windows':
+    LIBS = ['-static-libstdc++']
+
+ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11']
+
+if compile_test('zlib.h', 'z'):
+    ARGS.append('-DHAVE_ZLIB')
+    LIBS.append('z')
+
+if compile_test('bzlib.h', 'bz2'):
+    ARGS.append('-DHAVE_BZLIB')
+    LIBS.append('bz2')
+
+if compile_test('lzma.h', 'lzma'):
+    ARGS.append('-DHAVE_XZLIB')
+    LIBS.append('lzma')
+
+if os.system('swig -python -c++ ./decoders.i') != 0:  # surface swig failures
+    sys.stderr.write('warning: swig failed; wrapper may be stale/missing\n')
+decoders_module = [
+    Extension(
+        name='_paddlespeech_ctcdecoders',
+        sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'),
+        language='c++',
+        include_dirs=[
+            '.',
+            'kenlm',
+            'openfst-1.6.3/src/include',
+            'ThreadPool',
+        ],
+        libraries=LIBS,
+        extra_compile_args=ARGS)
+]
+
+setup(
+    name='paddlespeech_ctcdecoders',
+    version='0.2.0',
+    description="CTC decoders in paddlespeech",
+    author="PaddlePaddle Speech and Language Team",
+    author_email="paddlesl@baidu.com",
+    url="https://github.com/PaddlePaddle/PaddleSpeech",
+    license='Apache 2.0, GNU Lesser General Public License v3 (LGPLv3) (LGPL-3)',
+    ext_modules=decoders_module,
+    py_modules=['paddlespeech_ctcdecoders'])
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/setup.sh b/speechx/speechx/asr/decoder/ctc_decoders/setup.sh
new file mode 100755
index 00000000000..302c5550250
--- /dev/null
+++ b/speechx/speechx/asr/decoder/ctc_decoders/setup.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -e  # abort on the first failed step instead of building a partial tree
+if [ ! -d kenlm ]; then
+    git clone https://github.com/kpu/kenlm.git
+    # Pin the revision without changing directory, so a failed clone can
+    # never make the checkout run against the enclosing repository.
+    git -C kenlm checkout df2d717e95183f79a90b2fa6e4307083a351ca6a
+    echo -e "\n"
+fi
+
+if [ ! -d openfst-1.6.3 ]; then
+    echo "Download and extract openfst ..."
+    wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz --no-check-certificate
+    tar -xzvf openfst-1.6.3.tar.gz
+    echo -e "\n"
+fi
+
+if [ ! -d ThreadPool ]; then
+    git clone https://github.com/progschj/ThreadPool.git
+    echo -e "\n"
+fi
+
+echo "Install decoders ..."
+python3 setup.py install --num_processes 4
diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
similarity index 99%
rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc
rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
index 07e8e5608b9..15dbd7e91d3 100644
--- a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.cc
@@ -84,7 +84,7 @@ void CTCPrefixBeamSearch::AdvanceDecode(
 
         timer.Reset();
         std::vector<std::vector<kaldi::BaseFloat>> likelihood;
-        likelihood.push_back(frame_prob);
+        likelihood.push_back(std::move(frame_prob));
         AdvanceDecoding(likelihood);
         search_cost += timer.Elapsed();
 
diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
similarity index 100%
rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder.h
rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder.h
diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
similarity index 100%
rename from speechx/speechx/decoder/ctc_prefix_beam_search_decoder_main.cc
rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
diff --git a/speechx/speechx/decoder/ctc_prefix_beam_search_score.h b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
similarity index 100%
rename from speechx/speechx/decoder/ctc_prefix_beam_search_score.h
rename to speechx/speechx/asr/decoder/ctc_prefix_beam_search_score.h
diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.cc b/speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
similarity index 100%
rename from speechx/speechx/decoder/ctc_tlg_decoder.cc
rename to speechx/speechx/asr/decoder/ctc_tlg_decoder.cc
diff --git a/speechx/speechx/decoder/ctc_tlg_decoder.h b/speechx/speechx/asr/decoder/ctc_tlg_decoder.h
similarity index 100%
rename from speechx/speechx/decoder/ctc_tlg_decoder.h
rename to speechx/speechx/asr/decoder/ctc_tlg_decoder.h
diff --git a/speechx/speechx/decoder/ctc_tlg_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
similarity index 100%
rename from speechx/speechx/decoder/ctc_tlg_decoder_main.cc
rename to speechx/speechx/asr/decoder/ctc_tlg_decoder_main.cc
diff --git a/speechx/speechx/decoder/decoder_itf.h b/speechx/speechx/asr/decoder/decoder_itf.h
similarity index 100%
rename from speechx/speechx/decoder/decoder_itf.h
rename to speechx/speechx/asr/decoder/decoder_itf.h
diff --git a/speechx/speechx/decoder/nnet_logprob_decoder_main.cc b/speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
similarity index 100%
rename from speechx/speechx/decoder/nnet_logprob_decoder_main.cc
rename to speechx/speechx/asr/decoder/nnet_logprob_decoder_main.cc
diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/asr/decoder/param.h
similarity index 100%
rename from speechx/speechx/decoder/param.h
rename to speechx/speechx/asr/decoder/param.h
diff --git a/speechx/speechx/nnet/CMakeLists.txt b/speechx/speechx/asr/nnet/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/nnet/CMakeLists.txt
rename to speechx/speechx/asr/nnet/CMakeLists.txt
diff --git a/speechx/speechx/nnet/decodable.cc b/speechx/speechx/asr/nnet/decodable.cc
similarity index 100%
rename from speechx/speechx/nnet/decodable.cc
rename to speechx/speechx/asr/nnet/decodable.cc
diff --git a/speechx/speechx/nnet/decodable.h b/speechx/speechx/asr/nnet/decodable.h
similarity index 100%
rename from speechx/speechx/nnet/decodable.h
rename to speechx/speechx/asr/nnet/decodable.h
diff --git a/speechx/speechx/nnet/ds2_nnet.cc b/speechx/speechx/asr/nnet/ds2_nnet.cc
similarity index 100%
rename from speechx/speechx/nnet/ds2_nnet.cc
rename to speechx/speechx/asr/nnet/ds2_nnet.cc
diff --git a/speechx/speechx/nnet/ds2_nnet.h b/speechx/speechx/asr/nnet/ds2_nnet.h
similarity index 100%
rename from speechx/speechx/nnet/ds2_nnet.h
rename to speechx/speechx/asr/nnet/ds2_nnet.h
diff --git a/speechx/speechx/nnet/ds2_nnet_main.cc b/speechx/speechx/asr/nnet/ds2_nnet_main.cc
similarity index 100%
rename from speechx/speechx/nnet/ds2_nnet_main.cc
rename to speechx/speechx/asr/nnet/ds2_nnet_main.cc
diff --git a/speechx/speechx/nnet/nnet_itf.h b/speechx/speechx/asr/nnet/nnet_itf.h
similarity index 100%
rename from speechx/speechx/nnet/nnet_itf.h
rename to speechx/speechx/asr/nnet/nnet_itf.h
diff --git a/speechx/speechx/nnet/u2_nnet.cc b/speechx/speechx/asr/nnet/u2_nnet.cc
similarity index 100%
rename from speechx/speechx/nnet/u2_nnet.cc
rename to speechx/speechx/asr/nnet/u2_nnet.cc
diff --git a/speechx/speechx/nnet/u2_nnet.h b/speechx/speechx/asr/nnet/u2_nnet.h
similarity index 100%
rename from speechx/speechx/nnet/u2_nnet.h
rename to speechx/speechx/asr/nnet/u2_nnet.h
diff --git a/speechx/speechx/nnet/u2_nnet_main.cc b/speechx/speechx/asr/nnet/u2_nnet_main.cc
similarity index 100%
rename from speechx/speechx/nnet/u2_nnet_main.cc
rename to speechx/speechx/asr/nnet/u2_nnet_main.cc
diff --git a/speechx/speechx/recognizer/CMakeLists.txt b/speechx/speechx/asr/recognizer/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/recognizer/CMakeLists.txt
rename to speechx/speechx/asr/recognizer/CMakeLists.txt
diff --git a/speechx/speechx/recognizer/recognizer.cc b/speechx/speechx/asr/recognizer/recognizer.cc
similarity index 100%
rename from speechx/speechx/recognizer/recognizer.cc
rename to speechx/speechx/asr/recognizer/recognizer.cc
diff --git a/speechx/speechx/recognizer/recognizer.h b/speechx/speechx/asr/recognizer/recognizer.h
similarity index 100%
rename from speechx/speechx/recognizer/recognizer.h
rename to speechx/speechx/asr/recognizer/recognizer.h
diff --git a/speechx/speechx/recognizer/recognizer_main.cc b/speechx/speechx/asr/recognizer/recognizer_main.cc
similarity index 100%
rename from speechx/speechx/recognizer/recognizer_main.cc
rename to speechx/speechx/asr/recognizer/recognizer_main.cc
diff --git a/speechx/speechx/recognizer/u2_recognizer.cc b/speechx/speechx/asr/recognizer/u2_recognizer.cc
similarity index 100%
rename from speechx/speechx/recognizer/u2_recognizer.cc
rename to speechx/speechx/asr/recognizer/u2_recognizer.cc
diff --git a/speechx/speechx/recognizer/u2_recognizer.h b/speechx/speechx/asr/recognizer/u2_recognizer.h
similarity index 100%
rename from speechx/speechx/recognizer/u2_recognizer.h
rename to speechx/speechx/asr/recognizer/u2_recognizer.h
diff --git a/speechx/speechx/recognizer/u2_recognizer_main.cc b/speechx/speechx/asr/recognizer/u2_recognizer_main.cc
similarity index 100%
rename from speechx/speechx/recognizer/u2_recognizer_main.cc
rename to speechx/speechx/asr/recognizer/u2_recognizer_main.cc
diff --git a/speechx/speechx/protocol/CMakeLists.txt b/speechx/speechx/asr/server/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/protocol/CMakeLists.txt
rename to speechx/speechx/asr/server/CMakeLists.txt
diff --git a/speechx/speechx/protocol/websocket/CMakeLists.txt b/speechx/speechx/asr/server/websocket/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/protocol/websocket/CMakeLists.txt
rename to speechx/speechx/asr/server/websocket/CMakeLists.txt
diff --git a/speechx/speechx/protocol/websocket/websocket_client.cc b/speechx/speechx/asr/server/websocket/websocket_client.cc
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_client.cc
rename to speechx/speechx/asr/server/websocket/websocket_client.cc
diff --git a/speechx/speechx/protocol/websocket/websocket_client.h b/speechx/speechx/asr/server/websocket/websocket_client.h
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_client.h
rename to speechx/speechx/asr/server/websocket/websocket_client.h
diff --git a/speechx/speechx/protocol/websocket/websocket_client_main.cc b/speechx/speechx/asr/server/websocket/websocket_client_main.cc
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_client_main.cc
rename to speechx/speechx/asr/server/websocket/websocket_client_main.cc
diff --git a/speechx/speechx/protocol/websocket/websocket_server.cc b/speechx/speechx/asr/server/websocket/websocket_server.cc
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_server.cc
rename to speechx/speechx/asr/server/websocket/websocket_server.cc
diff --git a/speechx/speechx/protocol/websocket/websocket_server.h b/speechx/speechx/asr/server/websocket/websocket_server.h
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_server.h
rename to speechx/speechx/asr/server/websocket/websocket_server.h
diff --git a/speechx/speechx/protocol/websocket/websocket_server_main.cc b/speechx/speechx/asr/server/websocket/websocket_server_main.cc
similarity index 100%
rename from speechx/speechx/protocol/websocket/websocket_server_main.cc
rename to speechx/speechx/asr/server/websocket/websocket_server_main.cc
diff --git a/speechx/speechx/common/CMakeLists.txt b/speechx/speechx/common/CMakeLists.txt
new file mode 100644
index 00000000000..dea9eb05df9
--- /dev/null
+++ b/speechx/speechx/common/CMakeLists.txt
@@ -0,0 +1,16 @@
+include_directories(  # this directory and its base/ headers
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/base
+)
+
+include_directories(  # the parent directory and utils/
+${CMAKE_CURRENT_SOURCE_DIR}/../
+${CMAKE_CURRENT_SOURCE_DIR}/utils
+)
+add_subdirectory(utils)
+
+include_directories(  # this directory (listed again) and frontend/
+${CMAKE_CURRENT_SOURCE_DIR}
+${CMAKE_CURRENT_SOURCE_DIR}/frontend
+)
+add_subdirectory(frontend)
diff --git a/speechx/speechx/base/basic_types.h b/speechx/speechx/common/base/basic_types.h
similarity index 100%
rename from speechx/speechx/base/basic_types.h
rename to speechx/speechx/common/base/basic_types.h
diff --git a/speechx/speechx/base/common.h b/speechx/speechx/common/base/common.h
similarity index 100%
rename from speechx/speechx/base/common.h
rename to speechx/speechx/common/base/common.h
diff --git a/speechx/speechx/base/flags.h b/speechx/speechx/common/base/flags.h
similarity index 100%
rename from speechx/speechx/base/flags.h
rename to speechx/speechx/common/base/flags.h
diff --git a/speechx/speechx/base/log.h b/speechx/speechx/common/base/log.h
similarity index 100%
rename from speechx/speechx/base/log.h
rename to speechx/speechx/common/base/log.h
diff --git a/speechx/speechx/base/macros.h b/speechx/speechx/common/base/macros.h
similarity index 100%
rename from speechx/speechx/base/macros.h
rename to speechx/speechx/common/base/macros.h
diff --git a/speechx/speechx/base/thread_pool.h b/speechx/speechx/common/base/thread_pool.h
similarity index 100%
rename from speechx/speechx/base/thread_pool.h
rename to speechx/speechx/common/base/thread_pool.h
diff --git a/speechx/speechx/frontend/CMakeLists.txt b/speechx/speechx/common/frontend/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/frontend/CMakeLists.txt
rename to speechx/speechx/common/frontend/CMakeLists.txt
diff --git a/speechx/speechx/frontend/audio/CMakeLists.txt b/speechx/speechx/common/frontend/audio/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/frontend/audio/CMakeLists.txt
rename to speechx/speechx/common/frontend/audio/CMakeLists.txt
diff --git a/speechx/speechx/frontend/audio/assembler.cc b/speechx/speechx/common/frontend/audio/assembler.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/assembler.cc
rename to speechx/speechx/common/frontend/audio/assembler.cc
diff --git a/speechx/speechx/frontend/audio/assembler.h b/speechx/speechx/common/frontend/audio/assembler.h
similarity index 100%
rename from speechx/speechx/frontend/audio/assembler.h
rename to speechx/speechx/common/frontend/audio/assembler.h
diff --git a/speechx/speechx/frontend/audio/audio_cache.cc b/speechx/speechx/common/frontend/audio/audio_cache.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/audio_cache.cc
rename to speechx/speechx/common/frontend/audio/audio_cache.cc
diff --git a/speechx/speechx/frontend/audio/audio_cache.h b/speechx/speechx/common/frontend/audio/audio_cache.h
similarity index 100%
rename from speechx/speechx/frontend/audio/audio_cache.h
rename to speechx/speechx/common/frontend/audio/audio_cache.h
diff --git a/speechx/speechx/frontend/audio/cmvn.cc b/speechx/speechx/common/frontend/audio/cmvn.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/cmvn.cc
rename to speechx/speechx/common/frontend/audio/cmvn.cc
diff --git a/speechx/speechx/frontend/audio/cmvn.h b/speechx/speechx/common/frontend/audio/cmvn.h
similarity index 100%
rename from speechx/speechx/frontend/audio/cmvn.h
rename to speechx/speechx/common/frontend/audio/cmvn.h
diff --git a/speechx/speechx/frontend/audio/cmvn_json2kaldi_main.cc b/speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/cmvn_json2kaldi_main.cc
rename to speechx/speechx/common/frontend/audio/cmvn_json2kaldi_main.cc
diff --git a/speechx/speechx/frontend/audio/compute_fbank_main.cc b/speechx/speechx/common/frontend/audio/compute_fbank_main.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/compute_fbank_main.cc
rename to speechx/speechx/common/frontend/audio/compute_fbank_main.cc
diff --git a/speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc b/speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/compute_linear_spectrogram_main.cc
rename to speechx/speechx/common/frontend/audio/compute_linear_spectrogram_main.cc
diff --git a/speechx/speechx/frontend/audio/data_cache.h b/speechx/speechx/common/frontend/audio/data_cache.h
similarity index 100%
rename from speechx/speechx/frontend/audio/data_cache.h
rename to speechx/speechx/common/frontend/audio/data_cache.h
diff --git a/speechx/speechx/frontend/audio/db_norm.cc b/speechx/speechx/common/frontend/audio/db_norm.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/db_norm.cc
rename to speechx/speechx/common/frontend/audio/db_norm.cc
diff --git a/speechx/speechx/frontend/audio/db_norm.h b/speechx/speechx/common/frontend/audio/db_norm.h
similarity index 100%
rename from speechx/speechx/frontend/audio/db_norm.h
rename to speechx/speechx/common/frontend/audio/db_norm.h
diff --git a/speechx/speechx/frontend/audio/fbank.cc b/speechx/speechx/common/frontend/audio/fbank.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/fbank.cc
rename to speechx/speechx/common/frontend/audio/fbank.cc
diff --git a/speechx/speechx/frontend/audio/fbank.h b/speechx/speechx/common/frontend/audio/fbank.h
similarity index 100%
rename from speechx/speechx/frontend/audio/fbank.h
rename to speechx/speechx/common/frontend/audio/fbank.h
diff --git a/speechx/speechx/frontend/audio/feature_cache.cc b/speechx/speechx/common/frontend/audio/feature_cache.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_cache.cc
rename to speechx/speechx/common/frontend/audio/feature_cache.cc
diff --git a/speechx/speechx/frontend/audio/feature_cache.h b/speechx/speechx/common/frontend/audio/feature_cache.h
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_cache.h
rename to speechx/speechx/common/frontend/audio/feature_cache.h
diff --git a/speechx/speechx/frontend/audio/feature_common.h b/speechx/speechx/common/frontend/audio/feature_common.h
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_common.h
rename to speechx/speechx/common/frontend/audio/feature_common.h
diff --git a/speechx/speechx/frontend/audio/feature_common_inl.h b/speechx/speechx/common/frontend/audio/feature_common_inl.h
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_common_inl.h
rename to speechx/speechx/common/frontend/audio/feature_common_inl.h
diff --git a/speechx/speechx/frontend/audio/feature_pipeline.cc b/speechx/speechx/common/frontend/audio/feature_pipeline.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_pipeline.cc
rename to speechx/speechx/common/frontend/audio/feature_pipeline.cc
diff --git a/speechx/speechx/frontend/audio/feature_pipeline.h b/speechx/speechx/common/frontend/audio/feature_pipeline.h
similarity index 100%
rename from speechx/speechx/frontend/audio/feature_pipeline.h
rename to speechx/speechx/common/frontend/audio/feature_pipeline.h
diff --git a/speechx/speechx/frontend/audio/frontend_itf.h b/speechx/speechx/common/frontend/audio/frontend_itf.h
similarity index 100%
rename from speechx/speechx/frontend/audio/frontend_itf.h
rename to speechx/speechx/common/frontend/audio/frontend_itf.h
diff --git a/speechx/speechx/frontend/audio/linear_spectrogram.cc b/speechx/speechx/common/frontend/audio/linear_spectrogram.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/linear_spectrogram.cc
rename to speechx/speechx/common/frontend/audio/linear_spectrogram.cc
diff --git a/speechx/speechx/frontend/audio/linear_spectrogram.h b/speechx/speechx/common/frontend/audio/linear_spectrogram.h
similarity index 100%
rename from speechx/speechx/frontend/audio/linear_spectrogram.h
rename to speechx/speechx/common/frontend/audio/linear_spectrogram.h
diff --git a/speechx/speechx/frontend/audio/mfcc.cc b/speechx/speechx/common/frontend/audio/mfcc.cc
similarity index 100%
rename from speechx/speechx/frontend/audio/mfcc.cc
rename to speechx/speechx/common/frontend/audio/mfcc.cc
diff --git a/speechx/speechx/frontend/audio/mfcc.h b/speechx/speechx/common/frontend/audio/mfcc.h
similarity index 100%
rename from speechx/speechx/frontend/audio/mfcc.h
rename to speechx/speechx/common/frontend/audio/mfcc.h
diff --git a/speechx/speechx/frontend/audio/normalizer.h b/speechx/speechx/common/frontend/audio/normalizer.h
similarity index 100%
rename from speechx/speechx/frontend/audio/normalizer.h
rename to speechx/speechx/common/frontend/audio/normalizer.h
diff --git a/speechx/speechx/utils/CMakeLists.txt b/speechx/speechx/common/utils/CMakeLists.txt
similarity index 100%
rename from speechx/speechx/utils/CMakeLists.txt
rename to speechx/speechx/common/utils/CMakeLists.txt
diff --git a/speechx/speechx/utils/file_utils.cc b/speechx/speechx/common/utils/file_utils.cc
similarity index 100%
rename from speechx/speechx/utils/file_utils.cc
rename to speechx/speechx/common/utils/file_utils.cc
diff --git a/speechx/speechx/utils/file_utils.h b/speechx/speechx/common/utils/file_utils.h
similarity index 100%
rename from speechx/speechx/utils/file_utils.h
rename to speechx/speechx/common/utils/file_utils.h
diff --git a/speechx/speechx/utils/math.cc b/speechx/speechx/common/utils/math.cc
similarity index 100%
rename from speechx/speechx/utils/math.cc
rename to speechx/speechx/common/utils/math.cc
diff --git a/speechx/speechx/utils/math.h b/speechx/speechx/common/utils/math.h
similarity index 100%
rename from speechx/speechx/utils/math.h
rename to speechx/speechx/common/utils/math.h
diff --git a/speechx/speechx/decoder/ctc_decoders b/speechx/speechx/decoder/ctc_decoders
deleted file mode 120000
index b280de09681..00000000000
--- a/speechx/speechx/decoder/ctc_decoders
+++ /dev/null
@@ -1 +0,0 @@
-../../../third_party/ctc_decoders
\ No newline at end of file
diff --git a/speechx/speechx/frontend/text/CMakeLists.txt b/speechx/speechx/frontend/text/CMakeLists.txt
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/speechx/speechx/kaldi/CMakeLists.txt b/speechx/speechx/kaldi/CMakeLists.txt
index ce6b43f632a..d27668fccd3 100644
--- a/speechx/speechx/kaldi/CMakeLists.txt
+++ b/speechx/speechx/kaldi/CMakeLists.txt
@@ -1,4 +1,7 @@
 project(kaldi)
+include_directories(
+${CMAKE_CURRENT_SOURCE_DIR}
+)
 
 add_subdirectory(base)
 add_subdirectory(util)
@@ -10,4 +13,4 @@ add_subdirectory(decoder)
 add_subdirectory(lm)
 
 add_subdirectory(fstbin)
-add_subdirectory(lmbin)
\ No newline at end of file
+add_subdirectory(lmbin)
diff --git a/speechx/speechx/third_party/CMakeLists.txt b/speechx/speechx/third_party/CMakeLists.txt
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/speechx/speechx/third_party/README.md b/speechx/speechx/third_party/README.md
deleted file mode 100644
index 2d620335b9c..00000000000
--- a/speechx/speechx/third_party/README.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# third party
-
-Those libs copied and developed from third pary opensource software projects.
-For all of these things, the official websites are the best place to go.

From 8cc56717929cd6f4b98870d8ede6a481186a39dd Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Fri, 16 Dec 2022 11:30:50 +0800
Subject: [PATCH 4/6] clean ctc_decoders dir

---
 .../decoder/ctc_decoders/COPYING.APACHE2.0    | 201 ------------------
 .../asr/decoder/ctc_decoders/COPYING.LESSER.3 | 165 --------------
 .../speechx/asr/decoder/ctc_decoders/LICENSE  |   8 -
 .../asr/decoder/ctc_decoders/__init__.py      |  13 --
 .../asr/decoder/ctc_decoders/decoders.i       |  33 ---
 .../speechx/asr/decoder/ctc_decoders/setup.py | 138 ------------
 .../speechx/asr/decoder/ctc_decoders/setup.sh |  24 ---
 7 files changed, 582 deletions(-)
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/LICENSE
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/__init__.py
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/decoders.i
 delete mode 100644 speechx/speechx/asr/decoder/ctc_decoders/setup.py
 delete mode 100755 speechx/speechx/asr/decoder/ctc_decoders/setup.sh

diff --git a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0 b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
deleted file mode 100644
index 261eeb9e9f8..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.APACHE2.0
+++ /dev/null
@@ -1,201 +0,0 @@
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3 b/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
deleted file mode 100644
index cca7fc278f5..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/COPYING.LESSER.3
+++ /dev/null
@@ -1,165 +0,0 @@
-		   GNU LESSER GENERAL PUBLIC LICENSE
-                       Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-
-  This version of the GNU Lesser General Public License incorporates
-the terms and conditions of version 3 of the GNU General Public
-License, supplemented by the additional permissions listed below.
-
-  0. Additional Definitions.
-
-  As used herein, "this License" refers to version 3 of the GNU Lesser
-General Public License, and the "GNU GPL" refers to version 3 of the GNU
-General Public License.
-
-  "The Library" refers to a covered work governed by this License,
-other than an Application or a Combined Work as defined below.
-
-  An "Application" is any work that makes use of an interface provided
-by the Library, but which is not otherwise based on the Library.
-Defining a subclass of a class defined by the Library is deemed a mode
-of using an interface provided by the Library.
-
-  A "Combined Work" is a work produced by combining or linking an
-Application with the Library.  The particular version of the Library
-with which the Combined Work was made is also called the "Linked
-Version".
-
-  The "Minimal Corresponding Source" for a Combined Work means the
-Corresponding Source for the Combined Work, excluding any source code
-for portions of the Combined Work that, considered in isolation, are
-based on the Application, and not on the Linked Version.
-
-  The "Corresponding Application Code" for a Combined Work means the
-object code and/or source code for the Application, including any data
-and utility programs needed for reproducing the Combined Work from the
-Application, but excluding the System Libraries of the Combined Work.
-
-  1. Exception to Section 3 of the GNU GPL.
-
-  You may convey a covered work under sections 3 and 4 of this License
-without being bound by section 3 of the GNU GPL.
-
-  2. Conveying Modified Versions.
-
-  If you modify a copy of the Library, and, in your modifications, a
-facility refers to a function or data to be supplied by an Application
-that uses the facility (other than as an argument passed when the
-facility is invoked), then you may convey a copy of the modified
-version:
-
-   a) under this License, provided that you make a good faith effort to
-   ensure that, in the event an Application does not supply the
-   function or data, the facility still operates, and performs
-   whatever part of its purpose remains meaningful, or
-
-   b) under the GNU GPL, with none of the additional permissions of
-   this License applicable to that copy.
-
-  3. Object Code Incorporating Material from Library Header Files.
-
-  The object code form of an Application may incorporate material from
-a header file that is part of the Library.  You may convey such object
-code under terms of your choice, provided that, if the incorporated
-material is not limited to numerical parameters, data structure
-layouts and accessors, or small macros, inline functions and templates
-(ten or fewer lines in length), you do both of the following:
-
-   a) Give prominent notice with each copy of the object code that the
-   Library is used in it and that the Library and its use are
-   covered by this License.
-
-   b) Accompany the object code with a copy of the GNU GPL and this license
-   document.
-
-  4. Combined Works.
-
-  You may convey a Combined Work under terms of your choice that,
-taken together, effectively do not restrict modification of the
-portions of the Library contained in the Combined Work and reverse
-engineering for debugging such modifications, if you also do each of
-the following:
-
-   a) Give prominent notice with each copy of the Combined Work that
-   the Library is used in it and that the Library and its use are
-   covered by this License.
-
-   b) Accompany the Combined Work with a copy of the GNU GPL and this license
-   document.
-
-   c) For a Combined Work that displays copyright notices during
-   execution, include the copyright notice for the Library among
-   these notices, as well as a reference directing the user to the
-   copies of the GNU GPL and this license document.
-
-   d) Do one of the following:
-
-       0) Convey the Minimal Corresponding Source under the terms of this
-       License, and the Corresponding Application Code in a form
-       suitable for, and under terms that permit, the user to
-       recombine or relink the Application with a modified version of
-       the Linked Version to produce a modified Combined Work, in the
-       manner specified by section 6 of the GNU GPL for conveying
-       Corresponding Source.
-
-       1) Use a suitable shared library mechanism for linking with the
-       Library.  A suitable mechanism is one that (a) uses at run time
-       a copy of the Library already present on the user's computer
-       system, and (b) will operate properly with a modified version
-       of the Library that is interface-compatible with the Linked
-       Version.
-
-   e) Provide Installation Information, but only if you would otherwise
-   be required to provide such information under section 6 of the
-   GNU GPL, and only to the extent that such information is
-   necessary to install and execute a modified version of the
-   Combined Work produced by recombining or relinking the
-   Application with a modified version of the Linked Version. (If
-   you use option 4d0, the Installation Information must accompany
-   the Minimal Corresponding Source and Corresponding Application
-   Code. If you use option 4d1, you must provide the Installation
-   Information in the manner specified by section 6 of the GNU GPL
-   for conveying Corresponding Source.)
-
-  5. Combined Libraries.
-
-  You may place library facilities that are a work based on the
-Library side by side in a single library together with other library
-facilities that are not Applications and are not covered by this
-License, and convey such a combined library under terms of your
-choice, if you do both of the following:
-
-   a) Accompany the combined library with a copy of the same work based
-   on the Library, uncombined with any other library facilities,
-   conveyed under the terms of this License.
-
-   b) Give prominent notice with the combined library that part of it
-   is a work based on the Library, and explaining where to find the
-   accompanying uncombined form of the same work.
-
-  6. Revised Versions of the GNU Lesser General Public License.
-
-  The Free Software Foundation may publish revised and/or new versions
-of the GNU Lesser General Public License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns.
-
-  Each version is given a distinguishing version number. If the
-Library as you received it specifies that a certain numbered version
-of the GNU Lesser General Public License "or any later version"
-applies to it, you have the option of following the terms and
-conditions either of that published version or of any later version
-published by the Free Software Foundation. If the Library as you
-received it does not specify a version number of the GNU Lesser
-General Public License, you may choose any version of the GNU Lesser
-General Public License ever published by the Free Software Foundation.
-
-  If the Library as you received it specifies that a proxy can decide
-whether future versions of the GNU Lesser General Public License shall
-apply, that proxy's public statement of acceptance of any version is
-permanent authorization for you to choose that version for the
-Library.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/LICENSE b/speechx/speechx/asr/decoder/ctc_decoders/LICENSE
deleted file mode 100644
index ad947f8d756..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/LICENSE
+++ /dev/null
@@ -1,8 +0,0 @@
-Most of the code here is licensed under the Apache License 2.0.  
-There are exceptions that have their own licenses, listed below.  
-
-score.h and score.cpp is under the LGPL license. 
-The two files include the header files from KenLM project.
-
-For the rest:
-The default license of paddlespeech-ctcdecoders is Apache License 2.0.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/__init__.py b/speechx/speechx/asr/decoder/ctc_decoders/__init__.py
deleted file mode 100644
index 185a92b8d94..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/decoders.i b/speechx/speechx/asr/decoder/ctc_decoders/decoders.i
deleted file mode 100644
index 8fe3b279f59..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/decoders.i
+++ /dev/null
@@ -1,33 +0,0 @@
-%module paddlespeech_ctcdecoders
-%{
-#include "scorer.h"
-#include "ctc_greedy_decoder.h"
-#include "ctc_beam_search_decoder.h"
-#include "decoder_utils.h"
-%}
-
-%include "std_vector.i"
-%include "std_pair.i"
-%include "std_string.i"
-%import "decoder_utils.h"
-
-namespace std {
-    %template(DoubleVector) std::vector<double>;
-    %template(IntVector) std::vector<int>;
-    %template(StringVector) std::vector<std::string>;
-    %template(VectorOfStructVector) std::vector<std::vector<double> >;
-    %template(FloatVector) std::vector<float>;
-    %template(Pair) std::pair<float, std::string>;
-    %template(PairFloatStringVector)  std::vector<std::pair<float, std::string> >;
-    %template(PairDoubleStringVector) std::vector<std::pair<double, std::string> >;
-    %template(PairDoubleStringVector2) std::vector<std::vector<std::pair<double, std::string> > >;
-    %template(DoubleVector3) std::vector<std::vector<std::vector<double> > >;
-}
-
-%template(IntDoublePairCompSecondRev) pair_comp_second_rev<int, double>;
-%template(StringDoublePairCompSecondRev) pair_comp_second_rev<std::string, double>;
-%template(DoubleStringPairCompFirstRev) pair_comp_first_rev<double, std::string>;
-
-%include "scorer.h"
-%include "ctc_greedy_decoder.h"
-%include "ctc_beam_search_decoder.h"
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/setup.py b/speechx/speechx/asr/decoder/ctc_decoders/setup.py
deleted file mode 100644
index 9a8b292a07b..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/setup.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Script to build and install decoder package."""
-import argparse
-import glob
-import multiprocessing.pool
-import os
-import platform
-import sys
-
-from setuptools import distutils
-from setuptools import Extension
-from setuptools import setup
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
-    "--num_processes",
-    default=1,
-    type=int,
-    help="Number of cpu processes to build package. (default: %(default)d)")
-args = parser.parse_known_args()
-
-# reconstruct sys.argv to pass to setup below
-sys.argv = [sys.argv[0]] + args[1]
-
-
-# monkey-patch for parallel compilation
-# See: https://stackoverflow.com/a/13176803
-def parallelCCompile(self,
-                     sources,
-                     output_dir=None,
-                     macros=None,
-                     include_dirs=None,
-                     debug=0,
-                     extra_preargs=None,
-                     extra_postargs=None,
-                     depends=None):
-    # those lines are copied from distutils.ccompiler.CCompiler directly
-    macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
-        output_dir, macros, include_dirs, sources, depends, extra_postargs)
-    cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)
-
-    # parallel code
-    def _single_compile(obj):
-        try:
-            src, ext = build[obj]
-        except KeyError:
-            return
-        self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
-
-    # convert to list, imap is evaluated on-demand
-    thread_pool = multiprocessing.pool.ThreadPool(args[0].num_processes)
-    list(thread_pool.imap(_single_compile, objects))
-    return objects
-
-
-def compile_test(header, library):
-    dummy_path = os.path.join(os.path.dirname(__file__), "dummy")
-    command = "bash -c \"g++ -include " + header \
-        + " -l" + library + " -x c++ - <<<'int main() {}' -o " \
-        + dummy_path + " >/dev/null 2>/dev/null && rm " \
-        + dummy_path + " 2>/dev/null\""
-    return os.system(command) == 0
-
-
-# hack compile to support parallel compiling
-distutils.ccompiler.CCompiler.compile = parallelCCompile
-
-FILES = glob.glob('kenlm/util/*.cc') \
-    + glob.glob('kenlm/lm/*.cc') \
-    + glob.glob('kenlm/util/double-conversion/*.cc')
-
-FILES += glob.glob('openfst-1.6.3/src/lib/*.cc')
-
-# yapf: disable
-FILES = [
-    fn for fn in FILES if not (fn.endswith('main.cc') or fn.endswith('test.cc')
-                               or fn.endswith('unittest.cc'))
-]
-# yapf: enable
-LIBS = ['stdc++']
-if platform.system() != 'Darwin':
-    LIBS.append('rt')
-if platform.system() == 'Windows':
-    LIBS = ['-static-libstdc++']
-
-ARGS = ['-O3', '-DNDEBUG', '-DKENLM_MAX_ORDER=6', '-std=c++11']
-
-if compile_test('zlib.h', 'z'):
-    ARGS.append('-DHAVE_ZLIB')
-    LIBS.append('z')
-
-if compile_test('bzlib.h', 'bz2'):
-    ARGS.append('-DHAVE_BZLIB')
-    LIBS.append('bz2')
-
-if compile_test('lzma.h', 'lzma'):
-    ARGS.append('-DHAVE_XZLIB')
-    LIBS.append('lzma')
-
-os.system('swig -python -c++ ./decoders.i')
-
-decoders_module = [
-    Extension(
-        name='_paddlespeech_ctcdecoders',
-        sources=FILES + glob.glob('*.cxx') + glob.glob('*.cpp'),
-        language='c++',
-        include_dirs=[
-            '.',
-            'kenlm',
-            'openfst-1.6.3/src/include',
-            'ThreadPool',
-        ],
-        libraries=LIBS,
-        extra_compile_args=ARGS)
-]
-
-setup(
-    name='paddlespeech_ctcdecoders',
-    version='0.2.0',
-    description="CTC decoders in paddlespeech",
-    author="PaddlePaddle Speech and Language Team",
-    author_email="paddlesl@baidu.com",
-    url="https://github.com/PaddlePaddle/PaddleSpeech",
-    license='Apache 2.0, GNU Lesser General Public License v3 (LGPLv3) (LGPL-3)',
-    ext_modules=decoders_module,
-    py_modules=['paddlespeech_ctcdecoders'])
diff --git a/speechx/speechx/asr/decoder/ctc_decoders/setup.sh b/speechx/speechx/asr/decoder/ctc_decoders/setup.sh
deleted file mode 100755
index 302c5550250..00000000000
--- a/speechx/speechx/asr/decoder/ctc_decoders/setup.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env bash
-
-if [ ! -d kenlm ]; then
-    git clone https://github.com/kpu/kenlm.git
-    cd kenlm/
-    git checkout df2d717e95183f79a90b2fa6e4307083a351ca6a
-    cd ..
-    echo -e "\n"
-fi
-
-if [ ! -d openfst-1.6.3 ]; then
-    echo "Download and extract openfst ..."
-    wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz --no-check-certificate
-    tar -xzvf openfst-1.6.3.tar.gz
-    echo -e "\n"
-fi
-
-if [ ! -d ThreadPool ]; then
-    git clone https://github.com/progschj/ThreadPool.git
-    echo -e "\n"
-fi
-
-echo "Install decoders ..."
-python3 setup.py install --num_processes 4

From cd49b31a18dd6295f110cb8566ee60a0425fd46e Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Tue, 27 Dec 2022 16:03:22 +0800
Subject: [PATCH 5/6] add nnet cache && make 2 thread work

---
 speechx/CMakeLists.txt                        |   2 +-
 .../ctc_prefix_beam_search_decoder_main.cc    |  15 ++-
 speechx/speechx/asr/nnet/CMakeLists.txt       |  22 ++--
 speechx/speechx/asr/nnet/decodable.cc         |  88 ++++---------
 speechx/speechx/asr/nnet/decodable.h          |  16 +--
 speechx/speechx/asr/nnet/nnet_producer.cc     |  84 ++++++++++++
 speechx/speechx/asr/nnet/nnet_producer.h      |  73 +++++++++++
 speechx/speechx/asr/recognizer/CMakeLists.txt |   1 +
 .../speechx/asr/recognizer/u2_recognizer.cc   |  15 ++-
 .../speechx/asr/recognizer/u2_recognizer.h    |  10 +-
 .../recognizer/u2_recognizer_thread_main.cc   | 123 ++++++++++++++++++
 speechx/speechx/asr/server/CMakeLists.txt     |   2 +-
 speechx/speechx/common/base/common.h          |   2 +-
 speechx/speechx/common/base/safe_queue.h      |  71 ++++++++++
 14 files changed, 416 insertions(+), 108 deletions(-)
 create mode 100644 speechx/speechx/asr/nnet/nnet_producer.cc
 create mode 100644 speechx/speechx/asr/nnet/nnet_producer.h
 create mode 100644 speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
 create mode 100644 speechx/speechx/common/base/safe_queue.h

diff --git a/speechx/CMakeLists.txt b/speechx/CMakeLists.txt
index 6b957160eec..2068b51ac71 100644
--- a/speechx/CMakeLists.txt
+++ b/speechx/CMakeLists.txt
@@ -35,7 +35,7 @@ option(TEST_DEBUG "option for debug" OFF)
 option(USE_PROFILING "enable c++ profling" OFF)
 
 option(USING_U2  "compile u2 model." ON)
-option(USING_DS2 "compile with ds2 model." ON)
+option(USING_DS2 "compile with ds2 model." OFF)
 
 option(USING_GPU "u2 compute on GPU." OFF)
 
diff --git a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
index c59b1f2e742..9baa836b2a6 100644
--- a/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
+++ b/speechx/speechx/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
@@ -12,13 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "absl/strings/str_split.h"
 #include "base/common.h"
-#include "decoder/ctc_prefix_beam_search_decoder.h"
 #include "frontend/audio/data_cache.h"
 #include "fst/symbol-table.h"
 #include "kaldi/util/table-types.h"
 #include "nnet/decodable.h"
+#include "nnet/nnet_producer.h"
 #include "nnet/u2_nnet.h"
 
 DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
@@ -40,7 +41,7 @@ using kaldi::BaseFloat;
 using kaldi::Matrix;
 using std::vector;
 
-// test ds2 online decoder by feeding speech feature
+// test u2 online decoder by feeding speech feature
 int main(int argc, char* argv[]) {
     gflags::SetUsageMessage("Usage:");
     gflags::ParseCommandLineFlags(&argc, &argv, false);
@@ -70,8 +71,10 @@ int main(int argc, char* argv[]) {
     // decodeable
     std::shared_ptr<ppspeech::DataCache> raw_data =
         std::make_shared<ppspeech::DataCache>();
+    std::shared_ptr<ppspeech::NnetProducer> nnet_producer =
+        std::make_shared<ppspeech::NnetProducer>(nnet, raw_data);
     std::shared_ptr<ppspeech::Decodable> decodable =
-        std::make_shared<ppspeech::Decodable>(nnet, raw_data);
+        std::make_shared<ppspeech::Decodable>(nnet_producer);
 
     // decoder
     ppspeech::CTCBeamSearchOptions opts;
@@ -115,9 +118,9 @@ int main(int argc, char* argv[]) {
                     ori_feature_len - chunk_idx * chunk_stride, chunk_size);
             }
             if (this_chunk_size < receptive_field_length) {
-                LOG(WARNING)
-                    << "utt: " << utt << " skip last " << this_chunk_size
-                    << " frames, expect is " << receptive_field_length;
+                LOG(WARNING) << "utt: " << utt << " skip last "
+                             << this_chunk_size << " frames, expect is "
+                             << receptive_field_length;
                 break;
             }
 
diff --git a/speechx/speechx/asr/nnet/CMakeLists.txt b/speechx/speechx/asr/nnet/CMakeLists.txt
index 435666163a9..750c77521d8 100644
--- a/speechx/speechx/asr/nnet/CMakeLists.txt
+++ b/speechx/speechx/asr/nnet/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(srcs decodable.cc)
+set(srcs decodable.cc nnet_producer.cc)
 
 if(USING_DS2)
   list(APPEND srcs ds2_nnet.cc)
@@ -27,13 +27,13 @@ if(USING_DS2)
 endif()
 
 # test bin
-if(USING_U2)
-  set(bin_name u2_nnet_main)
-  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
-  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
-  target_link_libraries(${bin_name} utils kaldi-util kaldi-matrix gflags glog nnet)
-
-  target_compile_options(${bin_name}  PRIVATE ${PADDLE_COMPILE_FLAGS})
-  target_include_directories(${bin_name}  PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
-  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
-endif()
+#if(USING_U2)
+#  set(bin_name u2_nnet_main)
+#  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
+#  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
+#  target_link_libraries(${bin_name} utils kaldi-util kaldi-matrix gflags glog nnet)
+
+#  target_compile_options(${bin_name}  PRIVATE ${PADDLE_COMPILE_FLAGS})
+#  target_include_directories(${bin_name}  PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
+#  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})
+#endif()
diff --git a/speechx/speechx/asr/nnet/decodable.cc b/speechx/speechx/asr/nnet/decodable.cc
index 5fe2b984230..f01e9049324 100644
--- a/speechx/speechx/asr/nnet/decodable.cc
+++ b/speechx/speechx/asr/nnet/decodable.cc
@@ -21,19 +21,16 @@ using kaldi::Matrix;
 using kaldi::Vector;
 using std::vector;
 
-Decodable::Decodable(const std::shared_ptr<NnetBase>& nnet,
-                     const std::shared_ptr<FrontendInterface>& frontend,
+Decodable::Decodable(const std::shared_ptr<NnetProducer>& nnet_producer,
                      kaldi::BaseFloat acoustic_scale)
-    : frontend_(frontend),
-      nnet_(nnet),
+    : nnet_producer_(nnet_producer),
       frame_offset_(0),
       frames_ready_(0),
       acoustic_scale_(acoustic_scale) {}
 
 // for debug
 void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
-    nnet_out_cache_ = likelihood;
-    frames_ready_ += likelihood.NumRows();
+    nnet_producer_->Acceptlikelihood(likelihood);
 }
 
 
@@ -43,7 +40,7 @@ int32 Decodable::NumFramesReady() const { return frames_ready_; }
 
 // frame idx is from 0 to frame_ready_ -1;
 bool Decodable::IsLastFrame(int32 frame) {
-    bool flag = EnsureFrameHaveComputed(frame);
+    EnsureFrameHaveComputed(frame);
     return frame >= frames_ready_;
 }
 
@@ -64,32 +61,10 @@ bool Decodable::EnsureFrameHaveComputed(int32 frame) {
 
 bool Decodable::AdvanceChunk() {
     kaldi::Timer timer;
-    // read feats
-    Vector<BaseFloat> features;
-    if (frontend_ == NULL || frontend_->Read(&features) == false) {
-        // no feat or frontend_ not init.
-        VLOG(3) << "decodable exit;";
-        return false;
-    }
-    CHECK_GE(frontend_->Dim(), 0);
-    VLOG(1) << "AdvanceChunk feat cost: " << timer.Elapsed() << " sec.";
-    VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats.";
-
-    // forward feats
-    NnetOut out;
-    nnet_->FeedForward(features, frontend_->Dim(), &out);
-    int32& vocab_dim = out.vocab_dim;
-    Vector<BaseFloat>& logprobs = out.logprobs;
-
-    VLOG(2) << "Forward out " << logprobs.Dim() / vocab_dim
-            << " decoder frames.";
-    // cache nnet outupts
-    nnet_out_cache_.Resize(logprobs.Dim() / vocab_dim, vocab_dim);
-    nnet_out_cache_.CopyRowsFromVec(logprobs);
-
-    // update state, decoding frame.
+    bool flag = nnet_producer_->Read(&framelikelihood_);
+    if (flag == false) return false;
     frame_offset_ = frames_ready_;
-    frames_ready_ += nnet_out_cache_.NumRows();
+    frames_ready_ += 1;
     VLOG(1) << "AdvanceChunk feat + forward cost: " << timer.Elapsed()
             << " sec.";
     return true;
@@ -101,17 +76,17 @@ bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
         return false;
     }
 
-    int nrows = nnet_out_cache_.NumRows();
-    CHECK(nrows == (frames_ready_ - frame_offset_));
-    if (nrows <= 0) {
+    if (framelikelihood_.empty()) {
         LOG(WARNING) << "No new nnet out in cache.";
         return false;
     }
 
-    logprobs->Resize(nnet_out_cache_.NumRows() * nnet_out_cache_.NumCols());
-    logprobs->CopyRowsFromMat(nnet_out_cache_);
-
-    *vocab_dim = nnet_out_cache_.NumCols();
+    size_t dim = framelikelihood_.size();
+    logprobs->Resize(framelikelihood_.size());
+    std::memcpy(logprobs->Data(),
+                framelikelihood_.data(),
+                dim * sizeof(kaldi::BaseFloat));
+    *vocab_dim = framelikelihood_.size();
     return true;
 }
 
@@ -122,19 +97,8 @@ bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
         return false;
     }
 
-    int nrows = nnet_out_cache_.NumRows();
-    CHECK(nrows == (frames_ready_ - frame_offset_));
-    int vocab_size = nnet_out_cache_.NumCols();
-    likelihood->resize(vocab_size);
-
-    for (int32 idx = 0; idx < vocab_size; ++idx) {
-        (*likelihood)[idx] =
-            nnet_out_cache_(frame - frame_offset_, idx) * acoustic_scale_;
-
-        VLOG(4) << "nnet out: " << frame << " offset:" << frame_offset_ << " "
-                << nnet_out_cache_.NumRows()
-                << " logprob: " << nnet_out_cache_(frame - frame_offset_, idx);
-    }
+    CHECK_EQ(1, (frames_ready_ - frame_offset_));
+    *likelihood = framelikelihood_;
     return true;
 }
 
@@ -143,37 +107,31 @@ BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
         return false;
     }
 
-    CHECK_LE(index, nnet_out_cache_.NumCols());
+    CHECK_LE(index, framelikelihood_.size());
     CHECK_LE(frame, frames_ready_);
 
     // the nnet output is prob ranther than log prob
     // the index - 1, because the ilabel
     BaseFloat logprob = 0.0;
     int32 frame_idx = frame - frame_offset_;
-    BaseFloat nnet_out = nnet_out_cache_(frame_idx, TokenId2NnetId(index));
-    if (nnet_->IsLogProb()) {
-        logprob = nnet_out;
-    } else {
-        logprob = std::log(nnet_out + std::numeric_limits<float>::epsilon());
-    }
-    CHECK(!std::isnan(logprob) && !std::isinf(logprob));
+    CHECK_EQ(frame_idx, 0);
+    logprob = framelikelihood_[TokenId2NnetId(index)];
     return acoustic_scale_ * logprob;
 }
 
 void Decodable::Reset() {
-    if (frontend_ != nullptr) frontend_->Reset();
-    if (nnet_ != nullptr) nnet_->Reset();
+    if (nnet_producer_ != nullptr) nnet_producer_->Reset();
     frame_offset_ = 0;
     frames_ready_ = 0;
-    nnet_out_cache_.Resize(0, 0);
+    framelikelihood_.clear();
 }
 
 void Decodable::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                    float reverse_weight,
                                    std::vector<float>* rescoring_score) {
     kaldi::Timer timer;
-    nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
+    nnet_producer_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
     VLOG(1) << "Attention Rescoring cost:  " << timer.Elapsed() << " sec.";
 }
 
-}  // namespace ppspeech
\ No newline at end of file
+}  // namespace ppspeech
diff --git a/speechx/speechx/asr/nnet/decodable.h b/speechx/speechx/asr/nnet/decodable.h
index dd7b329e581..cd498e42db2 100644
--- a/speechx/speechx/asr/nnet/decodable.h
+++ b/speechx/speechx/asr/nnet/decodable.h
@@ -13,10 +13,10 @@
 // limitations under the License.
 
 #include "base/common.h"
-#include "frontend/audio/frontend_itf.h"
 #include "kaldi/decoder/decodable-itf.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "nnet/nnet_itf.h"
+#include "nnet/nnet_producer.h"
 
 namespace ppspeech {
 
@@ -24,8 +24,7 @@ struct DecodableOpts;
 
 class Decodable : public kaldi::DecodableInterface {
   public:
-    explicit Decodable(const std::shared_ptr<NnetBase>& nnet,
-                       const std::shared_ptr<FrontendInterface>& frontend,
+    explicit Decodable(const std::shared_ptr<NnetProducer>& nnet_producer,
                        kaldi::BaseFloat acoustic_scale = 1.0);
 
     // void Init(DecodableOpts config);
@@ -57,23 +56,17 @@ class Decodable : public kaldi::DecodableInterface {
 
     void Reset();
 
-    bool IsInputFinished() const { return frontend_->IsFinished(); }
+    bool IsInputFinished() const { return nnet_producer_->IsFinished(); }
 
     bool EnsureFrameHaveComputed(int32 frame);
 
     int32 TokenId2NnetId(int32 token_id);
 
-    std::shared_ptr<NnetBase> Nnet() { return nnet_; }
-
     // for offline test
     void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
 
   private:
-    std::shared_ptr<FrontendInterface> frontend_;
-    std::shared_ptr<NnetBase> nnet_;
-
-    // nnet outputs' cache
-    kaldi::Matrix<kaldi::BaseFloat> nnet_out_cache_;
+    std::shared_ptr<NnetProducer> nnet_producer_;
 
     // the frame is nnet prob frame rather than audio feature frame
     // nnet frame subsample the feature frame
@@ -85,6 +78,7 @@ class Decodable : public kaldi::DecodableInterface {
     // so use subsampled_frame
     int32 current_log_post_subsampled_offset_;
     int32 num_chunk_computed_;
+    std::vector<kaldi::BaseFloat> framelikelihood_;
 
     kaldi::BaseFloat acoustic_scale_;
 };
diff --git a/speechx/speechx/asr/nnet/nnet_producer.cc b/speechx/speechx/asr/nnet/nnet_producer.cc
new file mode 100644
index 00000000000..3a0c4f18814
--- /dev/null
+++ b/speechx/speechx/asr/nnet/nnet_producer.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "nnet/nnet_producer.h"
+
+namespace ppspeech {
+
+using kaldi::Vector;
+using kaldi::BaseFloat;
+
+NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet,
+                           std::shared_ptr<FrontendInterface> frontend)
+    : frontend_(frontend), nnet_(nnet) {}  // same order as the declarations
+
+// Hands `inputs` to the frontend, then computes until nothing is left to read.
+void NnetProducer::Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs) {
+    frontend_->Accept(inputs);
+    // Compute() returns false once the frontend has no more features.
+    while (Compute()) {
+    }
+}
+
+// Caches each row of `likelihood` as one frame; Read() hands out one per call.
+void NnetProducer::Acceptlikelihood(
+    const kaldi::Matrix<BaseFloat>& likelihood) {
+    std::vector<BaseFloat> prob(likelihood.NumCols());
+    for (size_t idx = 0; idx < likelihood.NumRows(); ++idx) {
+        for (size_t col = 0; col < likelihood.NumCols(); ++col) {
+            prob[col] = likelihood(idx, col);
+        }
+        cache_.push_back(prob);  // once per row, after the row is complete
+    }
+}
+
+// Pops one cached frame into *nnet_prob and returns what cache_.pop() reports.
+bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {
+    return cache_.pop(nnet_prob);
+}
+
+// Runs the nnet on one frontend chunk and caches one vector per output frame.
+bool NnetProducer::Compute() {
+    Vector<BaseFloat> features;
+    if (frontend_ == NULL || frontend_->Read(&features) == false) {
+        // no feat or frontend_ not init.
+        VLOG(3) << "no feat avalible";
+        return false;
+    }
+    CHECK_GE(frontend_->Dim(), 0);
+    VLOG(2) << "Forward in " << features.Dim() / frontend_->Dim() << " feats.";
+
+    NnetOut out;
+    nnet_->FeedForward(features, frontend_->Dim(), &out);
+    const int32 vocab_dim = out.vocab_dim;
+    // vocab_dim is a divisor and a row width below, so it must be positive.
+    CHECK_GT(vocab_dim, 0);
+    const Vector<BaseFloat>& logprobs = out.logprobs;
+    const int32 nframes = logprobs.Dim() / vocab_dim;
+    VLOG(2) << "Forward out " << nframes << " decoder frames.";
+    // logprobs is flat: frame idx spans [idx * vocab_dim, (idx + 1) * vocab_dim).
+    for (int32 idx = 0; idx < nframes; ++idx) {
+        const BaseFloat* frame = logprobs.Data() + idx * vocab_dim;
+        cache_.push_back(std::vector<BaseFloat>(frame, frame + vocab_dim));
+    }
+    return true;
+}
+
+void NnetProducer::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
+                                      float reverse_weight,
+                                      std::vector<float>* rescoring_score) {
+    nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);  // delegates to nnet_
+}
+
+}  // namespace ppspeech
\ No newline at end of file
diff --git a/speechx/speechx/asr/nnet/nnet_producer.h b/speechx/speechx/asr/nnet/nnet_producer.h
new file mode 100644
index 00000000000..65e9116fff6
--- /dev/null
+++ b/speechx/speechx/asr/nnet/nnet_producer.h
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "base/common.h"
+#include "base/safe_queue.h"
+#include "frontend/audio/frontend_itf.h"
+#include "nnet/nnet_itf.h"
+
+namespace ppspeech {
+
+class NnetProducer {
+  public:
+    explicit NnetProducer(std::shared_ptr<NnetBase> nnet,
+                          std::shared_ptr<FrontendInterface> frontend = NULL);
+
+    // Feed feats or waves, then compute every chunk that became available.
+    void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& inputs);
+
+    void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);
+
+    // Pop one cached nnet output frame; returns the result of cache_.pop().
+    bool Read(std::vector<kaldi::BaseFloat>* nnet_prob);
+
+    bool Empty() const { return cache_.empty(); }
+
+    // Mark the input as finished and compute the last chunk of data.
+    void SetFinished() {
+        LOG(INFO) << "set finished";
+        // The frontend is optional (defaults to NULL); Compute() checks it too.
+        if (frontend_ != nullptr) frontend_->SetFinished();
+        Compute();
+        LOG(INFO) << "compute last feats done.";
+    }
+
+    bool IsFinished() const { return frontend_->IsFinished(); }
+
+    // Reset the frontend and the nnet (each skipped when null), then drop all
+    // cached frames.
+    void Reset() {
+        if (frontend_ != nullptr) frontend_->Reset();
+        if (nnet_ != nullptr) nnet_->Reset();
+        VLOG(3) << "feature cache reset: cache size: " << cache_.size();
+        cache_.clear();
+    }
+
+    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
+                            float reverse_weight,
+                            std::vector<float>* rescoring_score);
+
+  private:
+    bool Compute();
+
+    std::shared_ptr<FrontendInterface> frontend_;
+    std::shared_ptr<NnetBase> nnet_;
+    SafeQueue<std::vector<kaldi::BaseFloat>> cache_;
+
+    DISALLOW_COPY_AND_ASSIGN(NnetProducer);
+};
+
+}  // namespace ppspeech
diff --git a/speechx/speechx/asr/recognizer/CMakeLists.txt b/speechx/speechx/asr/recognizer/CMakeLists.txt
index 05078873952..53e2e58db68 100644
--- a/speechx/speechx/asr/recognizer/CMakeLists.txt
+++ b/speechx/speechx/asr/recognizer/CMakeLists.txt
@@ -30,6 +30,7 @@ endif()
 if (USING_U2)
   set(TEST_BINS 
     u2_recognizer_main
+    u2_recognizer_thread_main
   )
 
   foreach(bin_name IN LISTS TEST_BINS)
diff --git a/speechx/speechx/asr/recognizer/u2_recognizer.cc b/speechx/speechx/asr/recognizer/u2_recognizer.cc
index d1d308ebd4b..ea62ae1a1f1 100644
--- a/speechx/speechx/asr/recognizer/u2_recognizer.cc
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.cc
@@ -27,13 +27,13 @@ using std::vector;
 
 U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
     : opts_(resource) {
+    BaseFloat am_scale = resource.acoustic_scale;
     const FeaturePipelineOptions& feature_opts = resource.feature_pipeline_opts;
-    feature_pipeline_.reset(new FeaturePipeline(feature_opts));
-
+    std::shared_ptr<FeaturePipeline> feature_pipeline(
+        new FeaturePipeline(feature_opts));
     std::shared_ptr<NnetBase> nnet(new U2Nnet(resource.model_opts));
-
-    BaseFloat am_scale = resource.acoustic_scale;
-    decodable_.reset(new Decodable(nnet, feature_pipeline_, am_scale));
+    nnet_producer_.reset(new NnetProducer(nnet, feature_pipeline));
+    decodable_.reset(new Decodable(nnet_producer_, am_scale));
 
     CHECK_NE(resource.vocab_path, "");
     decoder_.reset(new CTCPrefixBeamSearch(
@@ -49,6 +49,7 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
 
 void U2Recognizer::Reset() {
     global_frame_offset_ = 0;
+    input_finished_ = false;
     num_frames_ = 0;
     result_.clear();
 
@@ -68,7 +69,7 @@ void U2Recognizer::ResetContinuousDecoding() {
 
 void U2Recognizer::Accept(const VectorBase<BaseFloat>& waves) {
     kaldi::Timer timer;
-    feature_pipeline_->Accept(waves);
+    nnet_producer_->Accept(waves);
     VLOG(1) << "feed waves cost: " << timer.Elapsed() << " sec. " << waves.Dim()
             << " samples.";
 }
@@ -210,7 +211,7 @@ std::string U2Recognizer::GetFinalResult() { return result_[0].sentence; }
 std::string U2Recognizer::GetPartialResult() { return result_[0].sentence; }
 
 void U2Recognizer::SetFinished() {
-    feature_pipeline_->SetFinished();
+    nnet_producer_->SetFinished();
     input_finished_ = true;
 }
 
diff --git a/speechx/speechx/asr/recognizer/u2_recognizer.h b/speechx/speechx/asr/recognizer/u2_recognizer.h
index 25850863370..855d161a045 100644
--- a/speechx/speechx/asr/recognizer/u2_recognizer.h
+++ b/speechx/speechx/asr/recognizer/u2_recognizer.h
@@ -130,11 +130,11 @@ class U2Recognizer {
         return !result_.empty() && !result_[0].sentence.empty();
     }
 
-
     int FrameShiftInMs() const {
-        // one decoder frame length in ms
-        return decodable_->Nnet()->SubsamplingRate() *
-               feature_pipeline_->FrameShift();
+        // one decoder frame length in ms, todo
+        return 1;
+        //    return decodable_->Nnet()->SubsamplingRate() *
+        //          feature_pipeline_->FrameShift();
     }
 
 
@@ -149,7 +149,7 @@ class U2Recognizer {
 
     // std::shared_ptr<U2RecognizerResource> resource_;
     // U2RecognizerResource resource_;
-    std::shared_ptr<FeaturePipeline> feature_pipeline_;
+    std::shared_ptr<NnetProducer> nnet_producer_;
     std::shared_ptr<Decodable> decodable_;
     std::unique_ptr<CTCPrefixBeamSearch> decoder_;
 
diff --git a/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc b/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
new file mode 100644
index 00000000000..e73efef11c7
--- /dev/null
+++ b/speechx/speechx/asr/recognizer/u2_recognizer_thread_main.cc
@@ -0,0 +1,123 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "recognizer/u2_recognizer.h"
+#include "decoder/param.h"
+#include "kaldi/feat/wave-reader.h"
+#include "kaldi/util/table-types.h"
+
+DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
+DEFINE_string(result_wspecifier, "", "test result wspecifier");
+DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
+DEFINE_int32(sample_rate, 16000, "sample rate");
+
+// Decoder thread body: decode until IsFinished(), decode once more, rescore.
+void decode_func(std::shared_ptr<ppspeech::U2Recognizer> recognizer) {
+    for (; !recognizer->IsFinished(); usleep(100)) {
+        recognizer->Decode();
+    }
+    recognizer->Decode();  // final pass after IsFinished() turned true
+    recognizer->Rescoring();
+}
+
+// Feeds every wav chunk by chunk while a second thread runs the decoder.
+int main(int argc, char* argv[]) {
+    gflags::SetUsageMessage("Usage:");
+    gflags::ParseCommandLineFlags(&argc, &argv, false);
+    google::InitGoogleLogging(argv[0]);
+    google::InstallFailureSignalHandler();
+    FLAGS_logtostderr = 1;
+
+    int32 num_done = 0, num_err = 0;
+    double tot_wav_duration = 0.0;
+    double tot_decode_time = 0.0;
+
+    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
+        FLAGS_wav_rspecifier);
+    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
+
+    int sample_rate = FLAGS_sample_rate;
+    float streaming_chunk = FLAGS_streaming_chunk;
+    int chunk_sample_size = streaming_chunk * sample_rate;
+    LOG(INFO) << "sr: " << sample_rate;
+    LOG(INFO) << "chunk size (s): " << streaming_chunk;
+    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;
+    CHECK_GT(chunk_sample_size, 0);  // a 0-sample chunk never advances
+
+    ppspeech::U2RecognizerResource resource =
+        ppspeech::U2RecognizerResource::InitFromFlags();
+    std::shared_ptr<ppspeech::U2Recognizer> recognizer_ptr(
+        new ppspeech::U2Recognizer(resource));
+
+    for (; !wav_reader.Done(); wav_reader.Next()) {
+        std::thread recognizer_thread(decode_func, recognizer_ptr);
+        std::string utt = wav_reader.Key();
+        const kaldi::WaveData& wave_data = wav_reader.Value();
+        LOG(INFO) << "utt: " << utt;
+        LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec.";
+        double dur = wave_data.Duration();
+        tot_wav_duration += dur;
+
+        int32 this_channel = 0;
+        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
+                                                    this_channel);
+        int tot_samples = waveform.Dim();
+        LOG(INFO) << "wav len (sample): " << tot_samples;
+
+        int sample_offset = 0;
+        kaldi::Timer local_timer;
+
+        while (sample_offset < tot_samples) {
+            int cur_chunk_size =
+                std::min(chunk_sample_size, tot_samples - sample_offset);
+
+            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
+            for (int i = 0; i < cur_chunk_size; ++i) {
+                wav_chunk(i) = waveform(sample_offset + i);
+            }
+            recognizer_ptr->Accept(wav_chunk);
+
+            // no overlap
+            sample_offset += cur_chunk_size;
+        }
+        CHECK(sample_offset == tot_samples);
+        // Always mark the input finished: the old short-last-chunk test never
+        // fired for wavs whose length is a multiple of the chunk size.
+        recognizer_ptr->SetFinished();
+
+        recognizer_thread.join();
+        tot_decode_time += local_timer.Elapsed();  // used for the final RTF
+        std::string result = recognizer_ptr->GetFinalResult();
+        recognizer_ptr->Reset();
+        if (result.empty()) {
+            // the TokenWriter can not write empty string.
+            ++num_err;
+            LOG(INFO) << " the result of " << utt << " is empty";
+            continue;
+        }
+
+        LOG(INFO) << utt << " " << result;
+        LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
+                  << " cost: " << local_timer.Elapsed();
+
+        result_writer.Write(utt, result);
+
+        ++num_done;
+    }
+
+    LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done);
+    LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec";
+    LOG(INFO) << "total decode cost:" << tot_decode_time << " sec";
+    LOG(INFO) << "RTF is: " << tot_decode_time / tot_wav_duration;
+}
diff --git a/speechx/speechx/asr/server/CMakeLists.txt b/speechx/speechx/asr/server/CMakeLists.txt
index 71b33daa929..566b42eefe3 100644
--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
@@ -1 +1 @@
-add_subdirectory(websocket)
+#add_subdirectory(websocket)
diff --git a/speechx/speechx/common/base/common.h b/speechx/speechx/common/base/common.h
index 97bff96620e..2a066ee68bc 100644
--- a/speechx/speechx/common/base/common.h
+++ b/speechx/speechx/common/base/common.h
@@ -48,4 +48,4 @@
 #include "base/log.h"
 #include "base/macros.h"
 #include "utils/file_utils.h"
-#include "utils/math.h"
\ No newline at end of file
+#include "utils/math.h"
diff --git a/speechx/speechx/common/base/safe_queue.h b/speechx/speechx/common/base/safe_queue.h
new file mode 100644
index 00000000000..25a012afb2d
--- /dev/null
+++ b/speechx/speechx/common/base/safe_queue.h
@@ -0,0 +1,105 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+
+#include "base/common.h"
+
+namespace ppspeech {
+
+// Thread-safe FIFO queue with an optional capacity bound.
+//
+// A capacity of 0 means the queue is unbounded. With a positive capacity,
+// push_back() blocks while the queue is full; pop() never blocks and reports
+// an empty queue through its return value.
+template <typename T>
+class SafeQueue {
+  public:
+    // capacity: maximum number of queued elements; 0 disables the bound.
+    explicit SafeQueue(size_t capacity = 0);
+
+    // Appends a copy of `in`, waiting for free space if the queue is bounded
+    // and currently full.
+    void push_back(const T& in);
+
+    // Moves the oldest element into `*out` and returns true, or returns false
+    // without touching `*out` when the queue is empty.
+    bool pop(T* out);
+
+    // Snapshot queries; the answer may be stale as soon as the call returns
+    // if other threads are using the queue.
+    bool empty() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return buffer_.empty();
+    }
+    size_t size() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return buffer_.size();
+    }
+
+    // Drops every queued element and releases any producer blocked in
+    // push_back().
+    void clear();
+
+  private:
+    // mutable so the const observers above can lock it.
+    mutable std::mutex mutex_;
+    // Signalled whenever space becomes available (pop / clear).
+    std::condition_variable condition_;
+    std::deque<T> buffer_;
+    size_t capacity_;
+};
+
+template <typename T>
+SafeQueue<T>::SafeQueue(size_t capacity) : capacity_(capacity) {}
+
+template <typename T>
+void SafeQueue<T>::push_back(const T& in) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    if (capacity_ > 0) {
+        // Wait until there is room; the predicate also guards against
+        // spurious wake-ups.
+        condition_.wait(lock, [this] { return buffer_.size() < capacity_; });
+    }
+    buffer_.push_back(in);
+}
+
+template <typename T>
+bool SafeQueue<T>::pop(T* out) {
+    std::unique_lock<std::mutex> lock(mutex_);
+    // The emptiness check must happen under the lock, otherwise it races with
+    // concurrent push_back()/pop()/clear() calls.
+    if (buffer_.empty()) {
+        return false;
+    }
+
+    *out = std::move(buffer_.front());
+    buffer_.pop_front();
+    // One slot was freed, so waking a single blocked producer is enough.
+    condition_.notify_one();
+    return true;
+}
+
+template <typename T>
+void SafeQueue<T>::clear() {
+    std::unique_lock<std::mutex> lock(mutex_);
+    buffer_.clear();
+    // Every blocked producer may now be able to proceed.
+    condition_.notify_all();
+}
+}  // namespace ppspeech

From 28fc05bb0e04ea0716948ee6d09f297353e21657 Mon Sep 17 00:00:00 2001
From: YangZhou <goat.zhou@qq.com>
Date: Tue, 27 Dec 2022 16:27:54 +0800
Subject: [PATCH 6/6] do not compile websocket

---
 speechx/speechx/asr/server/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/speechx/speechx/asr/server/CMakeLists.txt b/speechx/speechx/asr/server/CMakeLists.txt
index 71b33daa929..566b42eefe3 100644
--- a/speechx/speechx/asr/server/CMakeLists.txt
+++ b/speechx/speechx/asr/server/CMakeLists.txt
@@ -1 +1 @@
-add_subdirectory(websocket)
+#add_subdirectory(websocket)