Skip to content

Commit 070a2b2

Browse files
author
wxu
committed
deleted : Makefile src/segmentor/customized_options.h src/segmentor/customized_segmentor.cpp src/segmentor/customized_segmentor.h src/segmentor/decode_context.h src/segmentor/featurespace.cpp src/segmentor/featurespace.h src/segmentor/otcws_customized.cpp src/segmentor/parameter.h src/segmentor/rulebase.cpp src/segmentor/rulebase.h src/segmentor/score_matrix.h src/segmentor/segmentreader.h src/segmentor/segmentwriter.h src/utils/argtable.hpp test/CMakeLists.txt test/customized_cws_cmdline.cpp test/cws_cmdline.cpp test/ltp_test.cpp test/ltp_test2.cpp test/ltp_test_xml.cpp test/multi_customized_cws_cmdline.cpp test/multi_cws_cmdline.cpp test/multi_ltp_test.cpp test/multi_pos_cmdline.cpp test/par_cmdline.cpp test/pos_cmdline.cpp
modified : CMakeLists.txt src/CMakeLists.txt src/framework/serializable.h src/segmentor/CMakeLists.txt src/segmentor/decoder.cpp src/segmentor/decoder.h src/segmentor/extractor.cpp src/segmentor/extractor.h src/segmentor/instance.h src/segmentor/model.cpp src/segmentor/model.h src/segmentor/options.h src/segmentor/otcws.cpp src/segmentor/segment_dll.cpp src/segmentor/segment_dll.h src/segmentor/segmentor.cpp src/segmentor/segmentor.h src/segmentor/settings.h src/utils/cfgparser.hpp src/utils/chartypes.hpp src/utils/chartypes.tab src/utils/codecs.hpp src/utils/hasher.hpp src/utils/logging.hpp src/utils/math/featurevec.h src/utils/math/mat.h src/utils/math/sparsevec.h src/utils/sbcdbc.hpp src/utils/sentsplit.hpp src/utils/sentsplit.tab src/utils/smartmap.hpp src/utils/stringmap.hpp src/utils/strutils.hpp src/utils/strvec.hpp src/utils/template.hpp src/utils/time.hpp src/utils/unordered_map.hpp thirdparty/CMakeLists.txt thirdparty/boost/libs/CMakeLists.txt thirdparty/boost/libs/regex/CMakeLists.txt newfile : keep_stable.txt src/framework/decoder.h src/framework/featurespace.h src/framework/frontend.h src/framework/io.h src/framework/model.h src/framework/options.h src/framework/parameter.h src/segmentor/customized_segmentor_frontend.cpp src/segmentor/customized_segmentor_frontend.h src/segmentor/io.cpp src/segmentor/io.h src/segmentor/partial_segmentation.cpp src/segmentor/partial_segmentation.h src/segmentor/preprocessor.cpp src/segmentor/preprocessor.h src/segmentor/segmentor_frontend.cpp src/segmentor/segmentor_frontend.h src/segmentor/special_tokens.h src/utils/chartypes.tab.py src/utils/math/fast_binned.h src/utils/math/fast_binned.h.py src/utils/sentsplit.tab.py src/utils/unicode.tab src/utils/unicode.tab.py src/utils/unordered_set.hpp thirdparty/boost/libs/program_options/CMakeLists.txt thirdparty/boost/libs/program_options/src/cmdline.cpp thirdparty/boost/libs/program_options/src/config_file.cpp thirdparty/boost/libs/program_options/src/convert.cpp 
thirdparty/boost/libs/program_options/src/options_description.cpp thirdparty/boost/libs/program_options/src/parsers.cpp thirdparty/boost/libs/program_options/src/positional_options.cpp thirdparty/boost/libs/program_options/src/split.cpp thirdparty/boost/libs/program_options/src/utf8_codecvt_facet.cpp thirdparty/boost/libs/program_options/src/value_semantic.cpp thirdparty/boost/libs/program_options/src/variables_map.cpp thirdparty/boost/libs/program_options/src/winmain.cpp
1 parent 19a80ab commit 070a2b2

File tree

105 files changed

+8411
-5983
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

105 files changed

+8411
-5983
lines changed

CMakeLists.txt

+14-14
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,29 @@
11
cmake_minimum_required (VERSION 2.8.0)
2-
project ("LTP - Chinese Word Segmentation Component.")
2+
project ("LTP - Language Technology Platform")
33

44
# project attributes section
55
# -- config cmake modules path
6-
set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
6+
set (CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
77

88
if (APPLE)
9-
set(CMAKE_CXX_FLAGS "-std=c++11 -stdlib=libstdc++ -Wno-error=c++11-narrowing")
9+
add_definitions(-DGTEST_HAS_TR1_TUPLE=0)
10+
set(CMAKE_CXX_FLAGS "-std=c++11 -Wno-c++11-narrowing")
1011
endif(APPLE)
1112

12-
if (MINGW)
13-
set(CMAKE_CXX_FLAGS "-std=c++11 -Wno-narrowing -fpermissive")
14-
endif (MINGW)
15-
1613
# -- config output directories
1714
set (EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
18-
set (LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
19-
set (INCLUDE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/include)
15+
set (LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
16+
set (INCLUDE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/include)
2017

2118
# -- config source directories
22-
set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/src)
23-
set (THIRDPARTY_DIR ${PROJECT_SOURCE_DIR}/thirdparty)
19+
set (SOURCE_DIR ${PROJECT_SOURCE_DIR}/src)
20+
set (THIRDPARTY_DIR ${PROJECT_SOURCE_DIR}/thirdparty)
21+
set (TOOLS_DIR ${PROJECT_SOURCE_DIR}/tools)
2422

2523
# -- config resource directories
26-
set (CONFIGURE_DIR ${PROJECT_SOURCE_DIR}/conf)
24+
set (CONFIGURE_DIR ${PROJECT_SOURCE_DIR}/conf)
25+
set (MODEL_DIR ${PROJECT_SOURCE_DIR}/ltp_data)
26+
set (DATA_DIR ${PROJECT_SOURCE_DIR}/test_data)
2727

2828
# compiling section
2929
# -- compile shipped libraries
@@ -32,5 +32,5 @@ add_subdirectory (thirdparty)
3232
# -- compile source code
3333
add_subdirectory (src)
3434

35-
# compile testing
36-
add_subdirectory (test)
35+
# testing section
36+
# -- generate configure file for ltp_test

Makefile

-40
This file was deleted.

keep_stable.txt

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
keep_stable.txt
2+
README.md
3+
configure

src/CMakeLists.txt

+16-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
1-
set (utils_DIR ${SOURCE_DIR}/utils)
2-
set (segmentor_DIR ${SOURCE_DIR}/segmentor)
1+
set (utils_DIR ${SOURCE_DIR}/utils)
2+
set (xml4nlp_DIR ${SOURCE_DIR}/xml4nlp/)
3+
set (segmentor_DIR ${SOURCE_DIR}/segmentor)
4+
set (postagger_DIR ${SOURCE_DIR}/postagger)
5+
set (parser_DIR ${SOURCE_DIR}/parser)
6+
set (parser_n_DIR ${SOURCE_DIR}/parser_n)
7+
set (splitsnt_DIR ${SOURCE_DIR}/splitsnt)
8+
set (ner_DIR ${SOURCE_DIR}/ner/)
9+
set (srl_DIR ${SOURCE_DIR}/srl/)
10+
set (ltp_DIR ${SOURCE_DIR}/ltp/)
11+
set (server_DIR ${SOURCE_DIR}/server/)
312

413
add_subdirectory ("segmentor")
514

15+
# mongoose server is not supported in windows
16+
if (UNIX)
17+
endif()
18+
19+

src/framework/decoder.h

+273
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
#ifndef __LTP_FRAMEWORK_DECODER_H__
2+
#define __LTP_FRAMEWORK_DECODER_H__
3+
4+
#include "utils/math/mat.h"
5+
#include "utils/math/sparsevec.h"
6+
#include "utils/math/featurevec.h"
7+
8+
namespace ltp {
9+
namespace framework {
10+
11+
/**
 * A single cell of the Viterbi lattice.
 *
 * Each item records the best-scoring partial path that ends at position `i`
 * with label `l`, together with a back-pointer to the item it was extended
 * from. Following `prev` until it is null walks the path backwards.
 */
struct ViterbiLatticeItem {
  /**
   * Build an item that extends an existing partial path.
   *
   *  @param[in]  _i      the position stored in this item.
   *  @param[in]  _l      the label stored in this item.
   *  @param[in]  _score  the accumulated score of the path ending here.
   *  @param[in]  _prev   the preceding item on the path (may be null).
   */
  ViterbiLatticeItem (const size_t& _i, const size_t& _l, const double& _score,
      const ViterbiLatticeItem* _prev)
    : i(_i), l(_l), score(_score), prev(_prev) {}

  /**
   * Build an item without a predecessor; the position is fixed to 0.
   *
   *  @param[in]  _l      the label stored in this item.
   *  @param[in]  _score  the score of the item.
   */
  ViterbiLatticeItem (const size_t& _l, const double& _score)
    : i(0), l(_l), score(_score), prev(0) {}

  size_t i;                       //! the position of this item.
  size_t l;                       //! the label of this item.
  double score;                   //! the accumulated path score.
  const ViterbiLatticeItem* prev; //! the back-pointer, null at the path head.
};
24+
25+
/**
 * The constraint interface consulted by the constrained Viterbi decoding.
 *
 * The default implementation allows everything; subclasses override the
 * predicates to forbid certain emissions or transitions.
 */
class ViterbiDecodeConstrain {
public:
  //! Virtual destructor: the class is meant to be subclassed, so destroying
  //! a derived constraint through a base pointer must be well defined.
  virtual ~ViterbiDecodeConstrain() {}

  /**
   * Tell whether the (i, j) emission is allowed. The constrained decoder
   * calls it with a position as `i` and a label as `j`.
   *
   *  @return   bool  always true in the default implementation.
   */
  virtual bool can_emit(const size_t& i, const size_t& j) const {
    return true;
  }

  /**
   * Tell whether the transition from i to j is allowed. The constrained
   * decoder calls it with the previous label as `i` and the current label
   * as `j`.
   *
   *  @return   bool  always true in the default implementation.
   */
  virtual bool can_tran(const size_t& i, const size_t& j) const {
    return true;
  }
};
35+
36+
class ViterbiFeatureContext {
37+
public:
38+
math::SparseVec correct_features; //! the gold features.
39+
math::SparseVec predict_features; //! the predicted features.
40+
math::Mat<math::FeatureVector*> uni_features; //! the feature cache.
41+
42+
ViterbiFeatureContext() {}
43+
~ViterbiFeatureContext() { clear(); }
44+
45+
void clear() {
46+
if (uni_features.total_size() > 0) {
47+
size_t d1 = uni_features.nrows();
48+
size_t d2 = uni_features.ncols();
49+
for (size_t i = 0; i < d1; ++ i) {
50+
if (uni_features[i][0]) {
51+
uni_features[i][0]->clear();
52+
}
53+
for (size_t j = 0; j < d2; ++j) {
54+
if (uni_features[i][j]) {
55+
delete uni_features[i][j];
56+
}
57+
}
58+
}
59+
}
60+
61+
uni_features.dealloc();
62+
correct_features.zero();
63+
predict_features.zero();
64+
} // end clear
65+
};
66+
67+
class ViterbiScoreMatrix {
68+
private:
69+
math::Mat< double > emit_scores;
70+
math::Mat< double > tran_scores;
71+
public:
72+
ViterbiScoreMatrix() {}
73+
ViterbiScoreMatrix(const size_t& L, const size_t& T) {
74+
emit_scores.resize(L, T);
75+
tran_scores.resize(T, T);
76+
}
77+
78+
~ViterbiScoreMatrix() {}
79+
80+
void clear() {
81+
emit_scores.dealloc();
82+
tran_scores.dealloc();
83+
}
84+
85+
void resize(const size_t& L, const size_t& T) {
86+
emit_scores.resize(L, T);
87+
tran_scores.resize(T, T);
88+
}
89+
90+
void resize(const size_t& L, const size_t& T, const double& V) {
91+
emit_scores.resize(L, T); emit_scores = V;
92+
tran_scores.resize(T, T); tran_scores = V;
93+
}
94+
95+
size_t labels() const {
96+
return emit_scores.ncols();
97+
}
98+
99+
size_t length() const {
100+
return emit_scores.nrows();
101+
}
102+
103+
double emit(const size_t& i, const size_t& j) const {
104+
return emit_scores[i][j];
105+
}
106+
107+
double tran(const size_t& i, const size_t& j) const {
108+
return tran_scores[i][j];
109+
}
110+
111+
double safe_emit(const size_t& i, const size_t& j,
112+
const double& default_retval = 0.) const {
113+
if (i >= emit_scores.nrows() || j >= tran_scores.ncols()) {
114+
return default_retval;
115+
}
116+
return emit_scores[i][j];
117+
}
118+
119+
double safe_tran(const size_t& i, const size_t& j,
120+
const double& default_retval = 0.) const {
121+
if (i >= tran_scores.nrows() || j >= tran_scores.ncols()) {
122+
return default_retval;
123+
}
124+
return tran_scores[i][j];
125+
}
126+
127+
void set_emit(const size_t& i, const size_t& j, const double& score) {
128+
emit_scores[i][j] = score;
129+
}
130+
131+
void set_tran(const size_t& i, const size_t& j, const double& score) {
132+
tran_scores[i][j] = score;
133+
}
134+
135+
void safe_set_emit(const size_t& i, const size_t& j, const double& score) {
136+
if (i >= emit_scores.nrows() || j >= tran_scores.ncols()) {
137+
return;
138+
}
139+
emit_scores[i][j] = score;
140+
}
141+
142+
void safe_set_tran(const size_t& i, const size_t& j, const double& score) {
143+
if (i >= tran_scores.nrows() || j >= tran_scores.ncols()) {
144+
return;
145+
}
146+
tran_scores[i][j] = score;
147+
}
148+
};
149+
150+
class ViterbiDecoder {
151+
public:
152+
void decode(const ViterbiScoreMatrix& scm, std::vector<int>& output) {
153+
size_t L = scm.length();
154+
size_t T = scm.labels();
155+
156+
init_lattice(L, T);
157+
158+
for (size_t i = 0; i < L; ++ i) {
159+
for (size_t t = 0; t < T; ++ t) {
160+
if (i == 0) {
161+
ViterbiLatticeItem* item = new ViterbiLatticeItem(i, t, scm.emit(i, t), NULL);
162+
lattice_insert(lattice[i][t], item);
163+
} else {
164+
for (size_t pt = 0; pt < T; ++ pt) {
165+
const ViterbiLatticeItem* prev = lattice[i-1][pt];
166+
if (!prev) { continue; }
167+
168+
double s = scm.emit(i, t) + scm.tran(pt, t) + prev->score;
169+
ViterbiLatticeItem* item = new ViterbiLatticeItem(i, t, s, prev);
170+
lattice_insert(lattice[i][t], item);
171+
}
172+
}
173+
}
174+
}
175+
176+
get_result(L-1, output);
177+
free_lattice();
178+
}
179+
180+
void decode(const ViterbiScoreMatrix& scm,
181+
const ViterbiDecodeConstrain& con,
182+
std::vector<int>& output) {
183+
size_t L = scm.length();
184+
size_t T = scm.labels();
185+
186+
init_lattice(L, T);
187+
188+
for (size_t i = 0; i < L; ++ i) {
189+
for (size_t t = 0; t < T; ++ t) {
190+
if (!con.can_emit(i, t)) { continue; }
191+
192+
if (i == 0) {
193+
ViterbiLatticeItem* item = new ViterbiLatticeItem(i, t, scm.emit(i, t), NULL);
194+
lattice_insert(lattice[i][t], item);
195+
} else {
196+
for (size_t pt = 0; pt < T; ++ pt) {
197+
if (!con.can_emit(i-1, pt) || !con.can_tran(pt, t)) { continue; }
198+
199+
const ViterbiLatticeItem* prev = lattice[i-1][pt];
200+
if (!prev) { continue; }
201+
202+
double s = scm.emit(i, t) + scm.tran(pt, t) + prev->score;
203+
ViterbiLatticeItem* item = new ViterbiLatticeItem(i, t, s, prev);
204+
lattice_insert(lattice[i][t], item);
205+
}
206+
}
207+
}
208+
}
209+
get_result(L-1, output);
210+
free_lattice();
211+
}
212+
protected:
213+
void init_lattice(const size_t& L, const size_t& T) {
214+
lattice.resize(L, T);
215+
lattice = NULL;
216+
}
217+
218+
void get_result(std::vector<int>& output) {
219+
size_t L = lattice.nrows();
220+
get_result(L- 1, output);
221+
}
222+
223+
void get_result(const size_t& p, std::vector<int>& output) {
224+
size_t T = lattice.ncols();
225+
226+
const ViterbiLatticeItem* best = NULL;
227+
for (size_t t = 0; t < T; ++ t) {
228+
if (!lattice[p][t]) {
229+
continue;
230+
}
231+
232+
if (best == NULL || lattice[p][t]->score > best->score) {
233+
best = lattice[p][t];
234+
}
235+
}
236+
237+
output.resize(p+1);
238+
while (best) {
239+
output[best->i] = best->l;
240+
best = best->prev;
241+
}
242+
}
243+
244+
void free_lattice() {
245+
size_t L = lattice.total_size();
246+
const ViterbiLatticeItem ** p = lattice.c_buf();
247+
for (size_t i = 0; i < L; ++ i) {
248+
if (p[i]) {
249+
delete p[i];
250+
p[i] = 0;
251+
}
252+
}
253+
}
254+
255+
void lattice_insert(const ViterbiLatticeItem* &position,
256+
const ViterbiLatticeItem * const item) {
257+
if (position == NULL) {
258+
position = item;
259+
} else if (position->score < item->score) {
260+
delete position;
261+
position = item;
262+
} else {
263+
delete item;
264+
}
265+
}
266+
267+
math::Mat< const ViterbiLatticeItem * > lattice;
268+
};
269+
270+
} // namespace framework
271+
} // namespace ltp
272+
273+
#endif // end for __LTP_FRAMEWORK_DECODER_H__

0 commit comments

Comments
 (0)