Commit 0c2b166

Updated bulk
1 parent 51683b9 commit 0c2b166

5 files changed: +495 -11 lines

.gitignore: +4

@@ -7,11 +7,15 @@ dynet-cpp/rnnlm-seq
 dynet-cpp/treenn
 dynet-cpp/treenn-bulk
 dynet-cpp/bilstm-tagger
+dynet-cpp/bilstm-tagger-bulk
 dynet-cpp/bilstm-tagger-withchar
+dynet-cpp/bilstm-tagger-withchar-bulk
 dynet-cpp/rnnlm-batch-gpu
 dynet-cpp/rnnlm-seq-gpu
 dynet-cpp/treenn-gpu
 dynet-cpp/treenn-bulk-gpu
 dynet-cpp/bilstm-tagger-gpu
+dynet-cpp/bilstm-tagger-bulk-gpu
 dynet-cpp/bilstm-tagger-withchar-gpu
+dynet-cpp/bilstm-tagger-withchar-bulk-gpu
 dynet-benchmark-results*.tar.gz

dynet-cpp/Makefile: +15 -3

@@ -16,12 +16,12 @@ else
 CXX_FLAGS=-std=c++11 -I${EIGEN_PATH} -I${DYNET_PATH} -L${DYNET_PATH}/build/dynet -DBOOST_REGEX -lboost_regex -Ofast
 endif
 
-all: rnnlm-batch treenn treenn-bulk bilstm-tagger bilstm-tagger-withchar
+all: rnnlm-batch treenn treenn-bulk bilstm-tagger bilstm-tagger-bulk bilstm-tagger-withchar bilstm-tagger-withchar-bulk
 
-gpu: rnnlm-batch-gpu treenn-gpu treenn-bulk-gpu bilstm-tagger-gpu bilstm-tagger-withchar-gpu
+gpu: rnnlm-batch-gpu treenn-gpu treenn-bulk-gpu bilstm-tagger-gpu bilstm-tagger-bulk-gpu bilstm-tagger-withchar-gpu bilstm-tagger-withchar-bulk-gpu
 
 clean:
-	rm -f rnnlm-batch treenn treenn-bulk bilstm-tagger bilstm-tagger-withchar rnnlm-batch-gpu treenn-gpu treenn-bulk-gpu bilstm-tagger-gpu bilstm-tagger-withchar-gpu
+	rm -f rnnlm-batch treenn treenn-bulk bilstm-tagger bilstm-tagger-bulk bilstm-tagger-withchar bilstm-tagger-withchar-bulk rnnlm-batch-gpu treenn-gpu treenn-bulk-gpu bilstm-tagger-gpu bilstm-tagger-bulk-gpu bilstm-tagger-withchar-gpu bilstm-tagger-withchar-bulk-gpu
 
 rnnlm-batch: rnnlm-batch.cc
 	${CC} -o rnnlm-batch rnnlm-batch.cc ${CXX_FLAGS} ${DYNET_LIB_CPU}

@@ -38,9 +38,15 @@ treenn-bulk: treenn-bulk.cc
 bilstm-tagger: bilstm-tagger.cc
 	${CC} -o bilstm-tagger bilstm-tagger.cc ${CXX_FLAGS} ${DYNET_LIB_CPU}
 
+bilstm-tagger-bulk: bilstm-tagger-bulk.cc
+	${CC} -o bilstm-tagger-bulk bilstm-tagger-bulk.cc ${CXX_FLAGS} ${DYNET_LIB_CPU}
+
 bilstm-tagger-withchar: bilstm-tagger-withchar.cc
 	${CC} -o bilstm-tagger-withchar bilstm-tagger-withchar.cc ${CXX_FLAGS} ${DYNET_LIB_CPU}
 
+bilstm-tagger-withchar-bulk: bilstm-tagger-withchar-bulk.cc
+	${CC} -o bilstm-tagger-withchar-bulk bilstm-tagger-withchar-bulk.cc ${CXX_FLAGS} ${DYNET_LIB_CPU}
+
 rnnlm-batch-gpu: rnnlm-batch.cc
 	${CC} -o rnnlm-batch-gpu rnnlm-batch.cc ${CXX_FLAGS} ${DYNET_LIB_GPU}

@@ -56,5 +62,11 @@ treenn-bulk-gpu: treenn-bulk.cc
 bilstm-tagger-gpu: bilstm-tagger.cc
 	${CC} -o bilstm-tagger-gpu bilstm-tagger.cc ${CXX_FLAGS} ${DYNET_LIB_GPU}
 
+bilstm-tagger-bulk-gpu: bilstm-tagger-bulk.cc
+	${CC} -o bilstm-tagger-bulk-gpu bilstm-tagger-bulk.cc ${CXX_FLAGS} ${DYNET_LIB_GPU}
+
 bilstm-tagger-withchar-gpu: bilstm-tagger-withchar.cc
 	${CC} -o bilstm-tagger-withchar-gpu bilstm-tagger-withchar.cc ${CXX_FLAGS} ${DYNET_LIB_GPU}
+
+bilstm-tagger-withchar-bulk-gpu: bilstm-tagger-withchar-bulk.cc
+	${CC} -o bilstm-tagger-withchar-bulk-gpu bilstm-tagger-withchar-bulk.cc ${CXX_FLAGS} ${DYNET_LIB_GPU}
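
With these targets in place, the bulk variants build the same way as the existing benchmarks, e.g. "make bilstm-tagger-bulk" for the CPU binary or "make gpu" for all GPU binaries, assuming EIGEN_PATH and DYNET_PATH point at Eigen and DyNet checkouts as the Makefile expects.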

dynet-cpp/bilstm-tagger-bulk.cc: +221 (new file)

@@ -0,0 +1,221 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <algorithm>
#include <cassert>
#include <stdexcept>
#include <fstream>
#include <chrono>
#ifdef BOOST_REGEX
#include <boost/regex.hpp>
using namespace boost;
#else
#include <regex>
#endif

#include <dynet/training.h>
#include <dynet/expr.h>
#include <dynet/dict.h>
#include <dynet/lstm.h>

using namespace std;
using namespace std::chrono;
using namespace dynet;
using namespace dynet::expr;

// Read a file where each line is of the form "word1|tag1 word2|tag2 ..."
// Yields pairs of lists of the form < [word1, word2, ...], [tag1, tag2, ...] >
vector<pair<vector<string>, vector<string> > > read(const string & fname) {
  ifstream fh(fname);
  if(!fh) throw std::runtime_error("Could not open file");
  string str;
  regex re("[ |]");
  vector<pair<vector<string>, vector<string> > > sents;
  while(getline(fh, str)) {
    pair<vector<string>,vector<string> > word_tags;
    sregex_token_iterator first{str.begin(), str.end(), re, -1}, last;
    while(first != last) {
      word_tags.first.push_back(*first++);
      assert(first != last);
      word_tags.second.push_back(*first++);
    }
    sents.push_back(word_tags);
  }
  return sents;
}
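
// For example (hypothetical data): the input line "the|DT dog|NN barks|VBZ"
// parses to < ["the", "dog", "barks"], ["DT", "NN", "VBZ"] >.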

class BiLSTMTagger {
public:

  BiLSTMTagger(unsigned layers, unsigned wembed_dim, unsigned hidden_dim, unsigned mlp_dim, Model & model, Dict & wv, Dict & tv, unordered_map<string,int> & wc)
      : wv(wv), tv(tv), wc(wc) {
    unsigned nwords = wv.size();
    unsigned ntags = tv.size();
    word_lookup = model.add_lookup_parameters(nwords, {wembed_dim});

    // MLP on top of biLSTM outputs: hidden_dim*2 -> mlp_dim -> ntags
    pH = model.add_parameters({mlp_dim, hidden_dim*2});
    pO = model.add_parameters({ntags, mlp_dim});

    // word-level LSTMs
    fwdRNN = VanillaLSTMBuilder(layers, wembed_dim, hidden_dim, model); // layers, in-dim, out-dim, model
    bwdRNN = VanillaLSTMBuilder(layers, wembed_dim, hidden_dim, model);
  }

  Dict &wv, &tv;
  unordered_map<string,int> & wc;
  LookupParameter word_lookup;
  Parameter pH, pO;
  VanillaLSTMBuilder fwdRNN, bwdRNN;

  // Word representation: the embedding of w, or of <unk> for words seen <= 5 times in training
  Expression word_rep(ComputationGraph & cg, const string & w) {
    return lookup(cg, word_lookup, wv.convert(wc[w] > 5 ? w : "<unk>"));
  }

  vector<Expression> build_tagging_graph(ComputationGraph & cg, const vector<string> & words) {
    // parameters -> expressions
    Expression H = parameter(cg, pH);
    Expression O = parameter(cg, pO);

    // initialize the RNNs
    fwdRNN.new_graph(cg);
    bwdRNN.new_graph(cg);

    // get the word vectors: word_rep(...) returns a wembed_dim-dimensional vector expression for each word
    vector<Expression> wembs(words.size()), fwds(words.size()), bwds(words.size()), fbwds(words.size());
    for(size_t i = 0; i < words.size(); ++i)
      wembs[i] = word_rep(cg, words[i]);

    // feed word vectors into biLSTM
    fwdRNN.start_new_sequence();
    for(size_t i = 0; i < wembs.size(); ++i)
      fwds[i] = fwdRNN.add_input(wembs[i]);
    bwdRNN.start_new_sequence();
    for(size_t i = wembs.size(); i > 0; --i)
      bwds[i-1] = bwdRNN.add_input(wembs[i-1]);

    // Concatenate and MLP
    for(size_t i = 0; i < wembs.size(); ++i)
      fbwds[i] = O * tanh( H * concatenate({fwds[i], bwds[i]}) );

    return fbwds;
  }
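  // Note that each position's scores condition on the whole sentence:
  // fwds[i] summarizes words 0..i and bwds[i] summarizes words i..n-1
  // before the two are concatenated and fed through the MLP.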

  Expression sent_loss(ComputationGraph & cg, vector<string> & words, vector<string> & tags) {
    vector<Expression> exprs = build_tagging_graph(cg, words), errs(words.size());
    for(size_t i = 0; i < tags.size(); ++i)
      errs[i] = pickneglogsoftmax(exprs[i], tv.convert(tags[i]));
    return sum(errs);
  }

  vector<string> tag_sent(vector<string> & words) {
    ComputationGraph cg;
    vector<Expression> exprs = build_tagging_graph(cg, words);
    vector<string> tags(words.size());
    for(size_t i = 0; i < words.size(); ++i) {
      vector<float> scores = as_vector(exprs[i].value());
      size_t max_id = distance(scores.begin(), max_element(scores.begin(), scores.end()));
      tags[i] = tv.convert(max_id);
    }
    return tags;
  }

};
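
// The "bulk" strategy that gives this benchmark its name: each training step
// builds a single ComputationGraph, accumulates sent_loss() for BATCH_SIZE
// sentences, sums the losses, and runs at most one forward pass, one backward
// pass, and one update. In outline, matching the loop body in main() below:
//
//   ComputationGraph cg;
//   vector<Expression> losses;
//   for(size_t id2 = 0; id2 < batch; ++id2)
//     losses.push_back(tagger.sent_loss(cg, train[id1+id2].first, train[id1+id2].second));
//   Expression loss_exp = sum(losses);
//   float my_loss = as_scalar(cg.forward(loss_exp));  // backward()/update() follow, gated by LAST_STEP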

int main(int argc, char**argv) {

  time_point<system_clock> start = system_clock::now();

  vector<pair<vector<string>, vector<string> > > train = read("../data/tags/train.txt");
  vector<pair<vector<string>, vector<string> > > dev = read("../data/tags/dev.txt");
  Dict word_voc, tag_voc;
  unordered_map<string, int> word_cnt;
  for(auto & sent : train) {
    for(auto & w : sent.first) {
      word_voc.convert(w);
      word_cnt[w]++;
    }
    for(auto & t : sent.second)
      tag_voc.convert(t);
  }
  tag_voc.freeze();
  word_voc.convert("<unk>"); word_voc.freeze(); word_voc.set_unk("<unk>");

  // DyNet Starts
  dynet::initialize(argc, argv);
  Model model;
  AdamTrainer trainer(model);
  trainer.clipping_enabled = false;

  if(argc != 8) {
    cerr << "Usage: " << argv[0] << " WEMBED_SIZE HIDDEN_SIZE MLP_SIZE SPARSE BATCH_SIZE LAST_STEP TIMEOUT" << endl;
    return 1;
  }
  int WEMBED_SIZE = atoi(argv[1]);
  int HIDDEN_SIZE = atoi(argv[2]);
  int MLP_SIZE = atoi(argv[3]);
  trainer.sparse_updates_enabled = atoi(argv[4]);
  int BATCH_SIZE = atoi(argv[5]);
  int LAST_STEP = atoi(argv[6]);
  int TIMEOUT = atoi(argv[7]);

  // Initialize the tagger
  BiLSTMTagger tagger(1, WEMBED_SIZE, HIDDEN_SIZE, MLP_SIZE, model, word_voc, tag_voc, word_cnt);

  {
    duration<float> fs = (system_clock::now() - start);
    float startup_time = duration_cast<milliseconds>(fs).count() / float(1000);
    cout << "startup time: " << startup_time << endl;
  }

  // Do training
  shuffle(train.begin(), train.end(), *dynet::rndeng);
  start = system_clock::now();
  int i = 0, all_tagged = 0, this_words = 0;
  float this_loss = 0.f, all_time = 0.f;
  unsigned batch = BATCH_SIZE;
  for(int iter = 0; iter < 100; iter++) {
    for(size_t id1 = 0; id1 <= train.size()-batch; id1 += batch) {
      i += batch;
      if(i % 500 == 0) {
        trainer.status();
        cout << this_loss/this_words << endl;
        all_tagged += this_words;
        this_loss = 0.f;
        this_words = 0;
      }
      if(i % 5000 == 0) {
        duration<float> fs = (system_clock::now() - start);
        all_time += duration_cast<milliseconds>(fs).count() / float(1000);
        int dev_words = 0, dev_good = 0;
        float dev_loss = 0;
        for(auto & sent : dev) {
          vector<string> tags = tagger.tag_sent(sent.first);
          for(size_t j = 0; j < tags.size(); ++j)
            if(tags[j] == sent.second[j])
              dev_good++;
          dev_words += sent.second.size();
        }
        cout << "acc=" << dev_good/float(dev_words) << ", time=" << all_time << ", word_per_sec=" << all_tagged/all_time << endl;
        if(all_time > TIMEOUT)
          exit(0);
        start = system_clock::now();
      }

      ComputationGraph cg;
      vector<Expression> losses;
      for(size_t id2 = 0; id2 < batch; ++id2) {
        auto & s = train[id1+id2];
        losses.push_back(tagger.sent_loss(cg, s.first, s.second));
        this_words += s.first.size();
      }
      Expression loss_exp = sum(losses);
      float my_loss = as_scalar(cg.forward(loss_exp));
      this_loss += my_loss;
      if(LAST_STEP > 0) {
        cg.backward(loss_exp);
        if(LAST_STEP > 1)
          trainer.update();
      }
    }
    trainer.update_epoch(1.0);
  }
  return 0;
}
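
As the training loop shows, LAST_STEP controls how much work each batch performs: 0 runs only the forward pass, 1 adds the backward pass, and 2 additionally applies the parameter update, so the benchmark can time each stage in isolation.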
