-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdoc.h
51 lines (37 loc) · 986 Bytes
/
doc.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#ifndef _DOC_H
#define _DOC_H
#include <string>
#include <vector>
#include <list>
#include <cassert>
#include <sstream>
#include "biterm.h"
using namespace std;
/**
 * A document stored as a sequence of integer word ids.
 *
 * A Doc can be built either from a whitespace-separated string of integers
 * or from an existing vector of word ids, and can enumerate every unordered
 * pair of word positions as a Biterm.
 */
class Doc {
private:
  typedef std::vector<int>::size_type size_type;

  std::vector<int> ws;   // word sequence

public:
  /**
   * Build a document by parsing `s` as whitespace-separated integers.
   * Parsing stops at the first token that cannot be read as an int.
   */
  Doc(const std::string& s) { read_doc(s); }

  /** Build a document from a copy of the word-id sequence `ws`. */
  Doc(const std::vector<int>& ws) : ws(ws) { }

  /** Number of words in the document. */
  int size() const { return static_cast<int>(ws.size()); }

  /** Read-only access to the whole word sequence. */
  const std::vector<int>& get_ws() const { return ws; }

  /**
   * Word id at position `i`.
   *
   * `i` must satisfy 0 <= i < size(); this is checked with assert, so the
   * check disappears when NDEBUG is defined.
   */
  int get_w(int i) const {
    // Test the sign explicitly rather than relying on the unsigned
    // conversion of a negative index to make the comparison fail.
    assert(i >= 0 && static_cast<size_type>(i) < ws.size());
    return ws[static_cast<size_type>(i)];
  }

  /**
   * Append to `bs` one Biterm for every pair of positions (i, j) with i < j.
   *
   * `bs` is not cleared, so existing elements are kept. Documents with fewer
   * than two words contribute nothing. Every biterm is created with weight 1.
   */
  void gen_biterms(std::vector<Biterm>& bs) const {
    const size_type n = ws.size();
    if (n < 2) return;

    //double weight = double(1) / (ws.size()-1);   // for weighted BTM in our journal paper
    double weight = 1;   // for original BTM in our WWW2013 paper

    // `i + 1 < n` instead of `i < n - 1` keeps the bound free of unsigned
    // subtraction.
    for (size_type i = 0; i + 1 < n; ++i)
      for (size_type j = i + 1; j < n; ++j)
        bs.push_back( Biterm(ws[i], ws[j], weight) );
  }

private:
  /**
   * Append every leading integer token of `s` to the word sequence.
   * Extraction ends at end of input or at the first non-integer token.
   */
  void read_doc(const std::string& s) {
    std::istringstream iss(s);
    int w;
    while (iss >> w) ws.push_back(w);
  }
};
#endif