-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlocalrc.sample
29 lines (28 loc) · 1.11 KB
/
localrc.sample
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# constants
## Current date and time as YYYYmmddHHMMSS; embedded in FILEOUT below
timestamp="$(date +%Y%m%d%H%M%S)"

# variables
## Text data <file> used to train the model
FILEIN=corpus/insuranceqa/w2v/iqa.w2v.train

## Size of the word vectors; default is 100
EMBEDDING_DIM=100

## Maximum skip length between words; default is 5
WINDOW=5

## Occurrence threshold: words appearing with higher frequency in the training
## data are randomly down-sampled; default is 1e-3, useful range is (0, 1e-5)
SAMPLE=1e-5

## Number of negative examples; default is 5, common values are 3 - 10
## (0 = not used)
NEGATIVE=1

## Hierarchical Softmax switch; default is 0 (not used)
HS=0

## Continuous bag of words model switch; default is 1 (0 selects skip-gram)
CBOW=0

## Number of training iterations; default is 5
ITER=30

## Save the resulting vectors in binary mode; default is 0 (off)
BINARY=1

## Words that appear fewer than <int> times are discarded; default is 5
MINCOUNT=5

## Number of threads to use
THREADS=30

## <file> that receives the resulting word vectors / word clusters.
## The name encodes the timestamp plus BINARY, NEGATIVE, CBOW, WINDOW, ITER,
## EMBEDDING_DIM and THREADS; SAMPLE, HS and MINCOUNT are not part of it.
FILEOUT="tmp/iqa.w2v.${timestamp}.bin${BINARY}.neg${NEGATIVE}.cbow${CBOW}.win${WINDOW}.iter${ITER}.embed${EMBEDDING_DIM}.thr${THREADS}"