forked from opencog/relex
-
Notifications
You must be signed in to change notification settings - Fork 0
/
multi-page.sh
executable file
·68 lines (55 loc) · 1.42 KB
/
multi-page.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#
# multi-page.sh: example of parsing multiple texts at once.
# This is a kludgy utility for parsing several files in parallel
#
# Glob pattern selecting the input texts. It is stored unexpanded and only
# expanded where the list is iterated.
filelist='../wiki/simplewiki-20080629-stripped/*'

# Upper bound on simultaneously running parses
# (pick the number of CPU cores on your system).
maxjobs=4

LANG=en_US.UTF-8
export LANG

# JVM options.
VM_OPTS='-Xmx1024m'

# System properties handed to the JVM, kept as one space-separated string
# (it is word-split at the point of use).
RELEX_OPTS="-Drelex.algpath=data/relex-semantic.algs "
RELEX_OPTS+="-Dwordnet.configfile=data/wordnet/file_properties.xml "
RELEX_OPTS+="-Djava.library.path=../../lib "
RELEX_OPTS+="-Dgate.home=../../share/java "
RELEX_OPTS+="-Dgate.plugins.home=../../share/java "
RELEX_OPTS+="-Dgate.site.config=../../share/java "

# The "-classpath" flag followed by its colon-separated value, again kept
# as a single string that is word-split at the point of use.
CLASSPATH="-classpath bin:"
CLASSPATH+="../../share/java/opennlp-tools-1.3.0.jar:"
CLASSPATH+="../../share/java/maxent-2.4.0.jar:"
CLASSPATH+="../../share/java/trove.jar:"
CLASSPATH+="../../share/java/jwnl.jar:"
CLASSPATH+="../../share/java/commons-logging.jar:"
CLASSPATH+="../../share/java/gnu-getopt.jar:"
CLASSPATH+="../../share/java/link-grammar-4.4.2.jar:"
#######################################
# Start a background parse of one input file with relex.WebFormat.
# Globals:   VM_OPTS, RELEX_OPTS, CLASSPATH (read)
# Arguments: $1 - path of the text file to parse
# Outputs:   prints the page URL derived from the file name to stdout;
#            the parser's stdout goes to ../wiki/parsed/<name>.xml and
#            its stderr to ../wiki/err/err-<name>
# Returns:   immediately; the parse runs as a background job, so the
#            caller is responsible for calling `wait`.
#######################################
function parseit {
  local infile="$1"
  local fn url out err

  fn="$(basename -- "$infile")"
  url="http://simple.wikipedia.org/wiki/$fn"
  out="../wiki/parsed/$fn.xml"
  err="../wiki/err/err-$fn"

  # Quoted so that titles containing whitespace or glob characters are
  # printed exactly as they are.
  printf '%s\n' "$url"

  # The three option variables are space-separated strings and must be
  # word-split, hence unquoted. The input redirection comes first so that
  # an unreadable input aborts the job before any output file is truncated.
  # shellcheck disable=SC2086
  nice java $VM_OPTS $RELEX_OPTS $CLASSPATH relex.WebFormat -g -n 4 \
    --url "$url" < "$infile" > "$out" 2> "$err" &
}
#######################################
# Run parseit on every file matched by the glob pattern in $filelist,
# starting at most $maxjobs background parses per batch.
# Globals:   filelist (read), maxjobs (read)
# Outputs:   prints the initial job count (0), then whatever parseit prints
# Returns:   after every started background job has finished
#######################################
function parse_all {
  local jobsrunning=0
  local filename

  echo "$jobsrunning"

  # $filelist holds a glob pattern, so it is deliberately left unquoted
  # to let the shell expand it here.
  # shellcheck disable=SC2086
  for filename in $filelist; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "$filename" ]] || continue

    if (( jobsrunning >= maxjobs )); then
      # The batch is full: let it drain, then start a new batch. The
      # current file is still parsed below rather than being dropped.
      wait
      jobsrunning=0
    fi

    parseit "$filename"
    jobsrunning=$(( jobsrunning + 1 ))
  done

  # Do not exit while the last (possibly partial) batch is still running.
  wait
}

parse_all