-
Notifications
You must be signed in to change notification settings - Fork 0
/
migrate-UMLS.sh
executable file
·125 lines (113 loc) · 3.04 KB
/
migrate-UMLS.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/env bash
#
# migrate-UMLS.sh - helper for moving a UMLS release into a PostgreSQL
# database for CodeMapper.
#
# Subcommands, meant to be run in this order:
#   download, subset, createdb, populate, dumpsql
# Environment:
#   API_KEY  read by the `download` step
#
# bash is requested explicitly because `set -o pipefail` is not implemented
# by every /bin/sh (older dash/ash abort on it), which would stop the script
# on its very first command.
set -euo pipefail
# die MESSAGE...
# Print MESSAGE (arguments joined by single spaces) on stderr and terminate
# the script with exit status 1.
die() {
  # "$*" is quoted so the text is emitted verbatim: no whitespace collapsing
  # and no glob expansion of characters such as '*' inside the message.
  printf '%s\n' "$*" >&2
  exit 1
}
# download UMLSVERSION
# Fetch the full UMLS archive for UMLSVERSION through the UTS download
# endpoint and unzip it in the current directory.
# Globals:
#   API_KEY       (read)    UTS API key; must be non-empty
#   VERSION, URL  (written)
# Exits through die on wrong usage or when API_KEY is unset/empty.
download() {
  # Usage is checked first so that `download -h` works without an API key.
  if [ $# -ne 1 ] || [ "${1:-}" = "-h" ]; then
    die "usage: $0 download UMLSVERSION"
  fi
  # ${API_KEY:-} keeps `set -u` from aborting with "unbound variable" before
  # the intended message can be shown.
  if [ -z "${API_KEY:-}" ]; then
    die "API_KEY missing"
  fi
  # The release is the first argument ($0 is the script name).
  VERSION=$1
  URL="https://download.nlm.nih.gov/umls/kss/$VERSION/umls-$VERSION-full.zip"
  # --fail makes curl exit non-zero on an HTTP error instead of saving the
  # error page, so the failure is reported here rather than by unzip.
  curl --fail -OJ "https://uts-ws.nlm.nih.gov/download?url=$URL&apiKey=$API_KEY"
  unzip "umls-$VERSION-full.zip"
}
# subset UMLSVERSION path/to/UMLS/VERSION-full
# Unpack mmsys.zip inside the given directory, then start the Java class
# org.java.plugin.boot.Boot from the fixed /d1/UMLS/MMSYS installation with
# the subset boot/log4j property files, reading /d1/UMLS and writing
# /d1/UMLS/METASUBSET.
# Globals written: VERSION, DIR, CONFIG_FILE, and the exported METADIR,
# DESTDIR, MMSYS_HOME, CLASSPATH, JAVA_HOME.
subset() {
  if [ $# -ne 2 ] || [ "${1:-}" = "-h" ]; then
    die "usage: $0 subset UMLSVERSION path/to/UMLS/VERSION-full"
  fi
  VERSION=$1
  DIR=$2

  cd "$DIR"
  unzip -o mmsys.zip
  echo "Now create the UMLS subset using 'umls/uts-umls.prop'"

  # Equivalent of the distribution's ./run_<host>.sh launcher, with the
  # locations hard-coded for this machine.
  METADIR=/d1/UMLS
  DESTDIR=/d1/UMLS/METASUBSET
  MMSYS_HOME=/d1/UMLS/MMSYS
  CLASSPATH="$MMSYS_HOME:$MMSYS_HOME/lib/jpf-boot.jar"
  JAVA_HOME="$MMSYS_HOME/jre/linux"
  # CONFIG_FILE is only passed on the java command line, so it is not exported.
  CONFIG_FILE=/d1/umls/config.properties
  export METADIR DESTDIR MMSYS_HOME CLASSPATH JAVA_HOME

  cd "$MMSYS_HOME"
  "$JAVA_HOME/bin/java" \
    -Djava.awt.headless=true \
    "-Djpf.boot.config=$MMSYS_HOME/etc/subset.boot.properties" \
    "-Dlog4j.configuration=$MMSYS_HOME/etc/subset.log4j.properties" \
    "-Dinput.uri=$METADIR" \
    "-Doutput.uri=$DESTDIR" \
    "-Dmmsys.config.uri=$CONFIG_FILE" \
    -Xms300M \
    -Xmx1000M \
    org.java.plugin.boot.Boot
}
# SQL that createdb runs inside the new database to give the codemapper role
# read access. :"dbname" is a psql variable reference that psql expands to a
# properly quoted identifier; createdb supplies it with `-v dbname=...`.
# CONNECT is a database-level privilege, so it has to be granted
# `on database <name>`.
# NOTE(review): `grant select on all tables` presumably covers only tables
# that exist when it runs; since populate creates the tables afterwards,
# confirm whether this grant must be repeated after populate.
GRANTDB='
grant connect on database :"dbname" to codemapper;
grant usage on schema public to codemapper;
grant select on all tables in schema public to codemapper;
'

# createdb dbname
# Create the PostgreSQL database `dbname` and apply the GRANTDB statements
# inside it.
# Globals: GRANTDB (read), DBNAME (written)
# Exits through die on wrong usage.
createdb() {
  if [ $# -ne 1 ] || [ "${1:-}" = "-h" ]; then
    die "usage: $0 createdb dbname"
  fi
  DBNAME=$1
  # The database name (not the grant script) is what must be created. It is
  # handed to psql as a variable and quoted by psql itself, so the SQL text
  # never contains the raw shell value. The statements are fed on stdin
  # because psql does not expand variables in -c commands.
  printf '%s\n' 'create database :"dbname";' \
    | psql -v dbname="$DBNAME"
  printf '%s\n' "$GRANTDB" \
    | psql -v dbname="$DBNAME" "$DBNAME"
}
# populate dbname path/to/UMLS/VERSION-codemapper
# Create and fill the tables, then create the indexes, in database `dbname`.
# Reads umls-tables.sql and umls-indexes.sql from the current directory; the
# @META@ placeholder in umls-tables.sql is replaced by "<UMLS dir>/META".
# Globals: DBNAME, UMLSDIR (written)
# Exits through die on wrong usage.
populate() {
  if [ $# -ne 2 ] || [ "${1:-}" = "-h" ]; then
    die "usage: $0 populate dbname path/to/UMLS/VERSION-codemapper"
  fi
  DBNAME=$1
  # Second argument, not the literal "2".
  UMLSDIR=$2
  echo
  echo CREATE AND FILL TABLES
  # Both steps target the database named on the command line instead of a
  # fixed placeholder name.
  # NOTE(review): '|' is the sed delimiter, so a UMLS path containing '|' or
  # '&' would corrupt the substitution.
  sed "s|@META@|$UMLSDIR/META|" umls-tables.sql | psql "$DBNAME"
  echo
  echo CREATE INDEXES
  psql "$DBNAME" < umls-indexes.sql
}
# dumpsql dbname path/to/umls-version.sql.gz
# Dump database `dbname` with pg_dump (ownership statements omitted) and
# write the gzip-compressed SQL to the given file.
# Globals: DBNAME, FILE (written)
# Exits through die on wrong usage.
dumpsql() {
  if [ $# -ne 2 ] || [ "${1:-}" = "-h" ]; then
    die "usage: $0 dumpsql dbname path/to/umls-version.sql.gz"
  fi
  DBNAME=$1
  FILE=$2
  pg_dump --no-owner "$DBNAME" | gzip > "$FILE"
}
# The closing brace above was missing: without it the function body swallowed
# the rest of the script and the shell rejected the file at end of input.
# Entry point: the first argument selects the step, the remaining arguments
# are handed to that step's function. Anything else (including no argument
# at all) prints the overview and exits with status 1 via die.
#
# ${1:-} and the guarded shift keep `set -u` / `set -e` from aborting a bare
# invocation before the usage text can be shown.
subcommand=${1:-}
if [ $# -gt 0 ]; then
  shift
fi
case "$subcommand" in
  download)
    download "$@"
    ;;
  subset)
    subset "$@"
    ;;
  createdb)
    createdb "$@"
    ;;
  populate)
    populate "$@"
    ;;
  dumpsql)
    dumpsql "$@"
    ;;
  *)
    echo "Usage:"
    echo "1. download the UMLS distribution file"
    echo "2. create the CodeMapper subset of the UMLS"
    echo "3. create the SQL database and grant permissions to codemapper"
    echo "4. populate the SQL database with the CodeMapper subset"
    echo "5. dump the SQL database for transfer to the server"
    die "usage: $0 download|subset|createdb|populate|dumpsql -h"
    ;;
esac