From 5e90d620d4e438a9df81c7dd27d7d50192ebbcac Mon Sep 17 00:00:00 2001 From: Kai Sheng Tai Date: Thu, 17 Dec 2015 11:56:09 -0800 Subject: [PATCH] Updated link to GloVe vectors --- .gitignore | 4 ++++ fetch_and_preprocess.sh | 1 + scripts/download.py | 11 +++-------- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 2a9a0a1..048cbfc 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,7 @@ predictions trained_models *~ #*# +*.class +lib/stanford-parser +lib/stanford-tagger + diff --git a/fetch_and_preprocess.sh b/fetch_and_preprocess.sh index dd12c13..c0bfda4 100755 --- a/fetch_and_preprocess.sh +++ b/fetch_and_preprocess.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e python2.7 scripts/download.py CLASSPATH="lib:lib/stanford-parser/stanford-parser.jar:lib/stanford-parser/stanford-parser-3.5.1-models.jar" diff --git a/scripts/download.py b/scripts/download.py index a1f81fd..e33647a 100644 --- a/scripts/download.py +++ b/scripts/download.py @@ -44,6 +44,7 @@ def download(url, dirpath): return filepath def unzip(filepath): + print("Extracting: " + filepath) dirpath = os.path.dirname(filepath) with zipfile.ZipFile(filepath) as zf: zf.extractall(dirpath) @@ -83,14 +84,8 @@ def download_wordvecs(dirpath): return else: os.makedirs(dirpath) - url = 'http://www-nlp.stanford.edu/data/glove.840B.300d.txt.gz' - filepath = download(url, dirpath) - print('extracting ' + filepath) - with gzip.open(filepath, 'rb') as gf: - with open(filepath[:-3], 'w') as f: - for line in gf: - f.write(line) - os.remove(filepath) + url = 'http://www-nlp.stanford.edu/data/glove.840B.300d.zip' + unzip(download(url, dirpath)) def download_sick(dirpath): if os.path.exists(dirpath):