From 42be01dabf568e158bba20a4871e03795b1fc82e Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Wed, 19 Jun 2019 08:21:42 -0400
Subject: [PATCH 01/11] Add fastText to CNN text classification examples

---
 .../examples/cnn-text-classification/README.md      | 13 +++++++++++++
 .../src/cnn_text_classification/data_helper.clj     |  8 ++++++++
 2 files changed, 21 insertions(+)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index f2ed939bee16..191bf6fe029e 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -49,6 +49,19 @@ and then run
 - `lein uberjar`
 - `java -Xms1024m -Xmx2048m -jar target/cnn-text-classification-0.1.0-SNAPSHOT-standalone.jar`
 
+## Usage with fastText
+
+Using fastText instead of glove is fairly straightforward, as the pretrained embedding format is very similar.
+
+Download the wiki news 300d 1M pre-trained word vectors from the fastText [site](https://fasttext.cc/docs/en/english-vectors.html).
+
+Unzip the word vectors and place them in the `data/fastText` directory.
+
+Then you can run training on a subset of examples through the repl using:
+```
+(train-convnet {:embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
+```
+
 ## Usage with word2vec
 
 You can also use word2vec embeddings in order to train the text classification model.
diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index 82ba13087a37..4c5a3d84699d 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -33,6 +33,8 @@
   [embedding-size]
   (format "data/glove/glove.6B.%dd.txt" embedding-size))
 
+(def fasttext-file-path "data/fastText/wiki-news-300d-1M.vec")
+
 (defn r-string
   "Reads a string from the given DataInputStream `dis` until a space or newline is reached."
   [dis]
@@ -100,6 +102,10 @@
   (println "Loading the glove pre-trained word embeddings from " glove-file-path)
   (into {} (read-text-embedding-pairs (io/reader glove-file-path))))
 
+(defn load-fasttext [fasttext-file-path]
+  (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path)
+  (into {} (read-text-embedding-pairs (io/reader fasttext-file-path))))
+
 (defn clean-str [s]
   (-> s
       (string/replace #"^A-Za-z0-9(),!?'`]" " ")
@@ -190,6 +196,8 @@
         vocab-embeddings (case pretrained-embedding
                            :glove (->> (load-glove (glove-file-path embedding-size))
                                        (build-vocab-embeddings vocab embedding-size))
+                           :fastText (->> (load-fasttext fasttext-file-path)
+                                          (build-vocab-embeddings vocab embedding-size))
                            :word2vec (->> (load-word2vec-model w2v-file-path embedding-size {:vocab vocab})
                                           (:word2vec)
                                           (build-vocab-embeddings vocab embedding-size))

From e4beba8ca2322ec2757d17a81de601f2d8fe6c73 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Sun, 23 Jun 2019 12:30:37 -0400
Subject: [PATCH 02/11] Update repl running instructions

---
 .../examples/cnn-text-classification/README.md           | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index 191bf6fe029e..ac0b2d579b44 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -29,8 +29,7 @@ You also must download the glove word embeddings. The suggested one to use is th
 ## Usage
 
 You can run through the repl with
-`(train-convnet {:embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
-
+`(train-convnet {:devs [(context/cpu 0)] :embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
 or
 `JVM_OPTS="-Xmx1g" lein run` (cpu)
 
@@ -59,7 +58,7 @@ Unzip the word vectors and place them in the `data/fastText` directory.
 
 Then you can run training on a subset of examples through the repl using:
 ```
-(train-convnet {:embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
+(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
 ```
 
 ## Usage with word2vec
@@ -71,7 +70,7 @@ you'll need to unzip them and place them in the `contrib/clojure-package/data` d
 
 Then you can run training on a subset of examples through the repl using:
 ```
-(train-convnet {:embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
+(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
 ```
 Note that loading word2vec embeddings consumes memory and takes some time.
 
@@ -79,7 +78,7 @@ You can also train them using `JVM_OPTS="-Xmx8g" lein run` once you've modified
 the parameters to `train-convnet` (see above) in `src/cnn_text_classification/classifier.clj`.
 In order to run training with word2vec on the complete data set, you will need to run:
 ```
-(train-convnet {:embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
+(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
 ```
 You should be able to achieve an accuracy of `~0.78` using the parameters above.
 

From 5f421520a77ff4c26473d78b5f8328b5f690867f Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Sun, 23 Jun 2019 19:45:41 -0400
Subject: [PATCH 03/11] Complete solution with OOM workaround

---
 .../examples/cnn-text-classification/README.md   |  3 +++
 .../src/cnn_text_classification/data_helper.clj  | 16 +++++++++-------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index ac0b2d579b44..4d97f69fd183 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -56,6 +56,9 @@ Download the wiki news 300d 1M pre-trained word vectors from the fastText [site]
 
 Unzip the word vectors and place them in the `data/fastText` directory.
 
+To prevent OOM errors, we will not use all the trained embeddings; instead we'll take
+the first 10%: `head -n 100000 wiki-news-300d-1M.vec > wiki-news-300d-100K.vec`.
+
 Then you can run training on a subset of examples through the repl using:
 ```
 (train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index 4c5a3d84699d..b0dcf9cdc7c7 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -33,7 +33,7 @@
   [embedding-size]
   (format "data/glove/glove.6B.%dd.txt" embedding-size))
 
-(def fasttext-file-path "data/fastText/wiki-news-300d-1M.vec")
+(def fasttext-file-path "data/fastText/wiki-news-300d-100K.vec")
 
 (defn r-string
   "Reads a string from the given DataInputStream `dis` until a space or newline is reached."
@@ -77,8 +77,8 @@
            _  (println "Processing with " {:dim dim :word-size word-size} " loading max vectors " max-vectors)
            _ (if (not= embedding-size dim)
                (throw (ex-info "Mismatch in embedding size"
-                      {:input-embedding-size embedding-size
-                       :word2vec-embedding-size dim})))
+                       {:input-embedding-size embedding-size
+                        :word2vec-embedding-size dim})))
            vectors (load-w2v-vectors dis dim max-vectors)
            word2vec (if vocab
                       (->> vectors
@@ -92,19 +92,21 @@
   ([path embedding-size]
    (load-word2vec-model path embedding-size {:max-vectors 100})))
 
-(defn read-text-embedding-pairs [rdr]
-  (for [^String line (line-seq rdr)
+(defn read-text-embedding-pairs [pairs]
+  (for [^String line pairs
         :let [fields (.split line " ")]]
     [(aget fields 0)
      (mapv #(Float/parseFloat ^String %) (rest fields))]))
 
 (defn load-glove [glove-file-path]
   (println "Loading the glove pre-trained word embeddings from " glove-file-path)
-  (into {} (read-text-embedding-pairs (io/reader glove-file-path))))
+  (into {} (read-text-embedding-pairs (line-seq (io/reader glove-file-path)))))
+
+(def remove-fasttext-metadata rest)
 
 (defn load-fasttext [fasttext-file-path]
   (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path)
-  (into {} (read-text-embedding-pairs (io/reader fasttext-file-path))))
+  (into {} (read-text-embedding-pairs (remove-fasttext-metadata (line-seq (io/reader fasttext-file-path))))))
 
 (defn clean-str [s]
   (-> s

From add3ce9d6ec6dbae2e239af0e14793be7370f179 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Mon, 24 Jun 2019 14:53:53 -0400
Subject: [PATCH 04/11] Complete solution with smaller fastText dataset

---
 .../examples/cnn-text-classification/README.md           | 9 +++------
 .../src/cnn_text_classification/data_helper.clj          | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index 4d97f69fd183..8b754eb70faa 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -52,12 +52,9 @@ and then run
 
 Using fastText instead of glove is fairly straightforward, as the pretrained embedding format is very similar.
 
-Download the wiki news 300d 1M pre-trained word vectors from the fastText [site](https://fasttext.cc/docs/en/english-vectors.html).
-
-Unzip the word vectors and place them in the `data/fastText` directory.
-
-To prevent OOM errors, we will not use all the trained embeddings; instead we'll take
-the first 10%: `head -n 100000 wiki-news-300d-1M.vec > wiki-news-300d-100K.vec`.
+Download the 'Simple English' pretrained wiki word vectors (text) from the fastText
+[site](https://fasttext.cc/docs/en/pretrained-vectors.html) and place them in the
+`data/fastText` directory.
 
 Then you can run training on a subset of examples through the repl using:
 ```
diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index b0dcf9cdc7c7..4ef61fcddb40 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -33,7 +33,7 @@
   [embedding-size]
   (format "data/glove/glove.6B.%dd.txt" embedding-size))
 
-(def fasttext-file-path "data/fastText/wiki-news-300d-100K.vec")
+(def fasttext-file-path "data/fastText/wiki.simple.vec")
 
 (defn r-string
   "Reads a string from the given DataInputStream `dis` until a space or newline is reached."

From a8d3bf318514db4c82e293cd919494679b523445 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Mon, 24 Jun 2019 15:04:56 -0400
Subject: [PATCH 05/11] Add approx validation accuracy to readme

---
 .../clojure-package/examples/cnn-text-classification/README.md  | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index 8b754eb70faa..9e1d8b89dd03 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -61,6 +61,8 @@ Then you can run training on a subset of examples through the repl using:
 (train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
 ```
 
+Expect a validation accuracy of `~0.67` with the above parameters.
+
 ## Usage with word2vec
 
 You can also use word2vec embeddings in order to train the text classification model.

From 6fa3db3b2b2c42b2bc171635528697753e91891d Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:29:07 -0400
Subject: [PATCH 06/11] Add threading macro

---
 .../src/cnn_text_classification/data_helper.clj       | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index 4ef61fcddb40..034a86697e47 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -100,13 +100,20 @@
 
 (defn load-glove [glove-file-path]
   (println "Loading the glove pre-trained word embeddings from " glove-file-path)
-  (into {} (read-text-embedding-pairs (line-seq (io/reader glove-file-path)))))
+  (->> (io/reader glove-file-path)
+       line-seq
+       read-text-embedding-pairs
+       (into {})))
 
 (def remove-fasttext-metadata rest)
 
 (defn load-fasttext [fasttext-file-path]
   (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path)
-  (into {} (read-text-embedding-pairs (remove-fasttext-metadata (line-seq (io/reader fasttext-file-path))))))
+  (->> (io/reader fasttext-file-path)
+       line-seq
+       remove-fasttext-metadata
+       read-text-embedding-pairs
+       (into {})))
 
 (defn clean-str [s]
   (-> s

From 0e94485a2d2d7b4c3b1acf6c4a2d1b93b780a601 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:32:32 -0400
Subject: [PATCH 07/11] Use consistent fasttext casing

---
 .../src/cnn_text_classification/data_helper.clj               | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index 034a86697e47..b6824212fb16 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -33,7 +33,7 @@
   [embedding-size]
   (format "data/glove/glove.6B.%dd.txt" embedding-size))
 
-(def fasttext-file-path "data/fastText/wiki.simple.vec")
+(def fasttext-file-path "data/fasttext/wiki.simple.vec")
 
 (defn r-string
   "Reads a string from the given DataInputStream `dis` until a space or newline is reached."
@@ -205,7 +205,7 @@
         vocab-embeddings (case pretrained-embedding
                            :glove (->> (load-glove (glove-file-path embedding-size))
                                        (build-vocab-embeddings vocab embedding-size))
-                           :fastText (->> (load-fasttext fasttext-file-path)
+                           :fasttext (->> (load-fasttext fasttext-file-path)
                                           (build-vocab-embeddings vocab embedding-size))
                            :word2vec (->> (load-word2vec-model w2v-file-path embedding-size {:vocab vocab})
                                           (:word2vec)

From 2466e18beabc74f922d26fbf2416d2534731ebd9 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:33:58 -0400
Subject: [PATCH 08/11] Add bangs to io reader functions

---
 .../src/cnn_text_classification/data_helper.clj    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
index b6824212fb16..df132c3167cd 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
+++ b/contrib/clojure-package/examples/cnn-text-classification/src/cnn_text_classification/data_helper.clj
@@ -64,7 +64,7 @@
           vect (mapv (fn [_] (read-float dis)) (range embedding-size))]
       (cons [word vect] (lazy-seq (load-w2v-vectors dis embedding-size (dec num-vectors)))))))
 
-(defn load-word2vec-model
+(defn load-word2vec-model!
   "Loads the word2vec model stored in a binary format from the given `path`.
   By default only the first 100 embeddings are loaded."
   ([path embedding-size opts]
@@ -90,7 +90,7 @@
        (println "Finished")
        {:num-embed dim :word2vec word2vec})))
   ([path embedding-size]
-   (load-word2vec-model path embedding-size {:max-vectors 100})))
+   (load-word2vec-model! path embedding-size {:max-vectors 100})))
 
 (defn read-text-embedding-pairs [pairs]
   (for [^String line pairs
@@ -98,7 +98,7 @@
     [(aget fields 0)
      (mapv #(Float/parseFloat ^String %) (rest fields))]))
 
-(defn load-glove [glove-file-path]
+(defn load-glove! [glove-file-path]
   (println "Loading the glove pre-trained word embeddings from " glove-file-path)
   (->> (io/reader glove-file-path)
        line-seq
@@ -107,7 +107,7 @@
 
 (def remove-fasttext-metadata rest)
 
-(defn load-fasttext [fasttext-file-path]
+(defn load-fasttext! [fasttext-file-path]
   (println "Loading the fastText pre-trained word embeddings from " fasttext-file-path)
   (->> (io/reader fasttext-file-path)
        line-seq
@@ -203,11 +203,11 @@
         sentences-padded  (pad-sentences sentences)
         vocab (build-vocab sentences-padded)
         vocab-embeddings (case pretrained-embedding
-                           :glove (->> (load-glove (glove-file-path embedding-size))
+                           :glove (->> (load-glove! (glove-file-path embedding-size))
                                        (build-vocab-embeddings vocab embedding-size))
-                           :fasttext (->> (load-fasttext fasttext-file-path)
+                           :fasttext (->> (load-fasttext! fasttext-file-path)
                                           (build-vocab-embeddings vocab embedding-size))
-                           :word2vec (->> (load-word2vec-model w2v-file-path embedding-size {:vocab vocab})
+                           :word2vec (->> (load-word2vec-model! w2v-file-path embedding-size {:vocab vocab})
                                           (:word2vec)
                                           (build-vocab-embeddings vocab embedding-size))
                            vocab)

From 74715345ebc2235171eb845655d2e76ac2876b91 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:35:06 -0400
Subject: [PATCH 09/11] Reference default context setting in readme

---
 .../examples/cnn-text-classification/README.md            | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index 9e1d8b89dd03..aa7640ff3190 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -29,7 +29,7 @@ You also must download the glove word embeddings. The suggested one to use is th
 ## Usage
 
 You can run through the repl with
-`(train-convnet {:devs [(context/cpu 0)] :embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
+`(train-convnet {:devs [(context/default-context)] :embedding-size 50 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :glove})`
 or
 `JVM_OPTS="-Xmx1g" lein run` (cpu)
 
@@ -58,7 +58,7 @@ Download the 'Simple English' pretrained wiki word vectors (text) from the fastT
 
 Then you can run training on a subset of examples through the repl using:
 ```
-(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
+(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
 ```
 
 Expect a validation accuracy of `~0.67` with the above parameters.
@@ -72,7 +72,7 @@ you'll need to unzip them and place them in the `contrib/clojure-package/data` d
 
 Then you can run training on a subset of examples through the repl using:
 ```
-(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
+(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :word2vec})
 ```
 Note that loading word2vec embeddings consumes memory and takes some time.
 
@@ -80,7 +80,7 @@ You can also train them using `JVM_OPTS="-Xmx8g" lein run` once you've modified
 the parameters to `train-convnet` (see above) in `src/cnn_text_classification/classifier.clj`.
 In order to run training with word2vec on the complete data set, you will need to run:
 ```
-(train-convnet {:devs [(context/cpu 0)] :embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
+(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 1000 :num-epoch 10 :pretrained-embedding :word2vec})
 ```
 You should be able to achieve an accuracy of `~0.78` using the parameters above.
 

From 9d89cbca131125aa73110c6987b34a54cece3dc9 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:36:18 -0400
Subject: [PATCH 10/11] Change fasttext references in readme

---
 .../examples/cnn-text-classification/README.md                | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index aa7640ff3190..cdae5ff0d308 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -54,11 +54,11 @@ Using fastText instead of glove is fairly straightforward, as the pretrained emb
 
 Download the 'Simple English' pretrained wiki word vectors (text) from the fastText
 [site](https://fasttext.cc/docs/en/pretrained-vectors.html) and place them in the
-`data/fastText` directory.
+`data/fasttext` directory.
 
 Then you can run training on a subset of examples through the repl using:
 ```
-(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fastText})
+(train-convnet {:devs [(context/default-context)] :embedding-size 300 :batch-size 100 :test-size 100 :num-epoch 10 :max-examples 1000 :pretrained-embedding :fasttext})
 ```
 
 Expect a validation accuracy of `~0.67` with the above parameters.

From f454f3cae46aa5521dff6bfea739736d93a9db79 Mon Sep 17 00:00:00 2001
From: Alexander Chalk <contact@alexchalk.net>
Date: Fri, 28 Jun 2019 11:50:37 -0400
Subject: [PATCH 11/11] Add data fetching shell script for fasttext

---
 .../cnn-text-classification/README.md         |  2 +-
 .../get_fasttext_data.sh                      | 24 +++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100755 contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh

diff --git a/contrib/clojure-package/examples/cnn-text-classification/README.md b/contrib/clojure-package/examples/cnn-text-classification/README.md
index cdae5ff0d308..8f8e6200ec7c 100644
--- a/contrib/clojure-package/examples/cnn-text-classification/README.md
+++ b/contrib/clojure-package/examples/cnn-text-classification/README.md
@@ -54,7 +54,7 @@ Using fastText instead of glove is fairly straightforward, as the pretrained emb
 
 Download the 'Simple English' pretrained wiki word vectors (text) from the fastText
 [site](https://fasttext.cc/docs/en/pretrained-vectors.html) and place them in the
-`data/fasttext` directory.
+`data/fasttext` directory. Alternatively, just run `./get_fasttext_data.sh`.
 
 Then you can run training on a subset of examples through the repl using:
 ```
diff --git a/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh b/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh
new file mode 100755
index 000000000000..2bfe96659402
--- /dev/null
+++ b/contrib/clojure-package/examples/cnn-text-classification/get_fasttext_data.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -evx  # -e: stop at the first failing command; -v/-x: echo each line as read and as executed
+# Download the fastText 'Simple English' wiki vectors to data/fasttext/wiki.simple.vec, the path data_helper.clj reads.
+mkdir -p data/fasttext  # path is relative to the current directory, so run this from the example's root
+cd data/fasttext
+wget -c https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/wiki.simple.vec  # -c resumes a partial download and avoids a duplicate wiki.simple.vec.1 on re-run