1 change: 1 addition & 0 deletions .gitignore

```diff
@@ -2,3 +2,4 @@
 /data/
 /dcs-data/
 *.log
+**/www/env
```

2 changes: 1 addition & 1 deletion Makefile

```diff
@@ -28,7 +28,7 @@ docs:
 
 # Creates and collects all data files required to use Vidyut.
 create_all_data:
-	@./scripts/create_all_data.sh
+	@./scripts/create_all_data.sh $(MAKE)
 
 create_sandhi_rules:
 	RUST_LOG=info cargo run --release --bin create_sandhi_rules -- \
```

8 changes: 4 additions & 4 deletions README.md

````diff
@@ -78,7 +78,7 @@ tests:
 ```shell
 $ git clone https://github.com/ambuda-org/vidyut.git
 $ cd vidyut
-$ make test
+$ make -j`nproc` test
 ```
 
 Your first build will likely take a few minutes, but future builds will
````

**Contributor:**

My understanding is that -j controls the number of make recipes that run in parallel. If so, what is the benefit of using -j here?

**Author:**

Cargo can respect the jobserver settings; see rust-lang/rust#42682.

Even if the workflow is currently serial, the steps could be decoupled in the future for faster builds.

For instance, `create_kosha` executes successfully before `create_sandhi_rules`, so there is no dependency between them (though the latter is quick). I think the cloning can also be parallelized, with recipes like this:

```make
mkdir build

get_corpus_data:
	@if [[ -e "data/raw/dcs" ]]; then \
		echo "Training data already exists -- skipping fetch."; \
	else \
		echo "Training data does not exist -- fetching."; \
		mkdir -p "data/raw/dcs"; \
		git clone https://github.com/OliverHellwig/sanskrit.git \
			--depth=1 build/dcs-data; \
		mv build/dcs-data/dcs/data/conllu data/raw/dcs/conllu; \
	fi

get_linguistic_data:
	@if [[ -e "data/raw/lex" ]]; then \
		echo "Lexical data already exists -- skipping fetch."; \
	else \
		echo "Lexical data does not exist -- fetching."; \
		mkdir -p "data/raw/lex"; \
		git clone https://github.com/sanskrit/data.git \
			--depth=1 build/data-git; \
		python3 build/data-git/bin/make_data.py \
			--make_prefixed_verbals; \
		mv build/data-git/all-data/* data/raw/lex; \
	fi
```

So the create_all_data.sh file really is a bottleneck: its several make commands execute one by one. Maybe a bit of parallelization there would help, as with test/train?
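
To make the benefit concrete, here is a sketch of how the flag propagates once the Makefile passes `$(MAKE)` through (this is standard GNU make jobserver behavior; the exact `MAKEFLAGS` value shown is illustrative):

```shell
$ make -j4 create_all_data
# The recipe runs: ./scripts/create_all_data.sh make
# Because the recipe line references $(MAKE), GNU make treats it as a
# recursive invocation and keeps the jobserver file descriptors open for
# the script's children. MAKEFLAGS (e.g. "-j4 --jobserver-auth=3,4") is
# exported to the script's environment, so each sub-make inside it
# (create_kosha, test_kosha, ...) rejoins the same pool of four job slots,
# and Cargo in turn honors the jobserver (rust-lang/rust#42682).
```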

````diff
@@ -87,7 +87,7 @@ be much faster.
 Next, we recommend creating and collecting our rich linguistic data:
 
 ```shell
-$ make create_all_data
+$ make -j`nproc` create_all_data
 ```
 
 This command will take several minutes, but most users will not need to re-run
````
```diff
@@ -157,8 +157,8 @@ Documentation
 -------------
 
 To view documentation for all crates (including private modules and structs),
-run `make docs`. This command will generate Rust's standard documentation and
-open it in your default web browser.
+run `make -j`nproc` docs`. This command will generate Rust's standard
+documentation and open it in your default web browser.
 
 
 Contributing
```

78 changes: 50 additions & 28 deletions scripts/create_all_data.sh

```diff
@@ -1,54 +1,76 @@
-#!/usr/bin/env sh
+#!/usr/bin/env bash
 
 # Create all of the linguistic data necessary for general usage.
 
-# Clean up temporary files, if they exist.
-rm -Rf data-git 2&> /dev/null
-rm -Rf dcs-data 2&> /dev/null
-
 # Exit if any step in this install script fails.
 set -e
 
+# Clean up temporary files, if they exist.
+rm -rf data-git
+rm -rf dcs-data
+
+# Create necessary directories.
+mkdir -p "data/build/${1}"
+
-echo "========================="
-echo "| DCS corpus data |"
-echo "========================="
-echo
-if [ -e "data/raw/dcs" ]; then
+echo -e "
+=========================
+| DCS corpus data |
+=========================
+"
+
+if [[ -e "data/raw/dcs" ]]; then
     echo "Training data already exists -- skipping fetch."
 else
     echo "Training data does not exist -- fetching."
+
     mkdir -p "data/raw/dcs"
     git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git dcs-data
+
     mv dcs-data/dcs/data/conllu data/raw/dcs/conllu
-    rm -Rf dcs-data
+    rm -rf dcs-data
 fi
-echo
-echo "========================="
-echo "| Linguistic data fetch |"
-echo "========================="
-echo
-if [ -e "data/raw/lex" ]; then
+
+
+echo -e "
+=========================
+| Linguistic data fetch |
+=========================
+"
+
+if [[ -e "data/raw/lex" ]]; then
     echo "Lexical data already exists -- skipping fetch."
 else
     echo "Lexical data does not exist -- fetching."
+
     mkdir -p "data/raw/lex"
     git clone --depth=1 https://github.com/sanskrit/data.git data-git
+
     python3 data-git/bin/make_data.py --make_prefixed_verbals
     mv data-git/all-data/* data/raw/lex
+
     rm -rf data-git
 fi
-echo
-echo "========================="
-echo "| Vidyut build |"
-echo "========================="
-echo
-make create_kosha
-make test_kosha
-make create_sandhi_rules
-make train_cheda
-make eval_cheda
-echo
-echo "Complete."
+
+
+echo -e "
+=========================
+| Vidyut build |
+=========================
+"
+
+if [[ "$1" == "" ]]; then
+    make_cmd="make -j`nproc`"
+else
+    make_cmd=$1
+fi
+
+$make_cmd create_kosha
+$make_cmd test_kosha
+$make_cmd create_sandhi_rules
+$make_cmd train_cheda
+$make_cmd eval_cheda
+
+
+echo -e "\nComplete."
```

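With this change, the script takes the make command as its optional first argument (the Makefile passes `$(MAKE)`) and otherwise falls back to a parallel `make` using all cores via `nproc`. A hypothetical direct invocation:

```shell
$ ./scripts/create_all_data.sh              # no argument: uses the make -j`nproc` fallback
$ ./scripts/create_all_data.sh "make -j2"   # custom command; $make_cmd is expanded
                                            # unquoted, so it splits into `make` and `-j2`
```
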
20 changes: 11 additions & 9 deletions vidyut-cheda/scripts/fetch_training_data.py

```diff
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
-"""Fetches training data from GitHub.
+"""
+Fetches training data from GitHub.
 
 We could do this in a shell script, but I find this more readable.
 """
@@ -9,12 +10,13 @@
 from pathlib import Path
 
 
-training_data = Path("dcs-data")
-if not training_data.exists():
-    print(f"Training data folder '{training_data}' does not exist -- fetching.")
-    subprocess.check_call(
-        f"git clone --depth 1 https://github.com/OliverHellwig/sanskrit.git {training_data}",
-        shell=True,
-    )
+training_dir = Path("dcs-data")
+
+if training_dir.exists():
+    print(f"Training data folder '{training_dir}' exists -- skipping fetch.")
 else:
-    print(f"Training data folder '{training_data}' exists -- skipping fetch.")
+    print(f"Training data folder '{training_dir}' does not exist -- fetching.")
+
+    repo_link = "https://github.com/OliverHellwig/sanskrit.git"
+    subprocess.check_call(f"git clone --depth 1 {repo_link} {training_dir}",
+                          shell=True)
```

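Assuming the script is run from the `vidyut-cheda` directory, a hypothetical session looks like this (the messages come from the `print` calls above):

```shell
$ cd vidyut-cheda
$ python3 scripts/fetch_training_data.py
Training data folder 'dcs-data' does not exist -- fetching.
Cloning into 'dcs-data'...
$ python3 scripts/fetch_training_data.py
Training data folder 'dcs-data' exists -- skipping fetch.
```
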
26 changes: 16 additions & 10 deletions vidyut-prakriya/scripts/run-debugger.sh

```diff
@@ -1,6 +1,8 @@
-#!/usr/bin/env sh
-if [[ ! $(command -v wasm-pack) ]]
-then
+#!/usr/bin/env bash
+
+set -e # Exit on failure.
+
+if [[ ! $(command -v wasm-pack) ]]; then
     echo "Our debugger requires wasm-pack. Please install wasm-pack:"
     echo "https://rustwasm.github.io/wasm-pack/installer/"
     echo
@@ -12,11 +14,15 @@ fi
 # build seems to have issues with enum parsing. So, stick with the release
 # build.
 wasm-pack build --target web --release
-mkdir -p www/static/wasm && cp pkg/* www/static/wasm
-mkdir -p www/static/data && cp data/* www/static/data
-cd www \
-    && python3 -m venv env \
-    && . env/bin/activate \
-    && pip3 install -r requirements.txt \
-    && python app.py
+
+mkdir -p www/static/wasm
+cp pkg/* www/static/wasm
+
+mkdir -p www/static/data
+cp data/* www/static/data
+
+cd www
+python3 -m venv env
+. env/bin/activate
+pip3 install -r requirements.txt
+python app.py
```

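The new `**/www/env` entry in `.gitignore` covers the virtualenv this script now creates at `www/env`. A hypothetical run, assuming `wasm-pack` and Python 3 are installed:

```shell
$ cd vidyut-prakriya
$ ./scripts/run-debugger.sh
# Builds the wasm package with wasm-pack, copies pkg/* and data/* into
# www/static/, creates and activates the www/env virtualenv, installs the
# requirements, and finally starts the debugger web app with `python app.py`.
# With `set -e`, the script now stops at the first failing step instead of
# continuing past errors.
```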