diff --git a/examples/de.properties b/examples/de.properties new file mode 100644 index 0000000..6508db9 --- /dev/null +++ b/examples/de.properties @@ -0,0 +1,12 @@ +# This uses the following datasets: +# +# 1) Web 1T 5-gram, 10 European Languages, Version 1, Linguistic Data Consortium (LDC) catalog number LDC2009T25 and ISBN 1-58563-525-1 +# 2) Google Ngram Viewer dataset version 1 from 2009-07-15 + +input = /path/to/corpus-google-web-1t-5-gram-10-european-languages-version1/data/GERMAN \ + ; /path/to/corpus-google-books-n-grams-version1/googlebooks-ger-all-20090715 +output = /path/to/out +temp = /path/to/temp +lang = de +lowercase = false +max-n-gram = 5 diff --git a/examples/en.properties b/examples/en.properties new file mode 100644 index 0000000..a6d3daf --- /dev/null +++ b/examples/en.properties @@ -0,0 +1,12 @@ +# This uses the following datasets: +# +# 1) Web 1T 5-gram Version 1, Linguistic Data Consortium (LDC) catalog number LDC2006T13 and ISBN 1-58563-397-6 +# 2) Google Ngram Viewer dataset version 1 from 2009-07-15 + +input = /path/to/corpus-google-web-1t-5-gram-version1 \ + ; /path/to/corpus-google-books-n-grams-version1/googlebooks-eng-all-20090715 +output = /path/to/out +temp = /path/to/temp +lang = en +lowercase = true +max-n-gram = 5