Resolve buggy tts save by updating gTTS to v2.2.1

Fix jdlorimer#161. Thanks to @psii for mentioning that the update fixes the issue.
3ter · Nov 17, 2020 · d282e99
1 parent b005552
commit d282e99
Show file tree

Hide file tree

Showing 19 changed files with 964 additions and 231 deletions.
diff --git a/chinese/lib/gtts/cli.py b/chinese/lib/gtts/cli.py
@@ -88,6 +88,7 @@ def print_languages(ctx, param, value):
     """
     if not value or ctx.resilient_parsing:
         return
+
     try:
         langs = tts_langs()
         langs_str_list = sorted("{}: {}".format(k, langs[k]) for k in langs)
@@ -136,6 +137,14 @@ def set_debug(ctx, param, debug):
     show_default=True,
     callback=validate_lang,
     help="IETF language tag. Language to speak in. List documented tags with --all.")
+@click.option(
+    '-t',
+    '--tld',
+    metavar='<tld>',
+    default='com',
+    show_default=True,
+    is_eager=True,  # Prioritize <tld> to ensure it gets set before <lang>
+    help="Top-level domain for the Google host, i.e https://translate.google.<tld>")
 @click.option(
     '--nocheck',
     default=False,
@@ -159,7 +168,7 @@ def set_debug(ctx, param, debug):
     callback=set_debug,
     help="Show debug information.")
 @click.version_option(version=__version__)
-def tts_cli(text, file, output, slow, lang, nocheck):
+def tts_cli(text, file, output, slow, tld, lang, nocheck):
     """ Read <text> to mp3 format using Google Translate's Text-to-Speech API
     (set <text> or --file <file> to - for standard input)
     """
@@ -189,6 +198,7 @@ def tts_cli(text, file, output, slow, lang, nocheck):
             text=text,
             lang=lang,
             slow=slow,
+            tld=tld,
             lang_check=not nocheck)
         tts.write_to_fp(output)
     except (ValueError, AssertionError) as e:

diff --git a/chinese/lib/gtts/lang.py b/chinese/lib/gtts/lang.py
@@ -1,14 +1,8 @@
 # -*- coding: utf-8 -*-
-from bs4 import BeautifulSoup
-import requests
 import logging
-import re
 
 __all__ = ['tts_langs']
 
-URL_BASE = 'http://translate.google.com'
-JS_FILE = 'translate_m.js'
-
 # Logger
 log = logging.getLogger(__name__)
 log.addHandler(logging.NullHandler())
@@ -18,69 +12,94 @@ def tts_langs():
     """Languages Google Text-to-Speech supports.
 
     Returns:
-        dict: A dictionnary of the type `{ '<lang>': '<name>'}`
+        dict: A dictionary of the type `{ '<lang>': '<name>'}`
 
-        Where `<lang>` is an IETF language tag such as `en` or `pt-br`,
-        and `<name>` is the full English name of the language, such as
-        `English` or `Portuguese (Brazil)`.
+            Where `<lang>` is an IETF language tag such as `en` or `pt-br`,
+            and `<name>` is the full English name of the language, such as
+            `English` or `Portuguese (Brazil)`.
 
-    The dictionnary returned combines languages from two origins:
+    The dictionary returned combines languages from two origins:
 
-    - Languages fetched automatically from Google Translate
+    - Languages fetched from Google Translate
     - Languages that are undocumented variations that were observed to work and
       present different dialects or accents.
 
     """
-    try:
-        langs = dict()
-        langs.update(_fetch_langs())
-        langs.update(_extra_langs())
-        log.debug("langs: %s", langs)
-        return langs
-    except Exception as e:
-        raise RuntimeError("Unable to get language list: %s" % str(e))
-
+    langs = dict()
+    langs.update(_main_langs())
+    langs.update(_extra_langs())
+    log.debug("langs: {}".format(langs))
+    return langs
 
-def _fetch_langs():
-    """Fetch (scrape) languages from Google Translate.
 
-    Google Translate loads a JavaScript Array of 'languages codes' that can
-    be spoken. We intersect this list with all the languages Google Translate
-    provides to get the ones that support text-to-speech.
+def _main_langs():
+    """Define the main languages.
 
     Returns:
-        dict: A dictionnary of languages from Google Translate
+        dict: A dictionary of the main languages extracted from
+            Google Translate.
 
     """
-    # Load HTML
-    page = requests.get(URL_BASE)
-    soup = BeautifulSoup(page.content, 'html.parser')
-
-    # JavaScript URL
-    # The <script src=''> path can change, but not the file.
-    # Ex: /zyx/abc/20180211/desktop_module_main.js
-    js_path = soup.find(src=re.compile(JS_FILE))['src']
-    js_url = "{}/{}".format(URL_BASE, js_path)
-
-    # Load JavaScript
-    js_contents = requests.get(js_url).text
-
-    # Approximately extract TTS-enabled language codes
-    # RegEx pattern search because minified variables can change.
-    # Extra garbage will be dealt with later as we keep languages only.
-    # In: "[...]Fv={af:1,ar:1,[...],zh:1,"zh-cn":1,"zh-tw":1}[...]"
-    # Out: ['is', '12', [...], 'af', 'ar', [...], 'zh', 'zh-cn', 'zh-tw']
-    pattern = r'[{,\"](\w{2}|\w{2}-\w{2,3})(?=:1|\":1)'
-    tts_langs = re.findall(pattern, js_contents)
-
-    # Build lang. dict. from main page (JavaScript object populating lang. menu)
-    # Filtering with the TTS-enabled languages
-    # In: "{code:'auto',name:'Detect language'},{code:'af',name:'Afrikaans'},[...]"
-    # re.findall: [('auto', 'Detect language'), ('af', 'Afrikaans'), [...]]
-    # Out: {'af': 'Afrikaans', [...]}
-    trans_pattern = r"{code:'(?P<lang>.+?[^'])',name:'(?P<name>.+?[^'])'}"
-    trans_langs = re.findall(trans_pattern, page.text)
-    return {lang: name for lang, name in trans_langs if lang in tts_langs}
+    return {
+        'af': 'Afrikaans',
+        'ar': 'Arabic',
+        'bn': 'Bengali',
+        'bs': 'Bosnian',
+        'ca': 'Catalan',
+        'cs': 'Czech',
+        'cy': 'Welsh',
+        'da': 'Danish',
+        'de': 'German',
+        'el': 'Greek',
+        'en': 'English',
+        'eo': 'Esperanto',
+        'es': 'Spanish',
+        'et': 'Estonian',
+        'fi': 'Finnish',
+        'fr': 'French',
+        'gu': 'Gujarati',
+        'hi': 'Hindi',
+        'hr': 'Croatian',
+        'hu': 'Hungarian',
+        'hy': 'Armenian',
+        'id': 'Indonesian',
+        'is': 'Icelandic',
+        'it': 'Italian',
+        'ja': 'Japanese',
+        'jw': 'Javanese',
+        'km': 'Khmer',
+        'kn': 'Kannada',
+        'ko': 'Korean',
+        'la': 'Latin',
+        'lv': 'Latvian',
+        'mk': 'Macedonian',
+        'ml': 'Malayalam',
+        'mr': 'Marathi',
+        'my': 'Myanmar (Burmese)',
+        'ne': 'Nepali',
+        'nl': 'Dutch',
+        'no': 'Norwegian',
+        'pl': 'Polish',
+        'pt': 'Portuguese',
+        'ro': 'Romanian',
+        'ru': 'Russian',
+        'si': 'Sinhala',
+        'sk': 'Slovak',
+        'sq': 'Albanian',
+        'sr': 'Serbian',
+        'su': 'Sundanese',
+        'sv': 'Swedish',
+        'sw': 'Swahili',
+        'ta': 'Tamil',
+        'te': 'Telugu',
+        'th': 'Thai',
+        'tl': 'Filipino',
+        'tr': 'Turkish',
+        'uk': 'Ukrainian',
+        'ur': 'Urdu',
+        'vi': 'Vietnamese',
+        'zh-CN': 'Chinese'
+    }
 
 
 def _extra_langs():
@@ -89,9 +108,9 @@ def _extra_langs():
     Returns:
         dict: A dictionnary of extra languages manually defined.
 
-        Variations of the ones fetched by `_fetch_langs`,
-        observed to provide different dialects or accents or
-        just simply accepted by the Google Translate Text-to-Speech API.
+            Variations of the ones fetched by `_main_langs`,
+            observed to provide different dialects or accents or
+            just simply accepted by the Google Translate Text-to-Speech API.
 
     """
     return {

diff --git a/chinese/lib/gtts_token/__init__.py b/chinese/lib/gtts/tests/__init__.py
diff --git a/chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt b/chinese/lib/gtts/tests/input_files/test_cli_test_ascii.txt
@@ -0,0 +1,2 @@
+Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger?
+How much will it cost the website doesn't have the theme i was going for.
diff --git a/chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt b/chinese/lib/gtts/tests/input_files/test_cli_test_utf8.txt
@@ -0,0 +1,5 @@
+这是一个三岁的小孩
+在讲述她从一系列照片里看到的东西。
+对这个世界， 她也许还有很多要学的东西，
+但在一个重要的任务上， 她已经是专家了：
+去理解她所看到的东西。
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Can you make pink a little more pinkish can you make pink a little more pinkish, nor can you make the font bigger?
		How much will it cost the website doesn't have the theme i was going for.