TudorCRL · TudorCRL · Feb 21, 2023 · Feb 21, 2023 · Feb 21, 2023 · Feb 21, 2023
diff --git a/README.rst b/README.rst
@@ -39,6 +39,7 @@ Speech recognition engine/API support:
 * `Tensorflow <https://www.tensorflow.org/>`__
 * `Vosk API <https://github.com/alphacep/vosk-api/>`__ (works offline)
 * `OpenAI whisper <https://github.com/openai/whisper>`__ (works offline)
+* `Speechmatics ASR API <https://portal.speechmatics.com/>`__
 
 **Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details.
 
@@ -95,6 +96,7 @@ To use all of the functionality of the library, you should have:
 * **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X)
 * **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``)
 * **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``)
+* **Speechmatics** (required only if you need to use Speechmatics ``recognizer_instance.recognize_speechmatics``)
 
 The following requirements are optional, but can improve or extend functionality in some situations:
 
@@ -169,6 +171,12 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins
 
 You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git soundfile``.
 
+Speechmatics (for Speechmatics users)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Speechmatics is **required if and only if you want to use speechmatics** (``recognizer_instance.recognize_speechmatics``).
+
+You can install it with ``python3 -m pip install speechmatics-python``. You will also need an API key from `<https://portal.speechmatics.com/manage-access/>`__.
+
 Troubleshooting
 ---------------
 

diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py
@@ -13,6 +13,16 @@
 with sr.AudioFile(AUDIO_FILE) as source:
     audio = r.record(source)  # read the entire audio file
 
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+    print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY))
+except sr.UnknownValueError:
+    print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+    print("Could not request results from the Speechmatics service; {0}".format(e))
+
+
 # recognize speech using Sphinx
 try:
     print("Sphinx thinks you said " + r.recognize_sphinx(audio))

diff --git a/examples/extended_results.py b/examples/extended_results.py
@@ -16,6 +16,16 @@
 with sr.AudioFile(AUDIO_FILE) as source:
     audio = r.record(source)  # read the entire audio file
 
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+    print("Speechmatics results:")
+    pprint(r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2"))
+except sr.UnknownValueError:
+    print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+    print("Speechmatics error; {0}".format(e))
+
 # recognize speech using Sphinx
 try:
     print("Sphinx thinks you said " + r.recognize_sphinx(audio))

diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py
@@ -10,6 +10,15 @@
     print("Say something!")
     audio = r.listen(source)
 
+# recognize speech using Speechmatics
+SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE"
+try:
+    print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY))
+except sr.UnknownValueError:
+    print("Speechmatics could not understand audio")
+except sr.RequestError as e:
+    print("Could not request results from Speechmatics service; {0}".format(e))
+
 # recognize speech using Sphinx
 try:
     print("Sphinx thinks you said " + r.recognize_sphinx(audio))

diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py
@@ -1702,6 +1702,47 @@ def recognize_vosk(self, audio_data, language='en'):
 
         return finalRecognition
 
+    def recognize_speechmatics(self, audio_data, key=None, language="en", transcript_format="txt"):
+        """
+        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Speechmatics ASR
+
+        The key value is your speechmatics API key. You can get an API key by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/.
+
+        The recognition language is determined by ``language``, an RFC5646 language tag like "en" or "es". The full list of supported languages can be found at https://docs.speechmatics.com/introduction/supported-languages.
+
+        Returns a text representation of the transcript by default. You can alson get a json representation of the transcript by setting transcript_format='json-v2', which comes with a range of meta-data about each word in the transcript. The full transcript schema is documented here: https://docs.speechmatics.com/features. You can also request an SRT format by setting `format='srt'`
+
+        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
+        """
+        assert isinstance(audio_data, AudioData), "Data must be audio data"
+        assert isinstance(key, str), "``key`` must be a string"
+
+        try:
+            from speechmatics.models import ConnectionSettings, BatchTranscriptionConfig
+            from speechmatics.batch_client import BatchClient
+            from speechmatics.constants import BATCH_SELF_SERVICE_URL
+        except:
+            raise RequestError("missing speechmatics python module: install using `pip install speechmatics-python`")
+
+        wav_data = audio_data.get_wav_data(
+            convert_rate=None if audio_data.sample_rate >= 16000 else 16000  # audio samples must be at least 16 kHz
+        )
+        audio_input = ("audio_file.wav", wav_data)
+        settings = ConnectionSettings(
+            url=BATCH_SELF_SERVICE_URL,
+            auth_token=key,
+        )
+        conf = BatchTranscriptionConfig(
+            language=language,
+        )
+        with BatchClient(settings) as client:
+            job_id = client.submit_job(
+                audio=audio_input,
+                transcription_config=conf,
+            )
+            transcript = client.wait_for_completion(job_id, transcription_format=transcript_format)
+            return transcript
+
 def get_flac_converter():
     """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found."""
     flac_converter = shutil_which("flac")  # check for installed version first

diff --git a/tests/test_recognition.py b/tests/test_recognition.py
@@ -34,6 +34,24 @@ def test_google_chinese(self):
         with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source)
         self.assertEqual(r.recognize_google(audio, language="zh-CN"), u"砸自己的脚")
 
+    @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+    def test_speechmatics_english(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"]), "One, two, three.")
+
+    @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+    def test_speechmatics_french(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_FR) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="fr"), u"C'est la dictée numéro un.")
+
+    @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable")
+    def test_speechmatics_mandarin(self):
+        r = sr.Recognizer()
+        with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source)
+        self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="cmn"), u"砸自己的脚。")
+
     @unittest.skipUnless("WIT_AI_KEY" in os.environ, "requires Wit.ai key to be specified in WIT_AI_KEY environment variable")
     def test_wit_english(self):
         r = sr.Recognizer()