From 220e6304218ba8c1264adfebd97583b38369283e Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Sat, 23 Nov 2024 21:43:37 +0200 Subject: [PATCH] Adding option to import urls via CLI (#1003) --- buzz/cli.py | 20 +++++++++++++++----- docs/docs/cli.md | 6 +++--- docs/docs/faq.md | 18 +++++++++--------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/buzz/cli.py b/buzz/cli.py index 42c251e54..d130f573a 100644 --- a/buzz/cli.py +++ b/buzz/cli.py @@ -1,6 +1,7 @@ import enum import sys import typing +import urllib.parse from PyQt6.QtCore import QCommandLineParser, QCommandLineOption @@ -44,6 +45,9 @@ def parse_command_line(app: Application): print(parser.helpText()) sys.exit(1) +def is_url(path: str) -> bool: + parsed = urllib.parse.urlparse(path) + return all([parsed.scheme, parsed.netloc]) def parse(app: Application, parser: QCommandLineParser): parser.addPositionalArgument("", "One of the following commands:\n- add") @@ -203,14 +207,20 @@ def parse(app: Application, parser: QCommandLineParser): word_level_timings=word_timestamps, openai_access_token=openai_access_token, ) - file_transcription_options = FileTranscriptionOptions( - file_paths=file_paths, - output_formats=output_formats, - ) for file_path in file_paths: + path_is_url = is_url(file_path) + + file_transcription_options = FileTranscriptionOptions( + file_paths=[file_path] if not path_is_url else None, + url=file_path if path_is_url else None, + output_formats=output_formats, + ) + transcription_task = FileTranscriptionTask( - file_path=file_path, + file_path=file_path if not path_is_url else None, + url=file_path if path_is_url else None, + source=FileTranscriptionTask.Source.FILE_IMPORT if not path_is_url else FileTranscriptionTask.Source.URL_IMPORT, model_path=model_path, transcription_options=transcription_options, file_transcription_options=file_transcription_options, diff --git a/docs/docs/cli.md b/docs/docs/cli.md index 46c41756c..6dcbb0a18 100644 --- a/docs/docs/cli.md +++ 
b/docs/docs/cli.md @@ -10,7 +10,7 @@ sidebar_position: 5 Start a new transcription task. ``` -Usage: buzz add [options] [file file file...] +Usage: buzz add [options] [file url file...] Options: -t, --task The task to perform. Allowed: translate, @@ -60,7 +60,7 @@ Options: (Yiddish), yo (Yoruba), zh (Chinese). Leave empty to detect language. -p, --prompt Initial prompt. - -wt, --word-timestamps Generate word-level timestamps. + -wt, --word-timestamps Generate word-level timestamps. (available since 1.2.0) --openai-token OpenAI access token. Use only when --model-type is openaiapi. Defaults to your previously saved access token, if one exists. @@ -73,7 +73,7 @@ Options: -v, --version Displays version information. Arguments: - files Input file paths + files or urls Input file paths or urls. Url import available since 1.2.0. ``` **Examples**: diff --git a/docs/docs/faq.md b/docs/docs/faq.md index 8276e7016..9e78b2218 100644 --- a/docs/docs/faq.md +++ b/docs/docs/faq.md @@ -3,20 +3,20 @@ title: FAQ sidebar_position: 5 --- -1. **Where are the models stored?** +#### 1. Where are the models stored? The models are stored in `~/.cache/Buzz` (Linux), `~/Library/Caches/Buzz` (Mac OS) or `%USERPROFILE%\AppData\Local\Buzz\Buzz\Cache` (Windows). Paste the location in your file manager to access the models. -2. **What can I try if the transcription runs too slowly?** +#### 2. What can I try if the transcription runs too slowly? Speech recognition requires large amount of computation, so one option is to try using a lower Whisper model size or using a Whisper.cpp model to run speech recognition of your computer. If you have access to a computer with GPU that has at least 6GB of VRAM you can try using the Faster Whisper model. Buzz also supports using OpenAI API to do speech recognition on a remote server. To use this feature you need to set OpenAI API key in Preferences. See [Preferences](https://chidiwilliams.github.io/buzz/docs/preferences) section for more details. -3. 
**How to record system audio?** +#### 3. How to record system audio? To transcribe system audio you need to configure virtual audio device and connect output from the applications you want to transcribe to this virtual speaker. After that you can select it as source in the Buzz. See [Usage](https://chidiwilliams.github.io/buzz/docs/usage/live_recording) section for more details. @@ -25,13 +25,13 @@ sidebar_position: 5 - Windows - [VB CABLE](https://vb-audio.com/Cable/) - Linux - [PulseAudio Volume Control](https://wiki.ubuntu.com/record_system_sound) -4. **What model should I use?** +#### 4. What model should I use? Model size to use will depend on your hardware and use case. Smaller models will work faster but will have more inaccuracies. Larger models will be more accurate but will require more powerful hardware or longer time to transcribe. When choosing among large models consider the following. "Large" is the first released older model, "Large-V2" is later updated model with better accuracy, for some languages considered the most robust and stable. "Large-V3" is the latest model with the best accuracy in many cases, but some times can hallucinate or invent words that were never in the audio. "Turbo" model tries to get a good balance between speed and accuracy. The only sure way to know what model best suits your needs is to test them all in your language. -5. **How to get GPU acceleration for faster transcription?** +#### 5. How to get GPU acceleration for faster transcription? On Linux GPU acceleration is supported out of the box on Nvidia GPUs. If you still get any issues install [CUDA 12](https://developer.nvidia.com/cuda-downloads), [cuBLASS](https://developer.nvidia.com/cublas) and [cuDNN](https://developer.nvidia.com/cudnn). @@ -39,7 +39,7 @@ sidebar_position: 5 For Faster whisper CUDA 12 is required, computers with older CUDA versions will use CPU. -6. **How to fix `Unanticipated host error[PaErrorCode-9999]`?** +#### 6. 
How to fix `Unanticipated host error[PaErrorCode-9999]`? Check if there are any system settings preventing apps from accessing the microphone. @@ -49,17 +49,17 @@ sidebar_position: 5 For method 2 there is no need to uninstall the antivirus, but see if you can temporarily disable it or if there are settings that may prevent Buzz from accessing the microphone. -7. **Can I use Buzz on a computer without internet?** +#### 7. Can I use Buzz on a computer without internet? Yes, Buzz can be used without internet connection if you download the necessary models on some other computer that has the internet and manually move them to the offline computer. The easiest way to find where the models are stored is to go to Help -> Preferences -> Models. Then download some model, and push "Show file location" button. This will open the folder where the models are stored. Copy the models folder to the same location on the offline computer. F.e. for Linux it is `.cache/Buzz/models` in your home directory. -8. **Buzz crashes, what to do?** +#### 8. Buzz crashes, what to do? If a model download was incomplete or corrupted, Buzz may crash. Try to delete the downloaded model files in `Help -> Preferences -> Models` and re-download them. If that does not help, check the log file for errors and [report the issue](https://github.com/chidiwilliams/buzz/issues) so we can fix it. The log file is located in `~/Library/Logs/Buzz` (Mac OS) or `%USERPROFILE%\AppData\Local\Buzz\Buzz\Logs` (Windows). On Linux run the Buzz from the command line to see the relevant messages. -9. **Where can I get latest development version?** +#### 9. Where can I get latest development version? Latest development version will have latest bug fixes and most recent features. If you feel a bit adventurous it is recommended to try the latest development version as they needs some testing before they get released to everybody.