diff --git a/notebooks/Bark+Vocos.ipynb b/notebooks/Bark+Vocos.ipynb index 6af7365..e8bc85c 100644 --- a/notebooks/Bark+Vocos.ipynb +++ b/notebooks/Bark+Vocos.ipynb @@ -6,7 +6,7 @@ "private_outputs": true, "provenance": [], "gpuType": "T4", - "authorship_tag": "ABX9TyNuxsqp/FTsmltYeYfMZ6sw", + "authorship_tag": "ABX9TyMC53IsYoVJIVijVzw3ADvX", "include_colab_link": true }, "kernelspec": { @@ -26,7 +26,7 @@ "colab_type": "text" }, "source": [ - "\"Open" + "\"Open" ] }, { @@ -41,7 +41,7 @@ { "cell_type": "markdown", "source": [ - "In this notebook, we use Bark generative model to turn a text prompt into EnCodec audio tokens. These tokens then go through two decoders, EnCodec and Vocos, to reconstruct the audio waveform. Compare the results to discover the differences in audio quality and characteristics." + "In this notebook, we use the [Bark](https://github.com/suno-ai/bark) generative model to turn a text prompt into EnCodec audio tokens. These tokens then go through two decoders, EnCodec and Vocos, to reconstruct the audio waveform. Compare the results to discover the differences in audio quality and characteristics." ], "metadata": { "id": "zJFDte0daDAz" } }, @@ -103,6 +103,7 @@ "cell_type": "code", "source": [ "from vocos import Vocos\n", + "import torch\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "vocos = Vocos.from_pretrained(\"charactr/vocos-encodec-24khz\").to(device)" ], @@ -225,8 +226,6 @@ { "cell_type": "code", "source": [ - "import torch\n", - "\n", "audio_tokens_torch = torch.from_numpy(audio_tokens).to(device)\n", "features = vocos.codes_to_features(audio_tokens_torch)\n", "vocos_output = vocos.decode(features, bandwidth_id=torch.tensor([2], device=device)) # 6 kbps\n",