serp-ai · BrasD99 · Jul 22, 2023 · Jul 22, 2023 · Jul 22, 2023 · Jul 22, 2023
diff --git a/clone_voice.ipynb b/clone_voice.ipynb
@@ -6,13 +6,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from bark.generation import load_codec_model, generate_text_semantic\n",
+    "!pip install git+https://github.com/suno-ai/bark.git\n",
+    "!git clone https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer\n",
+    "!pip install -r ./bark-voice-cloning-HuBERT-quantizer/requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bark.generation import load_codec_model\n",
     "from encodec.utils import convert_audio\n",
     "\n",
     "import torchaudio\n",
     "import torch\n",
     "\n",
-    "device = 'cuda' # or 'cpu'\n",
+    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
     "model = load_codec_model(use_gpu=True if device == 'cuda' else False)"
    ]
   },
@@ -22,8 +33,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer\n",
-    "from hubert.hubert_manager import HuBERTManager\n",
+    "import sys\n",
+    "sys.path.append('./bark-voice-cloning-HuBERT-quantizer')\n",
+    "from bark_hubert_quantizer.hubert_manager import HuBERTManager\n",
     "hubert_manager = HuBERTManager()\n",
     "hubert_manager.make_sure_hubert_installed()\n",
     "hubert_manager.make_sure_tokenizer_installed()"
@@ -35,16 +47,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer \n",
     "# Load HuBERT for semantic tokens\n",
-    "from hubert.pre_kmeans_hubert import CustomHubert\n",
-    "from hubert.customtokenizer import CustomTokenizer\n",
+    "from bark_hubert_quantizer.pre_kmeans_hubert import CustomHubert\n",
+    "from bark_hubert_quantizer.customtokenizer import CustomTokenizer\n",
     "\n",
     "# Load the HuBERT model\n",
     "hubert_model = CustomHubert(checkpoint_path='data/models/hubert/hubert.pt').to(device)\n",
     "\n",
     "# Load the CustomTokenizer model\n",
-    "tokenizer = CustomTokenizer.load_from_checkpoint('data/models/hubert/tokenizer.pth').to(device)  # Automatically uses the right layers"
+    "tokenizer = CustomTokenizer.load_from_checkpoint('data/models/hubert/tokenizer.pth', map_location=device).to(device)  # Automatically uses the right layers"
    ]
   },
   {
@@ -101,34 +112,13 @@
    "outputs": [],
    "source": [
     "import numpy as np\n",
-    "voice_name = 'output' # whatever you want the name of the voice to be\n",
-    "output_path = 'bark/assets/prompts/' + voice_name + '.npz'\n",
-    "np.savez(output_path, fine_prompt=codes, coarse_prompt=codes[:2, :], semantic_prompt=semantic_tokens)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# That's it! Now you can head over to the generate.ipynb and use your voice_name for the 'history_prompt'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Heres the generation stuff copy-pasted for convenience"
+    "import os\n",
+    "\n",
+    "voice_filename = 'output.npz'\n",
+    "current_path = os.getcwd()\n",
+    "voice_name = os.path.join(current_path, voice_filename)\n",
+    "\n",
+    "np.savez(voice_name, fine_prompt=codes, coarse_prompt=codes[:2, :], semantic_prompt=semantic_tokens)"
    ]
   },
   {
@@ -138,12 +128,10 @@
    "outputs": [],
    "source": [
     "from bark.api import generate_audio\n",
-    "from transformers import BertTokenizer\n",
     "from bark.generation import SAMPLE_RATE, preload_models, codec_decode, generate_coarse, generate_fine, generate_text_semantic\n",
     "\n",
     "# Enter your prompt and speaker here\n",
-    "text_prompt = \"Hello, my name is Serpy. And, uh — and I like pizza. [laughs]\"\n",
-    "voice_name = \"output\" # use your custom voice name here if you have one"
+    "text_prompt = \"Hello, my name is Serpy. And, uh — and I like pizza. [laughs]\""
    ]
   },
   {
@@ -161,8 +149,7 @@
     "    fine_use_gpu=True,\n",
     "    fine_use_small=False,\n",
     "    codec_use_gpu=True,\n",
-    "    force_reload=False,\n",
-    "    path=\"models\"\n",
+    "    force_reload=False\n",
     ")"
    ]
   },
@@ -245,10 +232,8 @@
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.8"
-  },
-  "orig_nbformat": 4
+   "pygments_lexer": "ipython3"
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

diff --git a/generate.ipynb b/generate.ipynb
@@ -1,5 +1,14 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install git+https://github.com/suno-ai/bark.git"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -163,10 +172,8 @@
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.8"
-  },
-  "orig_nbformat": 4
+   "pygments_lexer": "ipython3"
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2

diff --git a/generate_chunked.ipynb b/generate_chunked.ipynb
@@ -1,5 +1,14 @@
 {
  "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install git+https://github.com/suno-ai/bark.git"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -334,10 +343,8 @@
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.8"
-  },
-  "orig_nbformat": 4
+   "pygments_lexer": "ipython3"
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2