|
1132 | 1132 | {
|
1133 | 1133 | "cell_type": "code",
|
1134 | 1134 | "execution_count": null,
|
1135 |
| - "id": "590b9db5", |
| 1135 | + "id": "3637baad", |
1136 | 1136 | "metadata": {},
|
1137 | 1137 | "outputs": [],
|
1138 | 1138 | "source": [
|
1139 | 1139 | "import numpy as np\n",
|
1140 |
| - "import gradio as gr\n", |
| 1140 | + "\n", |
1141 | 1141 | "from bark import SAMPLE_RATE\n",
|
1142 | 1142 | "from bark.generation import SUPPORTED_LANGS\n",
|
1143 | 1143 | "\n",
|
|
1151 | 1151 | "PROMPT_LOOKUP[\"Unconditional\"] = None\n",
|
1152 | 1152 | "PROMPT_LOOKUP[\"Announcer\"] = \"announcer\"\n",
|
1153 | 1153 | "\n",
|
1154 |
| - "default_text = \"Hello, my name is Suno. And, uh — and I like pizza. [laughs]\\nBut I also have other interests such as playing tic tac toe.\"\n", |
1155 |
| - "\n", |
1156 |
| - "title = \"# 🐶 Bark: Text-to-Speech using OpenVINO</div>\"\n", |
1157 |
| - "\n", |
1158 |
| - "description = \"\"\"\n", |
1159 |
| - "Bark is a universal text-to-audio model created by [Suno](http://suno.ai). \\\n", |
1160 |
| - "Bark can generate highly realistic, multilingual speech as well as other audio - including music, background noise and simple sound effects. \\\n", |
1161 |
| - "The model output is not censored and the authors do not endorse the opinions in the generated content. \\\n", |
1162 |
| - "Use at your own risk.\n", |
1163 |
| - "\"\"\"\n", |
1164 |
| - "\n", |
1165 |
| - "article = \"\"\"\n", |
1166 |
| - "\n", |
1167 |
| - "## 🌎 Foreign Language\n", |
1168 |
| - "\n", |
1169 |
| - "Bark supports various languages out-of-the-box and automatically determines language from input text. \\\n", |
1170 |
| - "When prompted with code-switched text, Bark will even attempt to employ the native accent for the respective languages in the same voice.\n", |
1171 |
| - "\n", |
1172 |
| - "Try the prompt:\n", |
1173 |
| - "\n", |
1174 |
| - "```\n", |
1175 |
| - "Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.\n", |
1176 |
| - "```\n", |
1177 |
| - "\n", |
1178 |
| - "## 🤭 Non-Speech Sounds\n", |
1179 |
| - "\n", |
1180 |
| - "Below is a list of some known non-speech sounds, but we are finding more every day. \\\n", |
1181 |
| - "Please let us know if you find patterns that work particularly well on Discord!\n", |
1182 |
| - "\n", |
1183 |
| - "* [laughter]\n", |
1184 |
| - "* [laughs]\n", |
1185 |
| - "* [sighs]\n", |
1186 |
| - "* [music]\n", |
1187 |
| - "* [gasps]\n", |
1188 |
| - "* [clears throat]\n", |
1189 |
| - "* — or ... for hesitations\n", |
1190 |
| - "* ♪ for song lyrics\n", |
1191 |
| - "* capitalization for emphasis of a word\n", |
1192 |
| - "* MAN/WOMAN: for bias towards speaker\n", |
1193 |
| - "\n", |
1194 |
| - "Try the prompt:\n", |
1195 |
| - "\n", |
1196 |
| - "```\n", |
1197 |
| - "\" [clears throat] Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as... ♪ singing ♪.\"\n", |
1198 |
| - "```\n", |
1199 |
| - "\n", |
1200 |
| - "## 🎶 Music\n", |
1201 |
| - "Bark can generate all types of audio, and, in principle, doesn't see a difference between speech and music. \\\n", |
1202 |
| - "Sometimes Bark chooses to generate text as music, but you can help it out by adding music notes around your lyrics.\n", |
1203 |
| - "\n", |
1204 |
| - "Try the prompt:\n", |
1205 |
| - "\n", |
1206 |
| - "```\n", |
1207 |
| - "♪ In the jungle, the mighty jungle, the lion barks tonight ♪\n", |
1208 |
| - "```\n", |
1209 |
| - "\n", |
1210 |
| - "## 🧬 Voice Cloning\n", |
1211 |
| - "\n", |
1212 |
| - "Bark has the capability to fully clone voices - including tone, pitch, emotion and prosody. \\\n", |
1213 |
| - "The model also attempts to preserve music, ambient noise, etc. from input audio. \\\n", |
1214 |
| - "However, to mitigate misuse of this technology, we limit the audio history prompts to a limited set of Suno-provided, fully synthetic options to choose from.\n", |
1215 |
| - "\n", |
1216 |
| - "## 👥 Speaker Prompts\n", |
1217 |
| - "\n", |
1218 |
| - "You can provide certain speaker prompts such as NARRATOR, MAN, WOMAN, etc. \\\n", |
1219 |
| - "Please note that these are not always respected, especially if a conflicting audio history prompt is given.\n", |
1220 |
| - "\n", |
1221 |
| - "Try the prompt:\n", |
1222 |
| - "\n", |
1223 |
| - "```\n", |
1224 |
| - "WOMAN: I would like an oatmilk latte please.\n", |
1225 |
| - "MAN: Wow, that's expensive!\n", |
1226 |
| - "```\n", |
1227 |
| - "\n", |
1228 |
| - "\"\"\"\n", |
1229 |
| - "\n", |
1230 |
| - "examples = [\n", |
1231 |
| - " [\n", |
1232 |
| - " \"Please surprise me and speak in whatever voice you enjoy. Vielen Dank und Gesundheit!\",\n", |
1233 |
| - " \"Unconditional\",\n", |
1234 |
| - " ],\n", |
1235 |
| - " [\n", |
1236 |
| - " \"Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe.\",\n", |
1237 |
| - " \"Speaker 1 (en)\",\n", |
1238 |
| - " ],\n", |
1239 |
| - " [\n", |
1240 |
| - " \"Buenos días Miguel. Tu colega piensa que tu alemán es extremadamente malo. But I suppose your english isn't terrible.\",\n", |
1241 |
| - " \"Speaker 0 (es)\",\n", |
1242 |
| - " ],\n", |
1243 |
| - "]\n", |
1244 |
| - "\n", |
1245 | 1154 | "\n",
|
1246 | 1155 | "def gen_tts(text, history_prompt):\n",
|
1247 | 1156 | " history_prompt = PROMPT_LOOKUP[history_prompt]\n",
|
1248 | 1157 | " audio_arr = generate_audio(text, history_prompt=history_prompt)\n",
|
1249 | 1158 | " audio_arr = (audio_arr * 32767).astype(np.int16)\n",
|
1250 |
| - " return (SAMPLE_RATE, audio_arr)\n", |
1251 |
| - "\n", |
1252 |
| - "\n", |
1253 |
| - "with gr.Blocks() as block:\n", |
1254 |
| - " gr.Markdown(title)\n", |
1255 |
| - " gr.Markdown(description)\n", |
1256 |
| - " with gr.Row():\n", |
1257 |
| - " with gr.Column():\n", |
1258 |
| - " input_text = gr.Textbox(label=\"Input Text\", lines=2, value=default_text)\n", |
1259 |
| - " options = gr.Dropdown(AVAILABLE_PROMPTS, value=\"Speaker 1 (en)\", label=\"Acoustic Prompt\")\n", |
1260 |
| - " run_button = gr.Button()\n", |
1261 |
| - " with gr.Column():\n", |
1262 |
| - " audio_out = gr.Audio(label=\"Generated Audio\", type=\"numpy\")\n", |
1263 |
| - " inputs = [input_text, options]\n", |
1264 |
| - " outputs = [audio_out]\n", |
1265 |
| - " gr.Examples(examples=examples, fn=gen_tts, inputs=inputs, outputs=outputs)\n", |
1266 |
| - " gr.Markdown(article)\n", |
1267 |
| - " run_button.click(fn=gen_tts, inputs=inputs, outputs=outputs, queue=True)\n", |
| 1159 | + " return (SAMPLE_RATE, audio_arr)" |
| 1160 | + ] |
| 1161 | + }, |
| 1162 | + { |
| 1163 | + "cell_type": "code", |
| 1164 | + "execution_count": null, |
| 1165 | + "id": "590b9db5", |
| 1166 | + "metadata": {}, |
| 1167 | + "outputs": [], |
| 1168 | + "source": [ |
| 1169 | + "import requests\n", |
| 1170 | + "\n", |
| 1171 | + "if not Path(\"gradio_helper.py\").exists():\n", |
| 1172 | + " r = requests.get(url=\"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/bark-text-to-audio/gradio_helper.py\")\n", |
| 1173 | + " open(\"gradio_helper.py\", \"w\").write(r.text)\n", |
| 1174 | + "\n", |
| 1175 | + "from gradio_helper import make_demo\n", |
| 1176 | + "\n", |
| 1177 | + "demo = make_demo(fn=gen_tts, available_prompts=AVAILABLE_PROMPTS)\n", |
| 1178 | + "\n", |
1268 | 1179 | "try:\n",
|
1269 |
| - " block.launch(debug=True)\n", |
| 1180 | + " demo.launch(debug=True)\n", |
1270 | 1181 | "except Exception:\n",
|
1271 |
| - " block.launch(share=True, debug=True)\n", |
| 1182 | + " demo.launch(share=True, debug=True)\n", |
1272 | 1183 | "# if you are launching remotely, specify server_name and server_port\n",
|
1273 | 1184 | "# demo.launch(server_name='your server name', server_port='server port in int')\n",
|
1274 | 1185 | "# Read more in the docs: https://gradio.app/docs/"
|
|
0 commit comments