diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb index 56160c5e..3c4989f9 100644 --- a/notebooks/train_with_common_voice.ipynb +++ b/notebooks/train_with_common_voice.ipynb @@ -37,7 +37,7 @@ "\n", "👋 Hello and welcome to Coqui (🐸) STT \n", "\n", - "The goal of this notebook is to show you a **typical workflow** for **training** and **testing** an STT model with 🐸 and data from Common Voice.\n", + "This notebook shows a **typical workflow** for **training** and **testing** a 🐸 STT model on data from Common Voice.\n", "\n", "In this notebook, we will:\n", "\n", @@ -74,21 +74,15 @@ "id": "be5fe49c" }, "source": [ - "## ✅ Download & format sample data for English\n", + "## ✅ Download & format sample data for Serbian\n", "\n", "**First things first**: we need some data.\n", "\n", - "We're training a Speech-to-Text model, so we need some _speech_ and we need some _text_. Specificially, we want _transcribed speech_. Let's download some audio and transcripts.\n", - "\n", - "🐸 STT expects to find information about your data in a CSV file, where each line contains:\n", - "\n", - "1. the **path** to an audio file\n", - "2. the **size** of that audio file\n", - "3. the **transcript** of that audio file.\n", + "We're training a Speech-to-Text model, so we want _speech_ and we want _text_. Specifically, we want _transcribed speech_. 
Let's download some audio and transcripts.\n", "\n", "To focus on model training, we formatted the Common Voice data for you already, and you will find CSV files for `{train,test,dev}.csv` in the data directory.\n", "\n", - "Let's train a speech-to-text model 😊\n" + "Let's download some data for Serbian 😊\n" ], "id": "be5fe49c" }, @@ -105,14 +99,15 @@ "from coqui_stt_training.util.downloader import maybe_download\n", "\n", "def download_preformatted_data():\n", - " if not os.path.exists(\"data/sr-data\"):\n", - " maybe_download(\"sr-data.tar\", \"data/\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", - " print('\\nNo extracted data found. Extracting now...')\n", - " tar = tarfile.open(\"data/sr-data.tar\", mode=\"r:\")\n", - " tar.extractall(\"data/\")\n", + " if not os.path.exists(\"sr-data\"):\n", + " maybe_download(\"sr-data.tar\", \".\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", + " print('\\nExtracting data...')\n", + " tar = tarfile.open(\"sr-data.tar\", mode=\"r:\")\n", + " tar.extractall(\".\")\n", " tar.close()\n", + " print('\\nFinished extracting data...')\n", " else:\n", - " print('Found \"data/sr-data\" - not extracting.')\n", + " print('Found data - not extracting.')\n", "\n", "# Download + extract Common Voice data\n", "download_preformatted_data()" @@ -134,8 +129,8 @@ "id": "fa2aec77" }, "source": [ - "! ls data/sr-data\n", - "! wc -l data/sr-data/*.csv" + "! ls sr-data\n", + "! 
wc -l sr-data/*.csv" ], "id": "fa2aec77", "execution_count": null, @@ -166,9 +161,9 @@ "from coqui_stt_training.util.config import initialize_globals_from_args\n", "\n", "initialize_globals_from_args(\n", - " train_files=[\"data/sr-data/train.csv\"],\n", - " dev_files=[\"data/sr-data/dev.csv\"],\n", - " test_files=[\"data/sr-data/test.csv\"],\n", + " train_files=[\"sr-data/train.csv\"],\n", + " dev_files=[\"sr-data/dev.csv\"],\n", + " test_files=[\"sr-data/test.csv\"],\n", " load_train=\"init\",\n", " n_hidden=200,\n", " epochs=1,\n", @@ -214,9 +209,7 @@ "source": [ "## ✅ Train a new model\n", "\n", - "Let's kick off a training run 🚀🚀🚀 (using the configure you set above).\n", - "\n", - "This notebook should work on either a GPU or a CPU. However, in case you're running this on _multiple_ GPUs we want to only use one, because the sample dataset (one audio file) is too small to split across multiple GPUs." + "Let's kick off a training run 🚀🚀🚀 (using the configuration you set above)." ], "id": "ae82fd75" },