From 7085fd3ed344f25061c01257df97e8be9f0a3fdd Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Tue, 14 Sep 2021 11:50:35 -0400 Subject: [PATCH 01/17] Add notebook for CV --- notebooks/README.md | 5 +- ...ing.ipynb => easy_transfer_learning.ipynb} | 0 notebooks/train_with_common_voice.ipynb | 260 ++++++++++++++++++ ...=> train_your_first_coqui_STT_model.ipynb} | 0 4 files changed, 263 insertions(+), 2 deletions(-) rename notebooks/{easy-transfer-learning.ipynb => easy_transfer_learning.ipynb} (100%) create mode 100644 notebooks/train_with_common_voice.ipynb rename notebooks/{train-your-first-coqui-STT-model.ipynb => train_your_first_coqui_STT_model.ipynb} (100%) diff --git a/notebooks/README.md b/notebooks/README.md index 06fd5867..d2be19e4 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,4 +1,5 @@ # Python Notebooks for 🐸 STT -1. Train a new Speech-to-Text model from scratch [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train-your-first-coqui-STT-model.ipynb) -2. Transfer learning (English --> Russian) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy-transfer-learning.ipynb) +1. Train a new Speech-to-Text model from scratch [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_your_first_coqui-STT_model.ipynb) +2. Transfer learning (English --> Russian) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy_transfer_learning.ipynb) +2. 
Train a model with Common Voice data [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) diff --git a/notebooks/easy-transfer-learning.ipynb b/notebooks/easy_transfer_learning.ipynb similarity index 100% rename from notebooks/easy-transfer-learning.ipynb rename to notebooks/easy_transfer_learning.ipynb diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb new file mode 100644 index 00000000..c66e6670 --- /dev/null +++ b/notebooks/train_with_common_voice.ipynb @@ -0,0 +1,260 @@ +{ + "nbformat": 4, + "nbformat_minor": 5, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "colab": { + "name": "train-with-common-voice-data.ipynb", + "private_outputs": true, + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "f79d99ef" + }, + "source": [ + "# Train a 🐸 STT model with Common Voice data 💫\n", + "\n", + "👋 Hello and welcome to Coqui (🐸) STT \n", + "\n", + "The goal of this notebook is to show you a **typical workflow** for **training** and **testing** an STT model with 🐸 and data from Common Voice.\n", + "\n", + "In this notebook, we will:\n", + "\n", + "1. Download Common Voice data (pre-formatted for 🐸 STT)\n", + "2. Configure the training and testing runs\n", + "3. Train a new model\n", + "4. 
Test the model and display its performance\n", + "\n", + "So, let's jump right in!\n", + "\n", + "*PS - If you just want a working, off-the-shelf model, check out the [🐸 Model Zoo](https://www.coqui.ai/models)*" + ], + "id": "f79d99ef" + }, + { + "cell_type": "code", + "metadata": { + "id": "fa2aec78" + }, + "source": [ + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! pip install coqui_stt_training\n", + "## Install opus tools\n", + "! apt-get install libopusfile0 libopus-dev libopusfile-dev" + ], + "id": "fa2aec78", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "be5fe49c" + }, + "source": [ + "## ✅ Download & format sample data for English\n", + "\n", + "**First things first**: we need some data.\n", + "\n", + "We're training a Speech-to-Text model, so we need some _speech_ and we need some _text_. Specificially, we want _transcribed speech_. Let's download some audio and transcripts.\n", + "\n", + "🐸 STT expects to find information about your data in a CSV file, where each line contains:\n", + "\n", + "1. the **path** to an audio file\n", + "2. the **size** of that audio file\n", + "3. the **transcript** of that audio file.\n", + "\n", + "To focus on model training, we formatted the Common Voice data for you already, and you will find CSV files for `{train,test,dev}.csv` in the data directory.\n", + "\n", + "Let's train a speech-to-text model 😊\n" + ], + "id": "be5fe49c" + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "53945462" + }, + "source": [ + "### Download pre-formatted Common Voice data\n", + "! wget https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/kk-data.tar\n", + "! 
tar -xf kk-data.tar" + ], + "id": "53945462", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "96e8b708" + }, + "source": [ + "### 👀 Take a look at the data" + ], + "id": "96e8b708" + }, + { + "cell_type": "code", + "metadata": { + "id": "fa2aec77" + }, + "source": [ + "! ls kk-data\n", + "! wc -l kk-data/*.csv" + ], + "id": "fa2aec77", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d9dfac21" + }, + "source": [ + "## ✅ Configure & set hyperparameters\n", + "\n", + "Coqui STT comes with a long list of hyperparameters you can tweak. We've set default values, but you will often want to set your own. You can use `initialize_globals_from_args()` to do this. \n", + "\n", + "You must **always** configure the paths to your data, and you must **always** configure your alphabet. Additionally, here we show how you can specify the size of hidden layers (`n_hidden`), the number of epochs to train for (`epochs`), and to initialize a new model from scratch (`load_train=\"init\"`).\n", + "\n", + "If you're training on a GPU, you can uncomment the (larger) training batch sizes for faster training." 
+ ], + "id": "d9dfac21" + }, + { + "cell_type": "code", + "metadata": { + "id": "d264fdec" + }, + "source": [ + "from coqui_stt_training.util.config import initialize_globals_from_args\n", + "\n", + "initialize_globals_from_args(\n", + " train_files=[\"kk-data/train.csv\"],\n", + " dev_files=[\"kk-data/dev.csv\"],\n", + " test_files=[\"kk-data/test.csv\"],\n", + " load_train=\"init\",\n", + " n_hidden=200,\n", + " epochs=1,\n", + " beam_width=1,\n", + " #train_batch_size=128,\n", + " #dev_batch_size=128,\n", + " #test_batch_size=128,\n", + ")" + ], + "id": "d264fdec", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "799c1425" + }, + "source": [ + "### 👀 View all config settings" + ], + "id": "799c1425" + }, + { + "cell_type": "code", + "metadata": { + "id": "03b33d2b" + }, + "source": [ + "from coqui_stt_training.util.config import Config\n", + "\n", + "print(Config.to_json())" + ], + "id": "03b33d2b", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ae82fd75" + }, + "source": [ + "## ✅ Train a new model\n", + "\n", + "Let's kick off a training run 🚀🚀🚀 (using the configure you set above).\n", + "\n", + "This notebook should work on either a GPU or a CPU. However, in case you're running this on _multiple_ GPUs we want to only use one, because the sample dataset (one audio file) is too small to split across multiple GPUs." + ], + "id": "ae82fd75" + }, + { + "cell_type": "code", + "metadata": { + "scrolled": true, + "id": "550a504e" + }, + "source": [ + "from coqui_stt_training.train import train\n", + "\n", + "train()" + ], + "id": "550a504e", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9f6dc959" + }, + "source": [ + "## ✅ Test the model\n", + "\n", + "We made it! 
🙌\n", + "\n", + "Let's kick off the testing run, which displays performance metrics.\n", + "\n", + "The settings we used here are for demonstration purposes, so you don't want to deploy this model into production. In this notebook we're focusing on the workflow itself, so it's forgivable 😇\n", + "\n", + "You can still train a more State-of-the-Art model by finding better hyperparameters, so go for it 💪" + ], + "id": "9f6dc959" + }, + { + "cell_type": "code", + "metadata": { + "id": "dd42bc7a" + }, + "source": [ + "from coqui_stt_training.evaluate import test\n", + "\n", + "test()" + ], + "id": "dd42bc7a", + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/notebooks/train-your-first-coqui-STT-model.ipynb b/notebooks/train_your_first_coqui_STT_model.ipynb similarity index 100% rename from notebooks/train-your-first-coqui-STT-model.ipynb rename to notebooks/train_your_first_coqui_STT_model.ipynb From 5201c2a10c8f41ff6b3ea046e68ee93062f08c9c Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 04:02:08 -0400 Subject: [PATCH 02/17] Install STT from pypi in notebook --- notebooks/train_your_first_coqui_STT_model.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/train_your_first_coqui_STT_model.ipynb b/notebooks/train_your_first_coqui_STT_model.ipynb index 506f6e3b..26961b86 100644 --- a/notebooks/train_your_first_coqui_STT_model.ipynb +++ b/notebooks/train_your_first_coqui_STT_model.ipynb @@ -32,9 +32,9 @@ "metadata": {}, "outputs": [], "source": [ - "## Install Coqui STT if you need to\n", - "!git clone --depth 1 https://github.com/coqui-ai/STT.git\n", - "!cd STT; pip install -U pip wheel setuptools; pip install ." + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! 
pip install coqui_stt_training" ] }, { From 903c2b4acae9a72c728d415e9bf4784ba43ab91b Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 04:03:27 -0400 Subject: [PATCH 03/17] Install STT from pypi in notebook --- notebooks/easy_transfer_learning.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/easy_transfer_learning.ipynb b/notebooks/easy_transfer_learning.ipynb index 792bdd98..c46f06ca 100644 --- a/notebooks/easy_transfer_learning.ipynb +++ b/notebooks/easy_transfer_learning.ipynb @@ -34,9 +34,9 @@ "metadata": {}, "outputs": [], "source": [ - "## Install Coqui STT if you need to\n", - "!git clone --depth 1 https://github.com/coqui-ai/STT.git\n", - "!cd STT; pip install -U pip wheel setuptools; pip install ." + "## Install Coqui STT\n", + "! pip install -U pip\n", + "! pip install coqui_stt_training" ] }, { From 0e8920ed6365eb79ffd68a73a3909a95ac59bfd5 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 04:09:00 -0400 Subject: [PATCH 04/17] Use table to organize notebooks --- notebooks/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/notebooks/README.md b/notebooks/README.md index d2be19e4..81474be6 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -1,5 +1,7 @@ # Python Notebooks for 🐸 STT -1. Train a new Speech-to-Text model from scratch [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_your_first_coqui-STT_model.ipynb) -2. Transfer learning (English --> Russian) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy_transfer_learning.ipynb) -2. 
Train a model with Common Voice data [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) +| Notebook title | Language(s) | Link to Colab | +|----------------|---------------|-------------| +|Train your first 🐸 STT model | English | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_your_first_coqui-STT_model.ipynb) | +|Easy Transfer learning | English --> Russian | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy_transfer_learning.ipynb)| +| Train a model with Common Voice | Kazakh | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) | From 2729da33a8d2ec3835b61c5fe819e74ef1f5c4c0 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 06:54:25 -0400 Subject: [PATCH 05/17] More notebook work --- notebooks/easy_transfer_learning.ipynb | 2 +- notebooks/train_with_common_voice.ipynb | 51 +++++++++++-------- .../train_your_first_coqui_STT_model.ipynb | 2 +- 3 files changed, 33 insertions(+), 22 deletions(-) diff --git a/notebooks/easy_transfer_learning.ipynb b/notebooks/easy_transfer_learning.ipynb index c46f06ca..0ce1466d 100644 --- a/notebooks/easy_transfer_learning.ipynb +++ b/notebooks/easy_transfer_learning.ipynb @@ -147,7 +147,7 @@ " alphabet_config_path=\"russian/alphabet.txt\",\n", " train_files=[\"russian/ru.csv\"],\n", " dev_files=[\"russian/ru.csv\"],\n", - " epochs=200,\n", + " epochs=100,\n", " load_cudnn=True,\n", ")" ] diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb index c66e6670..56160c5e 100644 --- 
a/notebooks/train_with_common_voice.ipynb +++ b/notebooks/train_with_common_voice.ipynb @@ -92,21 +92,32 @@ ], "id": "be5fe49c" }, - { - "cell_type": "code", - "metadata": { - "scrolled": true, - "id": "53945462" - }, - "source": [ - "### Download pre-formatted Common Voice data\n", - "! wget https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/kk-data.tar\n", - "! tar -xf kk-data.tar" - ], - "id": "53945462", - "execution_count": null, - "outputs": [] - }, + { + "cell_type": "code", + "execution_count": null, + "id": "608d203f", + "metadata": {}, + "outputs": [], + "source": [ + "### Download pre-formatted Common Voice data\n", + "import os\n", + "import tarfile\n", + "from coqui_stt_training.util.downloader import maybe_download\n", + "\n", + "def download_preformatted_data():\n", + " if not os.path.exists(\"data/sr-data\"):\n", + " maybe_download(\"sr-data.tar\", \"data/\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", + " print('\\nNo extracted data found. Extracting now...')\n", + " tar = tarfile.open(\"data/sr-data.tar\", mode=\"r:\")\n", + " tar.extractall(\"data/\")\n", + " tar.close()\n", + " else:\n", + " print('Found \"data/sr-data\" - not extracting.')\n", + "\n", + "# Download + extract Common Voice data\n", + "download_preformatted_data()" + ] + }, { "cell_type": "markdown", "metadata": { @@ -123,8 +134,8 @@ "id": "fa2aec77" }, "source": [ - "! ls kk-data\n", - "! wc -l kk-data/*.csv" + "! ls data/sr-data\n", + "! 
wc -l data/sr-data/*.csv" ], "id": "fa2aec77", "execution_count": null, @@ -155,9 +166,9 @@ "from coqui_stt_training.util.config import initialize_globals_from_args\n", "\n", "initialize_globals_from_args(\n", - " train_files=[\"kk-data/train.csv\"],\n", - " dev_files=[\"kk-data/dev.csv\"],\n", - " test_files=[\"kk-data/test.csv\"],\n", + " train_files=[\"data/sr-data/train.csv\"],\n", + " dev_files=[\"data/sr-data/dev.csv\"],\n", + " test_files=[\"data/sr-data/test.csv\"],\n", " load_train=\"init\",\n", " n_hidden=200,\n", " epochs=1,\n", diff --git a/notebooks/train_your_first_coqui_STT_model.ipynb b/notebooks/train_your_first_coqui_STT_model.ipynb index 26961b86..c53e2b8f 100644 --- a/notebooks/train_your_first_coqui_STT_model.ipynb +++ b/notebooks/train_your_first_coqui_STT_model.ipynb @@ -151,7 +151,7 @@ " test_files=[\"english/ldc93s1.csv\"],\n", " load_train=\"init\",\n", " n_hidden=100,\n", - " epochs=200,\n", + " epochs=100,\n", ")" ] }, From cbd3db9d288cc1b3e3cfb4c77f0caafe4eb293c0 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 07:16:08 -0400 Subject: [PATCH 06/17] Cosmetic notebook changes --- notebooks/train_with_common_voice.ipynb | 41 ++++++++++--------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb index 56160c5e..3c4989f9 100644 --- a/notebooks/train_with_common_voice.ipynb +++ b/notebooks/train_with_common_voice.ipynb @@ -37,7 +37,7 @@ "\n", "👋 Hello and welcome to Coqui (🐸) STT \n", "\n", - "The goal of this notebook is to show you a **typical workflow** for **training** and **testing** an STT model with 🐸 and data from Common Voice.\n", + "This notebook shows a **typical workflow** for **training** and **testing** an 🐸 STT model on data from Common Voice.\n", "\n", "In this notebook, we will:\n", "\n", @@ -74,21 +74,15 @@ "id": "be5fe49c" }, "source": [ - "## ✅ Download & format sample data for English\n", + "## ✅ Download & 
format sample data for Serbian\n", "\n", "**First things first**: we need some data.\n", "\n", - "We're training a Speech-to-Text model, so we need some _speech_ and we need some _text_. Specificially, we want _transcribed speech_. Let's download some audio and transcripts.\n", - "\n", - "🐸 STT expects to find information about your data in a CSV file, where each line contains:\n", - "\n", - "1. the **path** to an audio file\n", - "2. the **size** of that audio file\n", - "3. the **transcript** of that audio file.\n", + "We're training a Speech-to-Text model, so we want _speech_ and we want _text_. Specifically, we want _transcribed speech_. Let's download some audio and transcripts.\n", "\n", "To focus on model training, we formatted the Common Voice data for you already, and you will find CSV files for `{train,test,dev}.csv` in the data directory.\n", "\n", - "Let's train a speech-to-text model 😊\n" + "Let's download some data for Serbian 😊\n" ], "id": "be5fe49c" }, @@ -105,14 +99,15 @@ "from coqui_stt_training.util.downloader import maybe_download\n", "\n", "def download_preformatted_data():\n", - " if not os.path.exists(\"data/sr-data\"):\n", - " maybe_download(\"sr-data.tar\", \"data/\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", - " print('\\nNo extracted data found. 
Extracting now...')\n", - " tar = tarfile.open(\"data/sr-data.tar\", mode=\"r:\")\n", - " tar.extractall(\"data/\")\n", + " if not os.path.exists(\"sr-data\"):\n", + " maybe_download(\"sr-data.tar\", \".\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", + " print('\\nExtracting data...')\n", + " tar = tarfile.open(\"sr-data.tar\", mode=\"r:\")\n", + " tar.extractall(\".\")\n", " tar.close()\n", + " print('\\nFinished extracting data...')\n", " else:\n", - " print('Found \"data/sr-data\" - not extracting.')\n", + " print('Found data - not extracting.')\n", "\n", "# Download + extract Common Voice data\n", "download_preformatted_data()" @@ -134,8 +129,8 @@ "id": "fa2aec77" }, "source": [ - "! ls data/sr-data\n", - "! wc -l data/sr-data/*.csv" + "! ls sr-data\n", + "! wc -l sr-data/*.csv" ], "id": "fa2aec77", "execution_count": null, @@ -166,9 +161,9 @@ "from coqui_stt_training.util.config import initialize_globals_from_args\n", "\n", "initialize_globals_from_args(\n", - " train_files=[\"data/sr-data/train.csv\"],\n", - " dev_files=[\"data/sr-data/dev.csv\"],\n", - " test_files=[\"data/sr-data/test.csv\"],\n", + " train_files=[\"sr-data/train.csv\"],\n", + " dev_files=[\"sr-data/dev.csv\"],\n", + " test_files=[\"sr-data/test.csv\"],\n", " load_train=\"init\",\n", " n_hidden=200,\n", " epochs=1,\n", @@ -214,9 +209,7 @@ "source": [ "## ✅ Train a new model\n", "\n", - "Let's kick off a training run 🚀🚀🚀 (using the configure you set above).\n", - "\n", - "This notebook should work on either a GPU or a CPU. However, in case you're running this on _multiple_ GPUs we want to only use one, because the sample dataset (one audio file) is too small to split across multiple GPUs." 
+ "Let's kick off a training run 🚀🚀🚀 (using the configuration you set above).", ], "id": "ae82fd75" }, From f6a64e7dd880811af63aa8deaef28d2f3c038102 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 07:19:35 -0400 Subject: [PATCH 07/17] Typo --- notebooks/train_with_common_voice.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb index 3c4989f9..7bb845b7 100644 --- a/notebooks/train_with_common_voice.ipynb +++ b/notebooks/train_with_common_voice.ipynb @@ -209,7 +209,7 @@ "source": [ "## ✅ Train a new model\n", "\n", - "Let's kick off a training run 🚀🚀🚀 (using the configuration you set above).", + "Let's kick off a training run 🚀🚀🚀 (using the configuration you set above)." ], "id": "ae82fd75" }, From 8a3cea8b6d67e123c548f3d59470869ce5ba11ac Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 07:57:53 -0400 Subject: [PATCH 08/17] Cosmetic changes --- notebooks/train_your_first_coqui_STT_model.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/notebooks/train_your_first_coqui_STT_model.ipynb b/notebooks/train_your_first_coqui_STT_model.ipynb index c53e2b8f..df885b2d 100644 --- a/notebooks/train_your_first_coqui_STT_model.ipynb +++ b/notebooks/train_your_first_coqui_STT_model.ipynb @@ -54,9 +54,9 @@ "2. the **size** of that audio file\n", "3. the **transcript** of that audio file.\n", "\n", - "Formatting the audio and transcript isn't too difficult in this case. We define a custom data importer called `download_sample_data()` which does all the work. If you have a custom dataset, you will probably want to write a custom data importer.\n", + "Formatting the audio and transcript isn't too difficult in this case. We define `download_sample_data()` which does all the work. If you have a custom dataset, you will want to write a custom data importer.\n", "\n", - "**Second things second**: we want an alphabet. 
The output layer of a typical* 🐸 STT model represents letters in the alphabet, and you should specify this alphabet before training. Let's download an English alphabet from Coqui and use that.\n", + "**Second things second**: we want an alphabet. The output layer of a typical* 🐸 STT model represents letters in the alphabet. Let's download an English alphabet from Coqui and use that.\n", "\n", "*_If you are working with languages with large character sets (e.g. Chinese), you can set `bytes_output_mode=True` instead of supplying an `alphabet.txt` file. In this case, the output layer of the STT model will correspond to individual UTF-8 bytes instead of individual characters._" ] @@ -98,7 +98,7 @@ "id": "96e8b708", "metadata": {}, "source": [ - "### Take a look at the data (*Optional* )" + "### 👀 Take a look at the data" ] }, { @@ -150,7 +150,7 @@ " dev_files=[\"english/ldc93s1.csv\"],\n", " test_files=[\"english/ldc93s1.csv\"],\n", " load_train=\"init\",\n", - " n_hidden=100,\n", + " n_hidden=200,\n", " epochs=100,\n", ")" ] @@ -160,7 +160,7 @@ "id": "799c1425", "metadata": {}, "source": [ - "### View all Config settings (*Optional*) " + "### 👀 View all Config settings" ] }, { From 6405bd1758cf8d985d0bb53b20f6f32f5783ab35 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 09:08:12 -0400 Subject: [PATCH 09/17] Add CI tests for notebooks --- ci_scripts/notebook-tests.sh | 14 ++++++++++++++ notebooks/train_with_common_voice.ipynb | 19 ++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) create mode 100755 ci_scripts/notebook-tests.sh diff --git a/ci_scripts/notebook-tests.sh b/ci_scripts/notebook-tests.sh new file mode 100755 index 00000000..3872b14c --- /dev/null +++ b/ci_scripts/notebook-tests.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -xe + +source $(dirname "$0")/all-vars.sh +source $(dirname "$0")/all-utils.sh + +set -o pipefail +pip install --upgrade pip setuptools wheel | cat +pip install --upgrade . 
| cat +set +o pipefail + +for python_notebook in ./notebooks/*.ipynb; do + time jupyter nbconvert --to notebook --execute $python_notebook +done diff --git a/notebooks/train_with_common_voice.ipynb b/notebooks/train_with_common_voice.ipynb index 7bb845b7..b492f4f2 100644 --- a/notebooks/train_with_common_voice.ipynb +++ b/notebooks/train_with_common_voice.ipynb @@ -99,11 +99,11 @@ "from coqui_stt_training.util.downloader import maybe_download\n", "\n", "def download_preformatted_data():\n", - " if not os.path.exists(\"sr-data\"):\n", - " maybe_download(\"sr-data.tar\", \".\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", + " if not os.path.exists(\"serbian/sr-data\"):\n", + " maybe_download(\"sr-data.tar\", \"serbian/\", \"https://coqui-ai-public-data.s3.amazonaws.com/cv/7.0/sr-data.tar\")\n", " print('\\nExtracting data...')\n", - " tar = tarfile.open(\"sr-data.tar\", mode=\"r:\")\n", - " tar.extractall(\".\")\n", + " tar = tarfile.open(\"serbian/sr-data.tar\", mode=\"r:\")\n", + " tar.extractall(\"serbian/\")\n", " tar.close()\n", " print('\\nFinished extracting data...')\n", " else:\n", @@ -129,8 +129,8 @@ "id": "fa2aec77" }, "source": [ - "! ls sr-data\n", - "! wc -l sr-data/*.csv" + "! ls serbian/sr-data\n", + "! 
wc -l serbian/sr-data/*.csv" ], "id": "fa2aec77", "execution_count": null, @@ -161,9 +161,10 @@ "from coqui_stt_training.util.config import initialize_globals_from_args\n", "\n", "initialize_globals_from_args(\n", - " train_files=[\"sr-data/train.csv\"],\n", - " dev_files=[\"sr-data/dev.csv\"],\n", - " test_files=[\"sr-data/test.csv\"],\n", + " train_files=[\"serbian/sr-data/train.csv\"],\n", + " dev_files=[\"serbian/sr-data/dev.csv\"],\n", + " test_files=[\"serbian/sr-data/test.csv\"],\n", + " checkpoint_dir=\"serbian/checkpoints/\",\n", " load_train=\"init\",\n", " n_hidden=200,\n", " epochs=1,\n", From bd7809421d8f0aa757ce3fd8032c0427f6f9b40e Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 09:19:46 -0400 Subject: [PATCH 10/17] Add notebooks to CI workflow --- .github/workflows/build-and-test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 70429f90..6120df29 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -695,6 +695,9 @@ jobs: mv convert_graphdef_memmapped_format /tmp - run: | mkdir -p ${CI_ARTIFACTS_DIR} || true + - name: Run python notebooks + run: | + ./ci_scripts/notebook-tests.sh - name: Run basic training tests run: | python -m pip install coqui_stt_ctcdecoder-*.whl From 242d2eff2cbad2eea44784b7c4e8b19f126dfd8a Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 10:09:05 -0400 Subject: [PATCH 11/17] Add missing jupyter install in CI --- .github/workflows/build-and-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 9d503998..6f97e022 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -697,6 +697,7 @@ jobs: mkdir -p ${CI_ARTIFACTS_DIR} || true - name: Run python notebooks run: | + python -m pip install jupyter ./ci_scripts/notebook-tests.sh - name: Run basic 
training tests run: | From 1a55ce8078d6c9f0880e39625c223768cc9c09e5 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 12:03:30 -0400 Subject: [PATCH 12/17] Add missing opus tools to CI --- .github/workflows/build-and-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 6f97e022..a0109a41 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -697,6 +697,7 @@ jobs: mkdir -p ${CI_ARTIFACTS_DIR} || true - name: Run python notebooks run: | + apt-get install libopusfile0 libopus-dev libopusfile-dev python -m pip install jupyter ./ci_scripts/notebook-tests.sh - name: Run basic training tests From be7500c8b779cac2f167b6adc1b9001773d8a67a Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 12:04:34 -0400 Subject: [PATCH 13/17] Fix Typo --- notebooks/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/README.md b/notebooks/README.md index 81474be6..e7fdd3a8 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -4,4 +4,4 @@ |----------------|---------------|-------------| |Train your first 🐸 STT model | English | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_your_first_coqui-STT_model.ipynb) | |Easy Transfer learning | English --> Russian | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/easy_transfer_learning.ipynb)| -| Train a model with Common Voice | Kazakh | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) | +| Train a model with Common Voice | Serbian | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/coqui-ai/STT/blob/main/notebooks/train_with_common_voice.ipynb) | From 90d4e43c5821c5b3081e0b6f97f6a022bab74890 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Wed, 15 Sep 2021 12:22:05 -0400 Subject: [PATCH 14/17] Use sudo for installing opus things --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index a0109a41..343551cc 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -697,7 +697,7 @@ jobs: mkdir -p ${CI_ARTIFACTS_DIR} || true - name: Run python notebooks run: | - apt-get install libopusfile0 libopus-dev libopusfile-dev + sudo apt-get install -y --no-install-recommends libopusfile0 libopus-dev libopusfile-dev python -m pip install jupyter ./ci_scripts/notebook-tests.sh - name: Run basic training tests From c78f98a7bc81c3f830b540c1cfac071b0496c4fd Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Thu, 16 Sep 2021 03:19:09 -0400 Subject: [PATCH 15/17] Add separate job to CI for notebook tests --- .github/workflows/build-and-test.yml | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 343551cc..8e97712c 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -670,6 +670,22 @@ jobs: bitrate: ${{ matrix.bitrate }} model-kind: ${{ matrix.models }} timeout-minutes: 5 + python-notebooks-tests: + name: "Lin|Python notebook tests" + runs-on: ubuntu-20.04 + if: ${{ github.event_name == 'pull_request' }} + strategy: + matrix: + pyver: [3.6, 3.7] + steps: + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.pyver }} + - name: Run python notebooks + run: | + sudo apt-get install -y --no-install-recommends libopusfile0 
libopus-dev libopusfile-dev + python -m pip install jupyter + ./ci_scripts/notebook-tests.sh training-basic-tests: name: "Lin|Basic training tests" runs-on: ubuntu-20.04 @@ -695,11 +711,6 @@ jobs: mv convert_graphdef_memmapped_format /tmp - run: | mkdir -p ${CI_ARTIFACTS_DIR} || true - - name: Run python notebooks - run: | - sudo apt-get install -y --no-install-recommends libopusfile0 libopus-dev libopusfile-dev - python -m pip install jupyter - ./ci_scripts/notebook-tests.sh - name: Run basic training tests run: | python -m pip install coqui_stt_ctcdecoder-*.whl From 8cfc1163e284599aaf0ef30cf76d87e580289006 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Thu, 16 Sep 2021 03:28:11 -0400 Subject: [PATCH 16/17] Add checkout action --- .github/workflows/build-and-test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 8e97712c..e5eb3103 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -674,13 +674,13 @@ jobs: name: "Lin|Python notebook tests" runs-on: ubuntu-20.04 if: ${{ github.event_name == 'pull_request' }} - strategy: - matrix: - pyver: [3.6, 3.7] steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 1 - uses: actions/setup-python@v2 with: - python-version: ${{ matrix.pyver }} + python-version: 3.8 - name: Run python notebooks run: | sudo apt-get install -y --no-install-recommends libopusfile0 libopus-dev libopusfile-dev From 7cbe879fc6fea4221df4882e645dbb99c80b1425 Mon Sep 17 00:00:00 2001 From: Josh Meyer Date: Thu, 16 Sep 2021 03:36:32 -0400 Subject: [PATCH 17/17] Use python 3.7, not 3.8 --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index e5eb3103..ab71966c 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -680,7 +680,7 @@ 
jobs: fetch-depth: 1 - uses: actions/setup-python@v2 with: - python-version: 3.8 + python-version: 3.7 - name: Run python notebooks run: | sudo apt-get install -y --no-install-recommends libopusfile0 libopus-dev libopusfile-dev