Currently, train.py is overloaded with many independent features. Understanding the code, and what a given training call will actually do, requires untangling the entire script. This also makes for an error-prone UX. This is a first step toward separating the independent parts into their own scripts.
26 lines
695 B
Python
Executable File
26 lines
695 B
Python
Executable File
#!/usr/bin/env python
"""Train and then test a small model on the LDC93S1 sample.

The same CSV is passed as the training, dev and test set, and the run is
pinned to a single GPU. All steps execute at module level, in order:
download/preprocess, configure globals, train, test.
"""
import os

from import_ldc93s1 import _download_and_preprocess_data as download_ldc
from coqui_stt_training.util.config import initialize_globals_from_args
from coqui_stt_training.train import train
from coqui_stt_training.evaluate import test

# The one CSV that serves as train, dev and test set below.
LDC93S1_CSV = "data/ldc93s1/ldc93s1.csv"

# only one GPU for only one training sample
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# NOTE(review): presumably this fetches the sample and writes LDC93S1_CSV
# under the given directory -- confirm against import_ldc93s1.
download_ldc("data/ldc93s1")

# Configuration is supplied programmatically as keyword arguments; it must be
# in place before train() and test() are called, since neither takes arguments.
initialize_globals_from_args(
    load_train="init",
    alphabet_config_path="data/alphabet.txt",
    train_files=[LDC93S1_CSV],
    dev_files=[LDC93S1_CSV],
    test_files=[LDC93S1_CSV],
    augment=["time_mask"],
    n_hidden=100,
    epochs=200,
)

train()
test()