Sort importer imports with isort
commit b7e6b8c3e6
parent 20b0ab17ea
.isort.cfg (new file, +4 lines)
@@ -0,0 +1,4 @@
+[settings]
+line_length=80
+multi_line_output=3
+default_section=FIRSTPARTY
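For reference, these settings cap import lines at 80 characters, select isort's vertical-hanging-indent wrapping style (multi_line_output=3), and file any package isort cannot classify under the first-party section. A minimal sketch of the wrapped style this produces, using an import that appears in the hunks below:

from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    print_import_report
)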
@@ -6,11 +6,19 @@ Use "python3 build_sdb.py -h" for help
 from __future__ import absolute_import, division, print_function

 import argparse
+
 import progressbar

+from deepspeech_training.util.audio import (
+    AUDIO_TYPE_OPUS,
+    AUDIO_TYPE_WAV,
+    change_audio_types
+)
 from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.audio import change_audio_types, AUDIO_TYPE_WAV, AUDIO_TYPE_OPUS
-from deepspeech_training.util.sample_collections import samples_from_files, DirectSDBWriter
+from deepspeech_training.util.sample_collections import (
+    DirectSDBWriter,
+    samples_from_files
+)

 AUDIO_TYPE_LOOKUP = {
     'wav': AUDIO_TYPE_WAV,
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

-import tensorflow.compat.v1 as tfv1
 import sys

+import tensorflow.compat.v1 as tfv1
 from google.protobuf import text_format

@@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function

 import glob
 import os
-import pandas
 import tarfile

+import pandas
+
 from deepspeech_training.util.importers import get_importers_parser

 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
@@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function

 import glob
 import os
-import pandas
 import tarfile

+import pandas
+
 from deepspeech_training.util.importers import get_importers_parser

 COLUMNNAMES = ['wav_filename', 'wav_filesize', 'transcript']
@@ -3,15 +3,22 @@ from __future__ import absolute_import, division, print_function

 import csv
 import os
-import progressbar
-import sox
 import subprocess
 import tarfile
-
 from glob import glob
 from multiprocessing import Pool
-from deepspeech_training.util.importers import validate_label_eng as validate_label, get_counter, get_imported_samples, print_import_report
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
+
+import progressbar
+import sox
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    print_import_report
+)
+from deepspeech_training.util.importers import \
+    validate_label_eng as validate_label

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
@@ -10,16 +10,22 @@ from __future__ import absolute_import, division, print_function

 import csv
 import os
-import progressbar
-import sox
 import subprocess
 import unicodedata

 from multiprocessing import Pool
-from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.text import Alphabet
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
+
+import progressbar
+import sox
+
+from deepspeech_training.util.downloader import SIMPLE_BAR
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    get_importers_parser,
+    get_validate_label,
+    print_import_report
+)
+from deepspeech_training.util.text import Alphabet

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
@@ -1,20 +1,24 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function

-# Prerequisite: Having the sph2pipe tool in your PATH:
-# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
-
 import codecs
 import fnmatch
-import librosa
 import os
-import pandas
-import soundfile # <= Has an external dependency on libsndfile
 import subprocess
 import sys
 import unicodedata

-from deepspeech_training.util.importers import validate_label_eng as validate_label
+import librosa
+import pandas
+import soundfile # <= Has an external dependency on libsndfile
+
+from deepspeech_training.util.importers import \
+    validate_label_eng as validate_label
+
+# Prerequisite: Having the sph2pipe tool in your PATH:
+# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
+
+

 def _download_and_preprocess_data(data_dir):
     # Assume data_dir contains extracted LDC2004S13, LDC2004T19, LDC2005S13, LDC2005T19
@@ -2,11 +2,12 @@
 from __future__ import absolute_import, division, print_function

 import glob
-import numpy as np
 import os
-import pandas
 import tarfile

+import numpy as np
+import pandas
+
 from deepspeech_training.util.importers import get_importers_parser

 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
@@ -4,15 +4,18 @@ import csv
 import logging
 import math
 import os
-import pandas as pd
-import swifter
 import subprocess
 import urllib
-
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label
 from pathlib import Path
+
+import pandas as pd
 from sox import Transformer
-
+import swifter
+
+from deepspeech_training.util.importers import (
+    get_importers_parser,
+    get_validate_label
+)

 __version__ = "0.1.0"
 _logger = logging.getLogger(__name__)
@@ -1,12 +1,14 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function

-import pandas
 import os
 import sys

+import pandas
+
 from deepspeech_training.util.downloader import maybe_download

+
 def _download_and_preprocess_data(data_dir):
     # Conditionally download data
     LDC93S1_BASE = "LDC93S1"
@@ -4,17 +4,18 @@ from __future__ import absolute_import, division, print_function
 import codecs
 import fnmatch
 import os
-import pandas
-import progressbar
 import subprocess
 import sys
 import tarfile
 import unicodedata

-from deepspeech_training.util.downloader import maybe_download
+import pandas
+import progressbar
 from sox import Transformer
 from tensorflow.python.platform import gfile

+from deepspeech_training.util.downloader import maybe_download
+
 SAMPLE_RATE = 16000

 def _download_and_preprocess_data(data_dir):
@@ -4,20 +4,25 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
-import progressbar
 import re
-import sox
 import subprocess
 import unicodedata
 import zipfile
-
-from deepspeech_training.util.downloader import maybe_download
-from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
-from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
+
+import progressbar
+import sox
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    get_importers_parser,
+    get_validate_label,
+    print_import_report
+)
+from deepspeech_training.util.text import Alphabet

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
@@ -4,18 +4,24 @@ from __future__ import absolute_import, division, print_function

 import csv
 import os
-import progressbar
 import subprocess
 import tarfile
 import unicodedata
-
-from deepspeech_training.util.downloader import maybe_download
-from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
-from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
+
+import progressbar
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    get_importers_parser,
+    get_validate_label,
+    print_import_report
+)
+from deepspeech_training.util.text import Alphabet

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
 MAX_SECS = 15
@@ -3,10 +3,11 @@ from __future__ import absolute_import, division, print_function

 import glob
 import os
-import pandas
 import tarfile
 import wave

+import pandas
+
 from deepspeech_training.util.importers import get_importers_parser

 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
@@ -3,11 +3,12 @@ from __future__ import absolute_import, division, print_function

 import glob
 import json
-import numpy as np
 import os
-import pandas
 import tarfile

+import numpy as np
+import pandas
+
 from deepspeech_training.util.importers import get_importers_parser

 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
@@ -3,20 +3,26 @@ from __future__ import absolute_import, division, print_function

 import csv
 import os
-import progressbar
 import re
-import sox
 import subprocess
 import tarfile
 import unicodedata
 import zipfile
-
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
-from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
+
+import progressbar
+import sox
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    get_importers_parser,
+    get_validate_label,
+    print_import_report
+)
+from deepspeech_training.util.text import Alphabet

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
@@ -6,19 +6,20 @@ from __future__ import absolute_import, division, print_function
 # from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
 import codecs
 import fnmatch
-import librosa
 import os
-import pandas
-import requests
-import soundfile # <= Has an external dependency on libsndfile
 import subprocess
 import sys
 import tarfile
 import unicodedata
 import wave

-from deepspeech_training.util.importers import validate_label_eng as validate_label
+import librosa
+import pandas
+import requests
+import soundfile # <= Has an external dependency on libsndfile
+
+from deepspeech_training.util.importers import \
+    validate_label_eng as validate_label

 # ARCHIVE_NAME refers to ISIP alignments from 01/29/03
 ARCHIVE_NAME = 'switchboard_word_alignments.tar.gz'
@@ -8,23 +8,25 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
-import progressbar
 import random
 import re
 import shutil
-import sox
 import sys
 import tarfile
 import unicodedata
 import wave
 import xml.etree.cElementTree as ET
-
-from glob import glob
 from collections import Counter
+from glob import glob
 from multiprocessing.pool import ThreadPool
+
+import progressbar
+import sox
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import \
+    validate_label_eng as validate_label
 from deepspeech_training.util.text import Alphabet
-from deepspeech_training.util.importers import validate_label_eng as validate_label
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR

 SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
 SWC_ARCHIVE = "SWC_{language}.tar"
@@ -1,17 +1,18 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function

-import pandas
 import sys
 import tarfile
 import unicodedata
 import wave
-
 from glob import glob
 from os import makedirs, path, remove, rmdir
+
+import pandas
 from sox import Transformer
-from deepspeech_training.util.downloader import maybe_download
 from tensorflow.python.platform import gfile

+from deepspeech_training.util.downloader import maybe_download
 from deepspeech_training.util.stm import parse_stm_file


@@ -11,13 +11,15 @@
 '''

 import errno
+import fnmatch
 import os
-from os import path
+import subprocess
 import sys
 import tarfile
-import fnmatch
+from os import path

 import pandas as pd
-import subprocess

 def clean(word):
     # LC ALL & strip punctuation which are not required
@@ -3,18 +3,23 @@ from __future__ import absolute_import, division, print_function

 import csv
 import os
-import progressbar
 import re
-import sox
 import subprocess
-import unidecode
 import zipfile
-
-from deepspeech_training.util.downloader import maybe_download
-from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 from multiprocessing import Pool
+
+import progressbar
+import sox
+import unidecode
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    get_importers_parser,
+    get_validate_label,
+    print_import_report
+)

 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
@@ -8,15 +8,17 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
-import progressbar
 import tarfile
 import unicodedata
 import wave
 import xml.etree.cElementTree as ET
-
 from collections import Counter
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
-from deepspeech_training.util.importers import validate_label_eng as validate_label
+
+import progressbar
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import \
+    validate_label_eng as validate_label
 from deepspeech_training.util.text import Alphabet

 TUDA_VERSION = 'v2'
@@ -4,17 +4,21 @@
 # as per https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html
 from __future__ import absolute_import, division, print_function

-import librosa
 import os
-import progressbar
 import random
 import re
-
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
-from deepspeech_training.util.importers import get_counter, get_imported_samples, print_import_report
 from multiprocessing import Pool
 from zipfile import ZipFile
+
+import librosa
+import progressbar
+
+from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
+from deepspeech_training.util.importers import (
+    get_counter,
+    get_imported_samples,
+    print_import_report
+)

 SAMPLE_RATE = 16000
 MAX_SECS = 10
@@ -3,20 +3,21 @@ from __future__ import absolute_import, division, print_function

 import codecs
 import os
-import pandas
 import re
 import tarfile
 import threading
 import unicodedata

-from bs4 import BeautifulSoup
-from deepspeech_training.util.downloader import maybe_download
 from glob import glob
 from multiprocessing.pool import ThreadPool
 from os import makedirs, path
+
+import pandas
+from bs4 import BeautifulSoup
 from six.moves import urllib
 from tensorflow.python.platform import gfile

+from deepspeech_training.util.downloader import maybe_download
+
 """The number of jobs to run in parallel"""
 NUM_PARALLEL = 8
@@ -188,7 +189,3 @@ def _generate_dataset(data_dir, data_set):

 if __name__=="__main__":
     _download_and_preprocess_data(sys.argv[1])
-
-
-
-
@@ -1,9 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

-import tensorflow.compat.v1 as tfv1
 import sys

+import tensorflow.compat.v1 as tfv1
+
+
 def main():
     with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
         graph_def = tfv1.GraphDef()
@@ -10,7 +10,10 @@ import random
 import sys

 from deepspeech_training.util.audio import AUDIO_TYPE_PCM
-from deepspeech_training.util.sample_collections import samples_from_file, LabeledSample
+from deepspeech_training.util.sample_collections import (
+    LabeledSample,
+    samples_from_file
+)


 def play_sample(samples, index):