Sort importer imports with isort

This commit is contained in:
Reuben Morais 2020-03-31 13:43:00 +02:00
parent 20b0ab17ea
commit b7e6b8c3e6
27 changed files with 174 additions and 98 deletions

4
.isort.cfg Normal file
View File

@ -0,0 +1,4 @@
[settings]
line_length=80
multi_line_output=3
default_section=FIRSTPARTY

View File

@ -6,11 +6,19 @@ Use "python3 build_sdb.py -h" for help
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import argparse import argparse
import progressbar import progressbar
from deepspeech_training.util.audio import (
AUDIO_TYPE_OPUS,
AUDIO_TYPE_WAV,
change_audio_types
)
from deepspeech_training.util.downloader import SIMPLE_BAR from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.audio import change_audio_types, AUDIO_TYPE_WAV, AUDIO_TYPE_OPUS from deepspeech_training.util.sample_collections import (
from deepspeech_training.util.sample_collections import samples_from_files, DirectSDBWriter DirectSDBWriter,
samples_from_files
)
AUDIO_TYPE_LOOKUP = { AUDIO_TYPE_LOOKUP = {
'wav': AUDIO_TYPE_WAV, 'wav': AUDIO_TYPE_WAV,

View File

@ -1,9 +1,9 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import tensorflow.compat.v1 as tfv1
import sys import sys
import tensorflow.compat.v1 as tfv1
from google.protobuf import text_format from google.protobuf import text_format

View File

@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
import glob import glob
import os import os
import pandas
import tarfile import tarfile
import pandas
from deepspeech_training.util.importers import get_importers_parser from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript'] COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']

View File

@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
import glob import glob
import os import os
import pandas
import tarfile import tarfile
import pandas
from deepspeech_training.util.importers import get_importers_parser from deepspeech_training.util.importers import get_importers_parser
COLUMNNAMES = ['wav_filename', 'wav_filesize', 'transcript'] COLUMNNAMES = ['wav_filename', 'wav_filesize', 'transcript']

View File

@ -3,15 +3,22 @@ from __future__ import absolute_import, division, print_function
import csv import csv
import os import os
import progressbar
import sox
import subprocess import subprocess
import tarfile import tarfile
from glob import glob from glob import glob
from multiprocessing import Pool from multiprocessing import Pool
from deepspeech_training.util.importers import validate_label_eng as validate_label, get_counter, get_imported_samples, print_import_report
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
print_import_report
)
from deepspeech_training.util.importers import \
validate_label_eng as validate_label
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000

View File

@ -10,16 +10,22 @@ from __future__ import absolute_import, division, print_function
import csv import csv
import os import os
import progressbar
import sox
import subprocess import subprocess
import unicodedata import unicodedata
from multiprocessing import Pool from multiprocessing import Pool
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.text import Alphabet
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report
)
from deepspeech_training.util.text import Alphabet
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000

View File

@ -1,20 +1,24 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
# Prerequisite: Having the sph2pipe tool in your PATH:
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
import codecs import codecs
import fnmatch import fnmatch
import librosa
import os import os
import pandas
import soundfile # <= Has an external dependency on libsndfile
import subprocess import subprocess
import sys import sys
import unicodedata import unicodedata
from deepspeech_training.util.importers import validate_label_eng as validate_label import librosa
import pandas
import soundfile # <= Has an external dependency on libsndfile
from deepspeech_training.util.importers import \
validate_label_eng as validate_label
# Prerequisite: Having the sph2pipe tool in your PATH:
# https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
def _download_and_preprocess_data(data_dir): def _download_and_preprocess_data(data_dir):
# Assume data_dir contains extracted LDC2004S13, LDC2004T19, LDC2005S13, LDC2005T19 # Assume data_dir contains extracted LDC2004S13, LDC2004T19, LDC2005S13, LDC2005T19

View File

@ -2,11 +2,12 @@
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import glob import glob
import numpy as np
import os import os
import pandas
import tarfile import tarfile
import numpy as np
import pandas
from deepspeech_training.util.importers import get_importers_parser from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript'] COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']

View File

@ -4,15 +4,18 @@ import csv
import logging import logging
import math import math
import os import os
import pandas as pd
import swifter
import subprocess import subprocess
import urllib import urllib
from deepspeech_training.util.importers import get_importers_parser, get_validate_label
from pathlib import Path from pathlib import Path
import pandas as pd
from sox import Transformer from sox import Transformer
import swifter
from deepspeech_training.util.importers import (
get_importers_parser,
get_validate_label
)
__version__ = "0.1.0" __version__ = "0.1.0"
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)

View File

@ -1,12 +1,14 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import pandas
import os import os
import sys import sys
import pandas
from deepspeech_training.util.downloader import maybe_download from deepspeech_training.util.downloader import maybe_download
def _download_and_preprocess_data(data_dir): def _download_and_preprocess_data(data_dir):
# Conditionally download data # Conditionally download data
LDC93S1_BASE = "LDC93S1" LDC93S1_BASE = "LDC93S1"

View File

@ -4,17 +4,18 @@ from __future__ import absolute_import, division, print_function
import codecs import codecs
import fnmatch import fnmatch
import os import os
import pandas
import progressbar
import subprocess import subprocess
import sys import sys
import tarfile import tarfile
import unicodedata import unicodedata
from deepspeech_training.util.downloader import maybe_download import pandas
import progressbar
from sox import Transformer from sox import Transformer
from tensorflow.python.platform import gfile from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
def _download_and_preprocess_data(data_dir): def _download_and_preprocess_data(data_dir):

View File

@ -4,20 +4,25 @@ from __future__ import absolute_import, division, print_function
import argparse import argparse
import csv import csv
import os import os
import progressbar
import re import re
import sox
import subprocess import subprocess
import unicodedata import unicodedata
import zipfile import zipfile
from deepspeech_training.util.downloader import maybe_download
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
from deepspeech_training.util.text import Alphabet
from glob import glob from glob import glob
from multiprocessing import Pool from multiprocessing import Pool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report
)
from deepspeech_training.util.text import Alphabet
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000

View File

@ -4,18 +4,24 @@ from __future__ import absolute_import, division, print_function
import csv import csv
import os import os
import progressbar
import subprocess import subprocess
import tarfile import tarfile
import unicodedata import unicodedata
from deepspeech_training.util.downloader import maybe_download
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
from deepspeech_training.util.text import Alphabet
from glob import glob from glob import glob
from multiprocessing import Pool from multiprocessing import Pool
import progressbar
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report
)
from deepspeech_training.util.text import Alphabet
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
MAX_SECS = 15 MAX_SECS = 15

View File

@ -3,10 +3,11 @@ from __future__ import absolute_import, division, print_function
import glob import glob
import os import os
import pandas
import tarfile import tarfile
import wave import wave
import pandas
from deepspeech_training.util.importers import get_importers_parser from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript'] COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']

View File

@ -3,11 +3,12 @@ from __future__ import absolute_import, division, print_function
import glob import glob
import json import json
import numpy as np
import os import os
import pandas
import tarfile import tarfile
import numpy as np
import pandas
from deepspeech_training.util.importers import get_importers_parser from deepspeech_training.util.importers import get_importers_parser
COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript'] COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']

View File

@ -3,20 +3,26 @@ from __future__ import absolute_import, division, print_function
import csv import csv
import os import os
import progressbar
import re import re
import sox
import subprocess import subprocess
import tarfile import tarfile
import unicodedata import unicodedata
import zipfile import zipfile
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
from deepspeech_training.util.text import Alphabet
from glob import glob from glob import glob
from multiprocessing import Pool from multiprocessing import Pool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report
)
from deepspeech_training.util.text import Alphabet
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000

View File

@ -6,19 +6,20 @@ from __future__ import absolute_import, division, print_function
# from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/ # from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
import codecs import codecs
import fnmatch import fnmatch
import librosa
import os import os
import pandas
import requests
import soundfile # <= Has an external dependency on libsndfile
import subprocess import subprocess
import sys import sys
import tarfile import tarfile
import unicodedata import unicodedata
import wave import wave
from deepspeech_training.util.importers import validate_label_eng as validate_label import librosa
import pandas
import requests
import soundfile # <= Has an external dependency on libsndfile
from deepspeech_training.util.importers import \
validate_label_eng as validate_label
# ARCHIVE_NAME refers to ISIP alignments from 01/29/03 # ARCHIVE_NAME refers to ISIP alignments from 01/29/03
ARCHIVE_NAME = 'switchboard_word_alignments.tar.gz' ARCHIVE_NAME = 'switchboard_word_alignments.tar.gz'

View File

@ -8,23 +8,25 @@ from __future__ import absolute_import, division, print_function
import argparse import argparse
import csv import csv
import os import os
import progressbar
import random import random
import re import re
import shutil import shutil
import sox
import sys import sys
import tarfile import tarfile
import unicodedata import unicodedata
import wave import wave
import xml.etree.cElementTree as ET import xml.etree.cElementTree as ET
from glob import glob
from collections import Counter from collections import Counter
from glob import glob
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
import progressbar
import sox
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import \
validate_label_eng as validate_label
from deepspeech_training.util.text import Alphabet from deepspeech_training.util.text import Alphabet
from deepspeech_training.util.importers import validate_label_eng as validate_label
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar" SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
SWC_ARCHIVE = "SWC_{language}.tar" SWC_ARCHIVE = "SWC_{language}.tar"

View File

@ -1,17 +1,18 @@
#!/usr/bin/env python #!/usr/bin/env python
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import pandas
import sys import sys
import tarfile import tarfile
import unicodedata import unicodedata
import wave import wave
from glob import glob from glob import glob
from os import makedirs, path, remove, rmdir from os import makedirs, path, remove, rmdir
import pandas
from sox import Transformer from sox import Transformer
from deepspeech_training.util.downloader import maybe_download
from tensorflow.python.platform import gfile from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
from deepspeech_training.util.stm import parse_stm_file from deepspeech_training.util.stm import parse_stm_file

View File

@ -11,13 +11,15 @@
''' '''
import errno import errno
import fnmatch
import os import os
from os import path import subprocess
import sys import sys
import tarfile import tarfile
import fnmatch from os import path
import pandas as pd import pandas as pd
import subprocess
def clean(word): def clean(word):
# LC ALL & strip punctuation which are not required # LC ALL & strip punctuation which are not required

View File

@ -3,18 +3,23 @@ from __future__ import absolute_import, division, print_function
import csv import csv
import os import os
import progressbar
import re import re
import sox
import subprocess import subprocess
import unidecode
import zipfile import zipfile
from deepspeech_training.util.downloader import maybe_download
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
from multiprocessing import Pool from multiprocessing import Pool
import progressbar
import sox
import unidecode
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
get_importers_parser,
get_validate_label,
print_import_report
)
FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript'] FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000

View File

@ -8,15 +8,17 @@ from __future__ import absolute_import, division, print_function
import argparse import argparse
import csv import csv
import os import os
import progressbar
import tarfile import tarfile
import unicodedata import unicodedata
import wave import wave
import xml.etree.cElementTree as ET import xml.etree.cElementTree as ET
from collections import Counter from collections import Counter
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
from deepspeech_training.util.importers import validate_label_eng as validate_label import progressbar
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import \
validate_label_eng as validate_label
from deepspeech_training.util.text import Alphabet from deepspeech_training.util.text import Alphabet
TUDA_VERSION = 'v2' TUDA_VERSION = 'v2'

View File

@ -4,17 +4,21 @@
# as per https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html # as per https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html
from __future__ import absolute_import, division, print_function from __future__ import absolute_import, division, print_function
import librosa
import os import os
import progressbar
import random import random
import re import re
from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
from deepspeech_training.util.importers import get_counter, get_imported_samples, print_import_report
from multiprocessing import Pool from multiprocessing import Pool
from zipfile import ZipFile from zipfile import ZipFile
import librosa
import progressbar
from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
from deepspeech_training.util.importers import (
get_counter,
get_imported_samples,
print_import_report
)
SAMPLE_RATE = 16000 SAMPLE_RATE = 16000
MAX_SECS = 10 MAX_SECS = 10

View File

@ -3,20 +3,21 @@ from __future__ import absolute_import, division, print_function
import codecs import codecs
import os import os
import pandas
import re import re
import tarfile import tarfile
import threading import threading
import unicodedata import unicodedata
from bs4 import BeautifulSoup
from deepspeech_training.util.downloader import maybe_download
from glob import glob from glob import glob
from multiprocessing.pool import ThreadPool from multiprocessing.pool import ThreadPool
from os import makedirs, path from os import makedirs, path
import pandas
from bs4 import BeautifulSoup
from six.moves import urllib from six.moves import urllib
from tensorflow.python.platform import gfile from tensorflow.python.platform import gfile
from deepspeech_training.util.downloader import maybe_download
"""The number of jobs to run in parallel""" """The number of jobs to run in parallel"""
NUM_PARALLEL = 8 NUM_PARALLEL = 8
@ -188,7 +189,3 @@ def _generate_dataset(data_dir, data_set):
if __name__=="__main__": if __name__=="__main__":
_download_and_preprocess_data(sys.argv[1]) _download_and_preprocess_data(sys.argv[1])

View File

@ -1,9 +1,11 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import tensorflow.compat.v1 as tfv1
import sys import sys
import tensorflow.compat.v1 as tfv1
def main(): def main():
with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin: with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
graph_def = tfv1.GraphDef() graph_def = tfv1.GraphDef()

View File

@ -10,7 +10,10 @@ import random
import sys import sys
from deepspeech_training.util.audio import AUDIO_TYPE_PCM from deepspeech_training.util.audio import AUDIO_TYPE_PCM
from deepspeech_training.util.sample_collections import samples_from_file, LabeledSample from deepspeech_training.util.sample_collections import (
LabeledSample,
samples_from_file
)
def play_sample(samples, index): def play_sample(samples, index):