Sort importer imports with isort

2020-03-31 13:43:00 +02:00 · 2020-03-31 13:43:00 +02:00 · b7e6b8c3e6
commit b7e6b8c3e6
parent 20b0ab17ea
27 changed files with 174 additions and 98 deletions
--- a/.isort.cfg
+++ b/.isort.cfg
@ -0,0 +1,4 @@
 [settings]
 line_length=80
 multi_line_output=3
 default_section=FIRSTPARTY
--- a/bin/build_sdb.py
+++ b/bin/build_sdb.py
@ -6,11 +6,19 @@ Use "python3 build_sdb.py -h" for help
 from __future__ import absolute_import, division, print_function
 import argparse
 import progressbar
 from deepspeech_training.util.audio import (
    AUDIO_TYPE_OPUS,
    AUDIO_TYPE_WAV,
    change_audio_types
 )
 from deepspeech_training.util.downloader import SIMPLE_BAR
-from deepspeech_training.util.audio import change_audio_types, AUDIO_TYPE_WAV, AUDIO_TYPE_OPUS
+from deepspeech_training.util.sample_collections import (
-from deepspeech_training.util.sample_collections import samples_from_files, DirectSDBWriter
+    DirectSDBWriter,
    samples_from_files
 )
 AUDIO_TYPE_LOOKUP = {
    'wav': AUDIO_TYPE_WAV,
--- a/bin/graphdef_binary_to_text.py
+++ b/bin/graphdef_binary_to_text.py
@ -1,9 +1,9 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import tensorflow.compat.v1 as tfv1
 import sys
 import tensorflow.compat.v1 as tfv1
 from google.protobuf import text_format
--- a/bin/import_aidatatang.py
+++ b/bin/import_aidatatang.py
@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
 import glob
 import os
 import pandas
 import tarfile
 import pandas
 from deepspeech_training.util.importers import get_importers_parser
 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_aishell.py
+++ b/bin/import_aishell.py
@ -3,9 +3,10 @@ from __future__ import absolute_import, division, print_function
 import glob
 import os
 import pandas
 import tarfile
 import pandas
 from deepspeech_training.util.importers import get_importers_parser
 COLUMNNAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_cv.py
+++ b/bin/import_cv.py
@ -3,15 +3,22 @@ from __future__ import absolute_import, division, print_function
 import csv
 import os
 import progressbar
 import sox
 import subprocess
 import tarfile
 from glob import glob
 from multiprocessing import Pool
-from deepspeech_training.util.importers import validate_label_eng as validate_label, get_counter, get_imported_samples, print_import_report
+
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
+import progressbar
 import sox
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    print_import_report
 )
 from deepspeech_training.util.importers import \
    validate_label_eng as validate_label
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_cv2.py
+++ b/bin/import_cv2.py
@ -10,16 +10,22 @@ from __future__ import absolute_import, division, print_function
 import csv
 import os
 import progressbar
 import sox
 import subprocess
 import unicodedata
 from multiprocessing import Pool
 from deepspeech_training.util.downloader import SIMPLE_BAR
 from deepspeech_training.util.text import Alphabet
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 import progressbar
 import sox
 from deepspeech_training.util.downloader import SIMPLE_BAR
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    get_importers_parser,
    get_validate_label,
    print_import_report
 )
 from deepspeech_training.util.text import Alphabet
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_fisher.py
+++ b/bin/import_fisher.py
@ -1,20 +1,24 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function
 # Prerequisite: Having the sph2pipe tool in your PATH:
 # https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
 import codecs
 import fnmatch
 import librosa
 import os
 import pandas
 import soundfile # <= Has an external dependency on libsndfile
 import subprocess
 import sys
 import unicodedata
-from deepspeech_training.util.importers import validate_label_eng as validate_label
+import librosa
 import pandas
 import soundfile  # <= Has an external dependency on libsndfile
 from deepspeech_training.util.importers import \
    validate_label_eng as validate_label
 # Prerequisite: Having the sph2pipe tool in your PATH:
 # https://www.ldc.upenn.edu/language-resources/tools/sphere-conversion-tools
 def _download_and_preprocess_data(data_dir):
    # Assume data_dir contains extracted LDC2004S13, LDC2004T19, LDC2005S13, LDC2005T19
--- a/bin/import_freestmandarin.py
+++ b/bin/import_freestmandarin.py
@ -2,11 +2,12 @@
 from __future__ import absolute_import, division, print_function
 import glob
 import numpy as np
 import os
 import pandas
 import tarfile
 import numpy as np
 import pandas
 from deepspeech_training.util.importers import get_importers_parser
 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_gram_vaani.py
+++ b/bin/import_gram_vaani.py
@ -4,15 +4,18 @@ import csv
 import logging
 import math
 import os
 import pandas as pd
 import swifter
 import subprocess
 import urllib
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label
 from pathlib import Path
 import pandas as pd
 from sox import Transformer
 import swifter
 from deepspeech_training.util.importers import (
    get_importers_parser,
    get_validate_label
 )
 __version__ = "0.1.0"
 _logger = logging.getLogger(__name__)
--- a/bin/import_ldc93s1.py
+++ b/bin/import_ldc93s1.py
@ -1,12 +1,14 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function
 import pandas
 import os
 import sys
 import pandas
 from deepspeech_training.util.downloader import maybe_download
 def _download_and_preprocess_data(data_dir):
    # Conditionally download data
    LDC93S1_BASE = "LDC93S1"
--- a/bin/import_librivox.py
+++ b/bin/import_librivox.py
@ -4,17 +4,18 @@ from __future__ import absolute_import, division, print_function
 import codecs
 import fnmatch
 import os
 import pandas
 import progressbar
 import subprocess
 import sys
 import tarfile
 import unicodedata
-from deepspeech_training.util.downloader import maybe_download
+import pandas
 import progressbar
 from sox import Transformer
 from tensorflow.python.platform import gfile
 from deepspeech_training.util.downloader import maybe_download
 SAMPLE_RATE = 16000
 def _download_and_preprocess_data(data_dir):
--- a/bin/import_lingua_libre.py
+++ b/bin/import_lingua_libre.py
@ -4,20 +4,25 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
 import progressbar
 import re
 import sox
 import subprocess
 import unicodedata
 import zipfile
 from deepspeech_training.util.downloader import maybe_download
 from deepspeech_training.util.downloader import SIMPLE_BAR
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
 import progressbar
 import sox
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    get_importers_parser,
    get_validate_label,
    print_import_report
 )
 from deepspeech_training.util.text import Alphabet
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_m-ailabs.py
+++ b/bin/import_m-ailabs.py
@ -4,18 +4,24 @@ from __future__ import absolute_import, division, print_function
 import csv
 import os
 import progressbar
 import subprocess
 import tarfile
 import unicodedata
 from deepspeech_training.util.downloader import maybe_download
 from deepspeech_training.util.downloader import SIMPLE_BAR
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
 import progressbar
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    get_importers_parser,
    get_validate_label,
    print_import_report
 )
 from deepspeech_training.util.text import Alphabet
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
 MAX_SECS = 15
--- a/bin/import_magicdata.py
+++ b/bin/import_magicdata.py
@ -3,10 +3,11 @@ from __future__ import absolute_import, division, print_function
 import glob
 import os
 import pandas
 import tarfile
 import wave
 import pandas
 from deepspeech_training.util.importers import get_importers_parser
 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_primewords.py
+++ b/bin/import_primewords.py
@ -3,11 +3,12 @@ from __future__ import absolute_import, division, print_function
 import glob
 import json
 import numpy as np
 import os
 import pandas
 import tarfile
 import numpy as np
 import pandas
 from deepspeech_training.util.importers import get_importers_parser
 COLUMN_NAMES = ['wav_filename', 'wav_filesize', 'transcript']
--- a/bin/import_slr57.py
+++ b/bin/import_slr57.py
@ -3,20 +3,26 @@ from __future__ import absolute_import, division, print_function
 import csv
 import os
 import progressbar
 import re
 import sox
 import subprocess
 import tarfile
 import unicodedata
 import zipfile
 from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 from deepspeech_training.util.text import Alphabet
 from glob import glob
 from multiprocessing import Pool
 import progressbar
 import sox
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    get_importers_parser,
    get_validate_label,
    print_import_report
 )
 from deepspeech_training.util.text import Alphabet
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_swb.py
+++ b/bin/import_swb.py
@ -6,19 +6,20 @@ from __future__ import absolute_import, division, print_function
 # from the deepspeech directory run with: ./bin/import_swb.py ./data/swb/
 import codecs
 import fnmatch
 import librosa
 import os
 import pandas
 import requests
 import soundfile # <= Has an external dependency on libsndfile
 import subprocess
 import sys
 import tarfile
 import unicodedata
 import wave
-from deepspeech_training.util.importers import validate_label_eng as validate_label
+import librosa
 import pandas
 import requests
 import soundfile  # <= Has an external dependency on libsndfile
 from deepspeech_training.util.importers import \
    validate_label_eng as validate_label
 # ARCHIVE_NAME refers to ISIP alignments from 01/29/03
 ARCHIVE_NAME = 'switchboard_word_alignments.tar.gz'
--- a/bin/import_swc.py
+++ b/bin/import_swc.py
@ -8,23 +8,25 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
 import progressbar
 import random
 import re
 import shutil
 import sox
 import sys
 import tarfile
 import unicodedata
 import wave
 import xml.etree.cElementTree as ET
 from glob import glob
 from collections import Counter
 from glob import glob
 from multiprocessing.pool import ThreadPool
 import progressbar
 import sox
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import \
    validate_label_eng as validate_label
 from deepspeech_training.util.text import Alphabet
 from deepspeech_training.util.importers import validate_label_eng as validate_label
 from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
 SWC_URL = "https://www2.informatik.uni-hamburg.de/nats/pub/SWC/SWC_{language}.tar"
 SWC_ARCHIVE = "SWC_{language}.tar"
--- a/bin/import_ted.py
+++ b/bin/import_ted.py
@ -1,17 +1,18 @@
 #!/usr/bin/env python
 from __future__ import absolute_import, division, print_function
 import pandas
 import sys
 import tarfile
 import unicodedata
 import wave
 from glob import glob
 from os import makedirs, path, remove, rmdir
 import pandas
 from sox import Transformer
 from deepspeech_training.util.downloader import maybe_download
 from tensorflow.python.platform import gfile
 from deepspeech_training.util.downloader import maybe_download
 from deepspeech_training.util.stm import parse_stm_file
--- a/bin/import_timit.py
+++ b/bin/import_timit.py
@ -11,13 +11,15 @@
 '''
 import errno
 import fnmatch
 import os
-from os import path
+import subprocess
 import sys
 import tarfile
-import fnmatch
+from os import path
 import pandas as pd
-import subprocess
+
 def clean(word):
    # LC ALL & strip punctuation which are not required
--- a/bin/import_ts.py
+++ b/bin/import_ts.py
@ -3,18 +3,23 @@ from __future__ import absolute_import, division, print_function
 import csv
 import os
 import progressbar
 import re
 import sox
 import subprocess
 import unidecode
 import zipfile
 from deepspeech_training.util.downloader import maybe_download
 from deepspeech_training.util.downloader import SIMPLE_BAR
 from deepspeech_training.util.importers import get_importers_parser, get_validate_label, get_counter, get_imported_samples, print_import_report
 from multiprocessing import Pool
 import progressbar
 import sox
 import unidecode
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    get_importers_parser,
    get_validate_label,
    print_import_report
 )
 FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']
 SAMPLE_RATE = 16000
--- a/bin/import_tuda.py
+++ b/bin/import_tuda.py
@ -8,15 +8,17 @@ from __future__ import absolute_import, division, print_function
 import argparse
 import csv
 import os
 import progressbar
 import tarfile
 import unicodedata
 import wave
 import xml.etree.cElementTree as ET
 from collections import Counter
-from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
+
-from deepspeech_training.util.importers import validate_label_eng as validate_label
+import progressbar
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import \
    validate_label_eng as validate_label
 from deepspeech_training.util.text import Alphabet
 TUDA_VERSION = 'v2'
--- a/bin/import_vctk.py
+++ b/bin/import_vctk.py
@ -4,17 +4,21 @@
 # as per https://homepages.inf.ed.ac.uk/jyamagis/page3/page58/page58.html
 from __future__ import absolute_import, division, print_function
 import librosa
 import os
 import progressbar
 import random
 import re
 from deepspeech_training.util.downloader import maybe_download, SIMPLE_BAR
 from deepspeech_training.util.importers import get_counter, get_imported_samples, print_import_report
 from multiprocessing import Pool
 from zipfile import ZipFile
 import librosa
 import progressbar
 from deepspeech_training.util.downloader import SIMPLE_BAR, maybe_download
 from deepspeech_training.util.importers import (
    get_counter,
    get_imported_samples,
    print_import_report
 )
 SAMPLE_RATE = 16000
 MAX_SECS = 10
--- a/bin/import_voxforge.py
+++ b/bin/import_voxforge.py
@ -3,20 +3,21 @@ from __future__ import absolute_import, division, print_function
 import codecs
 import os
 import pandas
 import re
 import tarfile
 import threading
 import unicodedata
 from bs4 import BeautifulSoup
 from deepspeech_training.util.downloader import maybe_download
 from glob import glob
 from multiprocessing.pool import ThreadPool
 from os import makedirs, path
 import pandas
 from bs4 import BeautifulSoup
 from six.moves import urllib
 from tensorflow.python.platform import gfile
 from deepspeech_training.util.downloader import maybe_download
 """The number of jobs to run in parallel"""
 NUM_PARALLEL = 8
@ -188,7 +189,3 @@ def _generate_dataset(data_dir, data_set):
 if __name__=="__main__":
    _download_and_preprocess_data(sys.argv[1])
--- a/bin/ops_in_graph.py
+++ b/bin/ops_in_graph.py
@ -1,9 +1,11 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import tensorflow.compat.v1 as tfv1
 import sys
 import tensorflow.compat.v1 as tfv1
 def main():
    with tfv1.gfile.FastGFile(sys.argv[1], 'rb') as fin:
        graph_def = tfv1.GraphDef()
--- a/bin/play.py
+++ b/bin/play.py
@ -10,7 +10,10 @@ import random
 import sys
 from deepspeech_training.util.audio import AUDIO_TYPE_PCM
-from deepspeech_training.util.sample_collections import samples_from_file, LabeledSample
+from deepspeech_training.util.sample_collections import (
    LabeledSample,
    samples_from_file
 )
 def play_sample(samples, index):