Accept samples that are already unpacked in the sample-loading workers.

Both helpers now call unpack() only when the incoming object provides it;
any other object is used as the sample directly.

In _unpack_and_change_audio_type the check is made on packed_sample, the
object that was received. The local name sample is only assigned inside
the branches, so testing it would raise UnboundLocalError on every call.

diff --git a/training/deepspeech_training/util/audio.py b/training/deepspeech_training/util/audio.py
index 04e99fbd..15c7743c 100644
--- a/training/deepspeech_training/util/audio.py
+++ b/training/deepspeech_training/util/audio.py
@@ -120,7 +120,10 @@ class Sample:
 
 def _unpack_and_change_audio_type(sample_and_audio_type):
     packed_sample, audio_type, bitrate = sample_and_audio_type
-    sample = packed_sample.unpack()
+    if hasattr(packed_sample, 'unpack'):
+        sample = packed_sample.unpack()
+    else:
+        sample = packed_sample
     sample.change_audio_type(audio_type, bitrate=bitrate)
     return sample
 
diff --git a/training/deepspeech_training/util/augmentations.py b/training/deepspeech_training/util/augmentations.py
index 0934fbd5..79aa5750 100644
--- a/training/deepspeech_training/util/augmentations.py
+++ b/training/deepspeech_training/util/augmentations.py
@@ -152,7 +152,10 @@ def _init_augmentation_worker(preparation_context):
 
 def _load_and_augment_sample(timed_sample, context=None):
     sample, clock = timed_sample
-    realized_sample = sample.unpack()
+    if hasattr(sample, 'unpack'):
+        realized_sample = sample.unpack()
+    else:
+        realized_sample = sample
     return _augment_sample((realized_sample, clock), context)
 
 