Fix remote path handling for CSV sample reading
This commit is contained in:
		
							parent
							
								
									8fe972eb6f
								
							
						
					
					
						commit
						86cba458c5
					
				| @ -18,7 +18,7 @@ from .audio import ( | ||||
|     get_audio_type_from_extension, | ||||
|     write_wav | ||||
| ) | ||||
| from .io import open_remote | ||||
| from .io import open_remote, is_remote_path | ||||
| 
 | ||||
| BIG_ENDIAN = 'big' | ||||
| INT_SIZE = 4 | ||||
| @ -499,7 +499,6 @@ class CSV(SampleList): | ||||
|             If the order of the samples should be reversed | ||||
|         """ | ||||
|         rows = [] | ||||
|         csv_dir = Path(csv_filename).parent | ||||
|         with open_remote(csv_filename, 'r', encoding='utf8') as csv_file: | ||||
|             reader = csv.DictReader(csv_file) | ||||
|             if 'transcript' in reader.fieldnames: | ||||
| @ -509,9 +508,12 @@ class CSV(SampleList): | ||||
|                 raise RuntimeError('No transcript data (missing CSV column)') | ||||
|             for row in reader: | ||||
|                 wav_filename = Path(row['wav_filename']) | ||||
|                 if not wav_filename.is_absolute(): | ||||
|                     wav_filename = csv_dir / wav_filename | ||||
|                 if not wav_filename.is_absolute() and not is_remote_path(row['wav_filename']): | ||||
|                     wav_filename = Path(csv_filename).parent / wav_filename | ||||
|                     wav_filename = str(wav_filename) | ||||
|                 else: | ||||
|                     # Keep the raw string: pathlib would collapse the '//' in remote URLs such as hdfs:// | ||||
|                     wav_filename = row['wav_filename'] | ||||
|                 wav_filesize = int(row['wav_filesize']) if 'wav_filesize' in row else 0 | ||||
|                 if labeled: | ||||
|                     rows.append((wav_filename, wav_filesize, row['transcript'])) | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user