Add universal is_remote_path to I/O helper

This commit is contained in:
CatalinVoss 2020-11-12 14:16:37 -08:00
parent c3dc4c0d5c
commit 3d503bd69e

View File

@ -1,23 +1,41 @@
""" """
A set of I/O utils that allow us to open files on remote storage as if they were present locally. A set of I/O utils that allow us to open files on remote storage as if they were present locally and access
into HDFS storage using Tensorflow's C++ FileStream API.
Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets. Currently only includes wrappers for Google's GCS, but this can easily be expanded for AWS S3 buckets.
""" """
import inspect import inspect
import os import os
import sys import sys
def is_remote_path(path):
    """
    Returns True iff the path is one of the remote formats that this
    module supports (currently `gs://...` and `hdfs://...`)

    Args:
        path: path string to classify

    Returns:
        True if the path starts with a supported remote scheme, else False
    """
    # `str` has no `starts_with` method (that raised AttributeError for every
    # non-GCS path); `startswith` accepts a tuple, so one call covers all schemes
    return path.startswith(('gs://', 'hdfs://'))
def path_exists_remote(path):
    """
    Wrapper that allows existence check of local and remote paths like
    `gs://...`

    Args:
        path: local filesystem path or remote path

    Returns:
        Result of `gfile.exists` for remote paths, `os.path.exists` otherwise
    """
    # Conditional import: tensorflow is only loaded when a remote path is seen
    if is_remote_path(path):
        from tensorflow.io import gfile
        return gfile.exists(path)
    return os.path.exists(path)
def copy_remote(src, dst, overwrite=False):
    """
    Copy a file between local and remote storage, in either direction

    Args:
        src: path of the file to copy
        dst: path to copy the file to
        overwrite: forwarded to `gfile.copy` (defaults to False)

    Returns:
        Whatever `gfile.copy` returns
    """
    # Deferred import so tensorflow is only loaded when a copy is requested
    from tensorflow.io import gfile as tf_gfile
    outcome = tf_gfile.copy(src, dst, overwrite)
    return outcome
def open_remote(path, mode): def open_remote(path, mode):
""" """
Wrapper around open_remote() method that can handle remote paths like `gs://...` Wrapper around open_remote() method that can handle remote paths like `gs://...`
@ -28,7 +46,7 @@ def open_remote(path, mode):
do something with the file f, whether or not we have local access to it do something with the file f, whether or not we have local access to it
""" """
# Conditional import # Conditional import
if path.startswith("gs://"): if is_remote_path(path):
from tensorflow.io import gfile from tensorflow.io import gfile
return gfile.GFile(path, mode=mode) return gfile.GFile(path, mode=mode)
return open_remote(path, mode) return open_remote(path, mode)
@ -39,7 +57,7 @@ def isdir_remote(path):
Wrapper to check if remote and local paths are directories Wrapper to check if remote and local paths are directories
""" """
# Conditional import # Conditional import
if path.startswith("gs://"): if is_remote_path(path):
from tensorflow.io import gfile from tensorflow.io import gfile
return gfile.isdir(path) return gfile.isdir(path)
return os.path.isdir(path) return os.path.isdir(path)
@ -50,7 +68,7 @@ def listdir_remote(path):
Wrapper to list paths in local dirs (alternative to using a glob, I suppose) Wrapper to list paths in local dirs (alternative to using a glob, I suppose)
""" """
# Conditional import # Conditional import
if path.startswith("gs://"): if is_remote_path(path):
from tensorflow.io import gfile from tensorflow.io import gfile
return gfile.listdir(path) return gfile.listdir(path)
return os.listdir(path) return os.listdir(path)