From b4ba1cc89762213badea49807757f6788bb94bb2 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 8 Jul 2020 00:18:25 +0200 Subject: [PATCH 1/4] Support path-like objects in FileIO and GFile [Path-like objects](https://docs.python.org/3/glossary.html#term-path-like-object) as used in [`pathlib`](https://docs.python.org/3.8/library/pathlib.html?highlight=pathlib#module-pathlib) are the recommended way to interact with file paths in Python. This PR changes `FileIO` to allow for path-like objects which would otherwise raise an exception when passed to `compat.as_bytes`. `FileIO` and `GFile` emulate the API of [Python's builtin `open`](https://docs.python.org/3.8/library/functions.html#open) this possibility will be expected by users and should make switching between `open` and `GFile` easier. --- tensorflow/python/lib/io/file_io.py | 5 +++-- tensorflow/python/lib/io/file_io_test.py | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index c904cba08f8..8c259852ccd 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -38,7 +38,8 @@ class FileIO(object): """FileIO class that exposes methods to read / write to / from files. The constructor takes the following arguments: - name: name of the file + name: [path-like object](https://docs.python.org/3/glossary.html#term-path-like-object) + giving the pathname of the file to be opened. mode: one of `r`, `w`, `a`, `r+`, `w+`, `a+`. Append `b` for bytes mode. Can be used as an iterator to iterate over lines in the file. @@ -48,7 +49,7 @@ class FileIO(object): """ def __init__(self, name, mode): - self.__name = name + self.__name = compat.path_to_str(name) self.__mode = mode self._read_buf = None self._writable_file = None diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index 45de79fd0c7..33dbfc73474 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import os.path +import pathlib import numpy as np @@ -43,6 +44,10 @@ class FileIoTest(test.TestCase): with self.assertRaises(errors.NotFoundError): _ = f.read() + def testPathLike(self): + f = file_io.FileIO(pathlib.Path("temp_file"), mode="w") + self.assertEqual(f.name, "temp_file") + def testFileDoesntExist(self): file_path = os.path.join(self._base_dir, "temp_file") self.assertFalse(file_io.file_exists(file_path)) From 575632d3ebb1742bb17bfa0f1bde69b8b7f8c619 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 8 Jul 2020 02:52:42 +0200 Subject: [PATCH 2/4] Support path-like objects in gfile.* --- tensorflow/python/lib/io/file_io.py | 24 +++---- tensorflow/python/lib/io/file_io_test.py | 86 ++++++++++++++---------- tensorflow/python/util/compat.py | 23 +++++++ 3 files changed, 87 insertions(+), 46 deletions(-) diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index 8c259852ccd..146b9122d59 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -265,7 +265,7 @@ def file_exists_v2(path): errors.OpError: Propagates any errors reported by the FileSystem API. """ try: - _pywrap_file_io.FileExists(compat.as_bytes(path)) + _pywrap_file_io.FileExists(compat.path_to_bytes(path)) except errors.NotFoundError: return False return True @@ -296,7 +296,7 @@ def delete_file_v2(path): errors.OpError: Propagates any errors reported by the FileSystem API. E.g., `NotFoundError` if the path does not exist. """ - _pywrap_file_io.DeleteFile(compat.as_bytes(path)) + _pywrap_file_io.DeleteFile(compat.path_to_bytes(path)) def read_file_to_string(filename, binary_mode=False): @@ -448,7 +448,7 @@ def create_dir_v2(path): Raises: errors.OpError: If the operation fails. """ - _pywrap_file_io.CreateDir(compat.as_bytes(path)) + _pywrap_file_io.CreateDir(compat.path_to_bytes(path)) @tf_export(v1=["gfile.MakeDirs"]) @@ -478,7 +478,7 @@ def recursive_create_dir_v2(path): Raises: errors.OpError: If the operation fails. """ - _pywrap_file_io.RecursivelyCreateDir(compat.as_bytes(path)) + _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path)) @tf_export(v1=["gfile.Copy"]) @@ -511,7 +511,7 @@ def copy_v2(src, dst, overwrite=False): errors.OpError: If the operation fails. """ _pywrap_file_io.CopyFile( - compat.as_bytes(src), compat.as_bytes(dst), overwrite) + compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) @tf_export(v1=["gfile.Rename"]) @@ -544,7 +544,7 @@ def rename_v2(src, dst, overwrite=False): errors.OpError: If the operation fails. """ _pywrap_file_io.RenameFile( - compat.as_bytes(src), compat.as_bytes(dst), overwrite) + compat.path_to_bytes(src), compat.path_to_bytes(dst), overwrite) def atomic_write_string_to_file(filename, contents, overwrite=True): @@ -597,7 +597,7 @@ def delete_recursively_v2(path): Raises: errors.OpError: If the operation fails. """ - _pywrap_file_io.DeleteRecursively(compat.as_bytes(path)) + _pywrap_file_io.DeleteRecursively(compat.path_to_bytes(path)) @tf_export(v1=["gfile.IsDirectory"]) @@ -624,7 +624,7 @@ def is_directory_v2(path): True, if the path is a directory; False otherwise """ try: - return _pywrap_file_io.IsDirectory(compat.as_bytes(path)) + return _pywrap_file_io.IsDirectory(compat.path_to_bytes(path)) except errors.OpError: return False @@ -647,7 +647,7 @@ def has_atomic_move(path): not to use temporary locations in this case. """ try: - return _pywrap_file_io.HasAtomicMove(compat.as_bytes(path)) + return _pywrap_file_io.HasAtomicMove(compat.path_to_bytes(path)) except errors.OpError: # defaults to True return True @@ -698,7 +698,7 @@ def list_directory_v2(path): # vector of string should be interpreted as strings, not bytes. return [ compat.as_str_any(filename) - for filename in _pywrap_file_io.GetChildren(compat.as_bytes(path)) + for filename in _pywrap_file_io.GetChildren(compat.path_to_bytes(path)) ] @@ -746,7 +746,7 @@ def walk_v2(top, topdown=True, onerror=None): return "".join([os.path.join(parent, ""), item]) return os.path.join(parent, item) - top = compat.as_str_any(top) + top = compat.as_str_any(compat.path_to_str(top)) try: listing = list_directory(top) except errors.NotFoundError as err: @@ -807,7 +807,7 @@ def stat_v2(path): Raises: errors.OpError: If the operation fails. """ - return _pywrap_file_io.Stat(path) + return _pywrap_file_io.Stat(compat.path_to_str(path)) def filecmp(filename_a, filename_b): diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index 33dbfc73474..d3cb7b980d3 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -22,6 +22,7 @@ from __future__ import print_function import os.path import pathlib +from absl.testing import parameterized import numpy as np from tensorflow.python.framework import errors @@ -30,7 +31,12 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import test -class FileIoTest(test.TestCase): +run_all_path_types = parameterized.named_parameters( + ("str", os.path.join), + ("pathlib", lambda *paths: pathlib.Path(os.path.join(*paths)))) + + +class FileIoTest(test.TestCase, parameterized.TestCase): def setUp(self): self._base_dir = os.path.join(self.get_temp_dir(), "base_dir") @@ -48,14 +54,16 @@ class FileIoTest(test.TestCase): f = file_io.FileIO(pathlib.Path("temp_file"), mode="w") self.assertEqual(f.name, "temp_file") - def testFileDoesntExist(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testFileDoesntExist(self, join): + file_path = join(self._base_dir, "temp_file") self.assertFalse(file_io.file_exists(file_path)) with self.assertRaises(errors.NotFoundError): _ = file_io.read_file_to_string(file_path) - def testWriteToString(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testWriteToString(self, join): + file_path = join(self._base_dir, "temp_file") file_io.write_string_to_file(file_path, "testing") self.assertTrue(file_io.file_exists(file_path)) file_contents = file_io.read_file_to_string(file_path) @@ -133,8 +141,9 @@ class FileIoTest(test.TestCase): with self.assertRaises(errors.PermissionDeniedError): file_io.FileIO(file_path, mode="w").read() - def testFileDelete(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testFileDelete(self, join): + file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, mode="w").write("testing") file_io.delete_file(file_path) self.assertFalse(file_io.file_exists(file_path)) @@ -176,8 +185,9 @@ class FileIoTest(test.TestCase): self.assertItemsEqual( file_io.get_matching_files(glob_pattern), expected_match) - def testCreateRecursiveDir(self): - dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2") + @run_all_path_types + def testCreateRecursiveDir(self, join): + dir_path = join(self._base_dir, "temp_dir/temp_dir1/temp_dir2") file_io.recursive_create_dir(dir_path) file_io.recursive_create_dir(dir_path) # repeat creation file_path = os.path.join(dir_path, "temp_file") @@ -186,10 +196,11 @@ class FileIoTest(test.TestCase): file_io.delete_recursively(os.path.join(self._base_dir, "temp_dir")) self.assertFalse(file_io.file_exists(file_path)) - def testCopy(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testCopy(self, join): + file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, mode="w").write("testing") - copy_path = os.path.join(self._base_dir, "copy_file") + copy_path = join(self._base_dir, "copy_file") file_io.copy(file_path, copy_path) self.assertTrue(file_io.file_exists(copy_path)) f = file_io.FileIO(file_path, mode="r") @@ -213,10 +224,11 @@ class FileIoTest(test.TestCase): with self.assertRaises(errors.AlreadyExistsError): file_io.copy(file_path, copy_path, overwrite=False) - def testRename(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testRename(self, join): + file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, mode="w").write("testing") - rename_path = os.path.join(self._base_dir, "rename_file") + rename_path = join(self._base_dir, "rename_file") file_io.rename(file_path, rename_path) self.assertTrue(file_io.file_exists(rename_path)) self.assertFalse(file_io.file_exists(file_path)) @@ -245,13 +257,14 @@ class FileIoTest(test.TestCase): with self.assertRaises(errors.NotFoundError): file_io.delete_recursively(fake_dir_path) - def testIsDirectory(self): - dir_path = os.path.join(self._base_dir, "test_dir") + @run_all_path_types + def testIsDirectory(self, join): + dir_path = join(self._base_dir, "test_dir") # Failure for a non-existing dir. self.assertFalse(file_io.is_directory(dir_path)) file_io.create_dir(dir_path) self.assertTrue(file_io.is_directory(dir_path)) - file_path = os.path.join(dir_path, "test_file") + file_path = join(dir_path, "test_file") file_io.FileIO(file_path, mode="w").write("test") # False for a file. self.assertFalse(file_io.is_directory(file_path)) @@ -259,16 +272,17 @@ class FileIoTest(test.TestCase): file_statistics = file_io.stat(dir_path) self.assertTrue(file_statistics.is_directory) - def testListDirectory(self): - dir_path = os.path.join(self._base_dir, "test_dir") + @run_all_path_types + def testListDirectory(self, join): + dir_path = join(self._base_dir, "test_dir") file_io.create_dir(dir_path) files = ["file1.txt", "file2.txt", "file3.txt"] for name in files: - file_path = os.path.join(dir_path, name) + file_path = join(dir_path, name) file_io.FileIO(file_path, mode="w").write("testing") - subdir_path = os.path.join(dir_path, "sub_dir") + subdir_path = join(dir_path, "sub_dir") file_io.create_dir(subdir_path) - subdir_file_path = os.path.join(subdir_path, "file4.txt") + subdir_file_path = join(subdir_path, "file4.txt") file_io.FileIO(subdir_file_path, mode="w").write("testing") dir_list = file_io.list_directory(dir_path) self.assertItemsEqual(files + ["sub_dir"], dir_list) @@ -294,8 +308,10 @@ class FileIoTest(test.TestCase): mode="w").write("testing") file_io.create_dir(os.path.join(dir_path, "subdir1_2/subdir2")) - def testWalkInOrder(self): - dir_path = os.path.join(self._base_dir, "test_dir") + @run_all_path_types + def testWalkInOrder(self, join): + dir_path_str = os.path.join(self._base_dir, "test_dir") + dir_path = join(self._base_dir, "test_dir") self._setupWalkDirectories(dir_path) # Now test the walk (in_order = True) all_dirs = [] @@ -305,15 +321,15 @@ class FileIoTest(test.TestCase): all_dirs.append(w_dir) all_subdirs.append(w_subdirs) all_files.append(w_files) - self.assertItemsEqual(all_dirs, [dir_path] + [ - os.path.join(dir_path, item) + self.assertItemsEqual(all_dirs, [dir_path_str] + [ + os.path.join(dir_path_str, item) for item in ["subdir1_1", "subdir1_2", "subdir1_2/subdir2", "subdir1_3"] ]) - self.assertEqual(dir_path, all_dirs[0]) + self.assertEqual(dir_path_str, all_dirs[0]) self.assertLess( - all_dirs.index(os.path.join(dir_path, "subdir1_2")), - all_dirs.index(os.path.join(dir_path, "subdir1_2/subdir2"))) + all_dirs.index(os.path.join(dir_path_str, "subdir1_2")), + all_dirs.index(os.path.join(dir_path_str, "subdir1_2/subdir2"))) self.assertItemsEqual(all_subdirs[1:5], [[], ["subdir2"], [], []]) self.assertItemsEqual(all_subdirs[0], ["subdir1_1", "subdir1_2", "subdir1_3"]) @@ -362,8 +378,9 @@ class FileIoTest(test.TestCase): self.assertItemsEqual(all_subdirs, []) self.assertItemsEqual(all_files, []) - def testStat(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testStat(self, join): + file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, mode="w").write("testing") file_statistics = file_io.stat(file_path) os_statistics = os.stat(file_path) @@ -517,8 +534,9 @@ class FileIoTest(test.TestCase): f.flush() self.assertEqual(content, f.read(len(content) + 1)) - def testUTF8StringPathExists(self): - file_path = os.path.join(self._base_dir, "UTF8测试_file_exist") + @run_all_path_types + def testUTF8StringPathExists(self, join): + file_path = join(self._base_dir, "UTF8测试_file_exist") file_io.write_string_to_file(file_path, "testing") v = file_io.file_exists(file_path) self.assertEqual(v, True) diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index b64cc2b5edf..418449ca819 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -180,6 +180,29 @@ def path_to_str(path): return path +def path_to_bytes(path): + r"""Converts input which is a `PathLike` object to `bytes`. + + Converts from any python constant representation of a `PathLike` object + or `str` to bytes. + + Args: + path: An object that can be converted to path representation. + + Returns: + A `bytes` object. + + Usage: + In case a simplified `bytes` version of the path is needed from an + `os.PathLike` object + + ``` + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + return as_bytes(path) + + # Numpy 1.8 scalars don't inherit from numbers.Integral in Python 3, so we # need to check them specifically. The same goes from Real and Complex. integral_types = (_numbers.Integral, _np.integer) From 7fe25a58724408a997dd97ba4933713c645200a4 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 8 Jul 2020 14:27:10 +0200 Subject: [PATCH 3/4] Fix docstring --- tensorflow/python/util/compat.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/util/compat.py b/tensorflow/python/util/compat.py index 418449ca819..c7e39895f0c 100644 --- a/tensorflow/python/util/compat.py +++ b/tensorflow/python/util/compat.py @@ -195,8 +195,6 @@ def path_to_bytes(path): Usage: In case a simplified `bytes` version of the path is needed from an `os.PathLike` object - - ``` """ if hasattr(path, '__fspath__'): path = path.__fspath__() From 5ef85fc6c826501aa852aed8228ec5dd8beeb07c Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Thu, 9 Jul 2020 00:01:23 +0200 Subject: [PATCH 4/4] Convert path to string right before passing it to the filesystem layer --- tensorflow/python/lib/io/file_io.py | 6 +++--- tensorflow/python/lib/io/file_io_test.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index 146b9122d59..dbf04097fce 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -49,7 +49,7 @@ class FileIO(object): """ def __init__(self, name, mode): - self.__name = compat.path_to_str(name) + self.__name = name self.__mode = mode self._read_buf = None self._writable_file = None @@ -77,7 +77,7 @@ class FileIO(object): raise errors.PermissionDeniedError(None, None, "File isn't open for reading") self._read_buf = _pywrap_file_io.BufferedInputStream( - self.__name, 1024 * 512) + compat.path_to_str(self.__name), 1024 * 512) def _prewrite_check(self): if not self._writable_file: @@ -85,7 +85,7 @@ class FileIO(object): raise errors.PermissionDeniedError(None, None, "File isn't open for writing") self._writable_file = _pywrap_file_io.WritableFile( - compat.as_bytes(self.__name), compat.as_bytes(self.__mode)) + compat.path_to_bytes(self.__name), compat.as_bytes(self.__mode)) def _prepare_value(self, val): if self._binary_mode: diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index d3cb7b980d3..d049fe84f4f 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -50,10 +50,6 @@ class FileIoTest(test.TestCase, parameterized.TestCase): with self.assertRaises(errors.NotFoundError): _ = f.read() - def testPathLike(self): - f = file_io.FileIO(pathlib.Path("temp_file"), mode="w") - self.assertEqual(f.name, "temp_file") - @run_all_path_types def testFileDoesntExist(self, join): file_path = join(self._base_dir, "temp_file") @@ -88,14 +84,16 @@ class FileIoTest(test.TestCase, parameterized.TestCase): file_contents = file_io.read_file_to_string(file_path) self.assertEqual("new", file_contents) - def testReadBinaryMode(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testReadBinaryMode(self, join): + file_path = join(self._base_dir, "temp_file") file_io.write_string_to_file(file_path, "testing") with file_io.FileIO(file_path, mode="rb") as f: self.assertEqual(b"testing", f.read()) - def testWriteBinaryMode(self): - file_path = os.path.join(self._base_dir, "temp_file") + @run_all_path_types + def testWriteBinaryMode(self, join): + file_path = join(self._base_dir, "temp_file") file_io.FileIO(file_path, "wb").write("testing") with file_io.FileIO(file_path, mode="r") as f: self.assertEqual("testing", f.read())