From 56f4edbb5a78ae9d29d797f39e107a28c9bcf6e5 Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Wed, 25 Mar 2020 17:46:09 -0700 Subject: [PATCH] Fix `GetMatchingPaths` with special chars bug. After 5659465166daa218168c1f50f1d63c30f9f2bbd9, `GetMatchingPaths` was converted to use RE2 instead of `fnmatch` as that allows non-local filesystems (e.g., GCS, Hadoop, S3) to also be used from Windows. However, this breaks compatibility between `tf.io.gfile.glob` and Python `glob` and that results in tests silently failing or examples being silently skipped during training. The fix is two-pronged. First, to fix #37758 only, we add regexp replacements for `(` and `)` in the pattern, escaping them before matching. After testing and seeing that this works, we then re-enable `fnmatch` on POSIX environments to reduce binary size, just like we did for mobile platforms. Fixes #37758 (everywhere) and tensorflow/tensorboard#3260 (on posix platforms). Tested via `bazel run //tensorflow/python:file_io_test` after adding a test for the pattern in #37758. Will need to be cherry-picked onto `r2.2` branch. PiperOrigin-RevId: 303009914 Change-Id: Ieab047f63e9ba6bb0ec0499e0fa864f6ca6090ff --- tensorflow/core/platform/file_system.cc | 24 ++++++++++++++---------- tensorflow/python/lib/io/file_io_test.py | 12 ++++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index 7d8c6aefc33..9e96ceedbdc 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -16,9 +16,6 @@ limitations under the License. #include "tensorflow/core/platform/file_system.h" #include -#if defined(IS_MOBILE_PLATFORM) -#include -#endif #include #include @@ -26,12 +23,15 @@ limitations under the License. #include #include +#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) +#include +#else +#include "tensorflow/core/platform/regexp.h" +#endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) + #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/errors.h" #include "tensorflow/core/platform/platform.h" -#if !defined(IS_MOBILE_PLATFORM) -#include "tensorflow/core/platform/regexp.h" -#endif #include "tensorflow/core/platform/scanner.h" #include "tensorflow/core/platform/str_util.h" #include "tensorflow/core/platform/strcat.h" @@ -39,16 +39,20 @@ limitations under the License. namespace tensorflow { bool FileSystem::Match(const string& filename, const string& pattern) { -#if defined(IS_MOBILE_PLATFORM) +#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) // We avoid relying on RE2 on mobile platforms, because it incurs a // significant binary size increase. + // For POSIX platforms, there is no need to depend on RE2 if `fnmatch` can be + // used safely. return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == 0; #else string regexp(pattern); - RE2::GlobalReplace(®exp, "\\*", "[^/]*"); - RE2::GlobalReplace(®exp, "\\?", "."); + regexp = str_util::StringReplace(regexp, "*", "[^/]*", true); + regexp = str_util::StringReplace(regexp, "?", ".", true); + regexp = str_util::StringReplace(regexp, "(", "\\(", true); + regexp = str_util::StringReplace(regexp, ")", "\\)", true); return RE2::FullMatch(filename, regexp); -#endif +#endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM) } string FileSystem::TranslateName(const string& name) const { diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index 2e42eb8fbe8..af4b2e9dd60 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -159,6 +159,18 @@ class FileIoTest(test.TestCase): file_io.delete_recursively(dir_path) self.assertFalse(file_io.file_exists(os.path.join(dir_path, "file3.txt"))) + def testGetMatchingFilesWhenParentDirContainsParantheses(self): + dir_path = os.path.join(self._base_dir, "dir_(special)") + file_io.create_dir(dir_path) + files = ["file1.txt", "file(2).txt"] + for name in files: + file_path = os.path.join(dir_path, name) + file_io.FileIO(file_path, mode="w").write("testing") + expected_match = [os.path.join(dir_path, name) for name in files] + glob_pattern = os.path.join(dir_path, "*") + self.assertItemsEqual( + file_io.get_matching_files(glob_pattern), expected_match) + def testCreateRecursiveDir(self): dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2") file_io.recursive_create_dir(dir_path)