Fix GetMatchingPaths
with special chars bug.
After 5659465166
, `GetMatchingPaths` was converted to use RE2 instead of `fnmatch` as that allows non-local filesystems (e.g., GCS, Hadoop, S3) to also be used from Windows. However, this breaks compatibility between `tf.io.gfile.glob` and Python `glob` and that results in tests silently failing or examples being silently skipped during training.
The fix is two-pronged. First, to fix #37758 only, we add regexp replacements for `(` and `)` in the pattern, escaping them before matching. After testing and seeing that this works, we then re-enable `fnmatch` on POSIX environments to reduce binary size, just like we did for mobile platforms.
Fixes #37758 (everywhere) and tensorflow/tensorboard#3260 (on posix platforms).
Tested via `bazel run //tensorflow/python:file_io_test` after adding a test for the pattern in #37758.
Will need to be cherry-picked onto `r2.2` branch.
PiperOrigin-RevId: 303009914
Change-Id: Ieab047f63e9ba6bb0ec0499e0fa864f6ca6090ff
This commit is contained in:
parent
bfc51b2d23
commit
56f4edbb5a
@ -16,9 +16,6 @@ limitations under the License.
|
|||||||
#include "tensorflow/core/platform/file_system.h"
|
#include "tensorflow/core/platform/file_system.h"
|
||||||
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#if defined(IS_MOBILE_PLATFORM)
|
|
||||||
#include <fnmatch.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <deque>
|
#include <deque>
|
||||||
@ -26,12 +23,15 @@ limitations under the License.
|
|||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM)
|
||||||
|
#include <fnmatch.h>
|
||||||
|
#else
|
||||||
|
#include "tensorflow/core/platform/regexp.h"
|
||||||
|
#endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM)
|
||||||
|
|
||||||
#include "tensorflow/core/platform/env.h"
|
#include "tensorflow/core/platform/env.h"
|
||||||
#include "tensorflow/core/platform/errors.h"
|
#include "tensorflow/core/platform/errors.h"
|
||||||
#include "tensorflow/core/platform/platform.h"
|
#include "tensorflow/core/platform/platform.h"
|
||||||
#if !defined(IS_MOBILE_PLATFORM)
|
|
||||||
#include "tensorflow/core/platform/regexp.h"
|
|
||||||
#endif
|
|
||||||
#include "tensorflow/core/platform/scanner.h"
|
#include "tensorflow/core/platform/scanner.h"
|
||||||
#include "tensorflow/core/platform/str_util.h"
|
#include "tensorflow/core/platform/str_util.h"
|
||||||
#include "tensorflow/core/platform/strcat.h"
|
#include "tensorflow/core/platform/strcat.h"
|
||||||
@ -39,16 +39,20 @@ limitations under the License.
|
|||||||
namespace tensorflow {
|
namespace tensorflow {
|
||||||
|
|
||||||
bool FileSystem::Match(const string& filename, const string& pattern) {
|
bool FileSystem::Match(const string& filename, const string& pattern) {
|
||||||
#if defined(IS_MOBILE_PLATFORM)
|
#if defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM)
|
||||||
// We avoid relying on RE2 on mobile platforms, because it incurs a
|
// We avoid relying on RE2 on mobile platforms, because it incurs a
|
||||||
// significant binary size increase.
|
// significant binary size increase.
|
||||||
|
// For POSIX platforms, there is no need to depend on RE2 if `fnmatch` can be
|
||||||
|
// used safely.
|
||||||
return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == 0;
|
return fnmatch(pattern.c_str(), filename.c_str(), FNM_PATHNAME) == 0;
|
||||||
#else
|
#else
|
||||||
string regexp(pattern);
|
string regexp(pattern);
|
||||||
RE2::GlobalReplace(®exp, "\\*", "[^/]*");
|
regexp = str_util::StringReplace(regexp, "*", "[^/]*", true);
|
||||||
RE2::GlobalReplace(®exp, "\\?", ".");
|
regexp = str_util::StringReplace(regexp, "?", ".", true);
|
||||||
|
regexp = str_util::StringReplace(regexp, "(", "\\(", true);
|
||||||
|
regexp = str_util::StringReplace(regexp, ")", "\\)", true);
|
||||||
return RE2::FullMatch(filename, regexp);
|
return RE2::FullMatch(filename, regexp);
|
||||||
#endif
|
#endif // defined(PLATFORM_POSIX) || defined(IS_MOBILE_PLATFORM)
|
||||||
}
|
}
|
||||||
|
|
||||||
string FileSystem::TranslateName(const string& name) const {
|
string FileSystem::TranslateName(const string& name) const {
|
||||||
|
@ -159,6 +159,18 @@ class FileIoTest(test.TestCase):
|
|||||||
file_io.delete_recursively(dir_path)
|
file_io.delete_recursively(dir_path)
|
||||||
self.assertFalse(file_io.file_exists(os.path.join(dir_path, "file3.txt")))
|
self.assertFalse(file_io.file_exists(os.path.join(dir_path, "file3.txt")))
|
||||||
|
|
||||||
|
def testGetMatchingFilesWhenParentDirContainsParantheses(self):
|
||||||
|
dir_path = os.path.join(self._base_dir, "dir_(special)")
|
||||||
|
file_io.create_dir(dir_path)
|
||||||
|
files = ["file1.txt", "file(2).txt"]
|
||||||
|
for name in files:
|
||||||
|
file_path = os.path.join(dir_path, name)
|
||||||
|
file_io.FileIO(file_path, mode="w").write("testing")
|
||||||
|
expected_match = [os.path.join(dir_path, name) for name in files]
|
||||||
|
glob_pattern = os.path.join(dir_path, "*")
|
||||||
|
self.assertItemsEqual(
|
||||||
|
file_io.get_matching_files(glob_pattern), expected_match)
|
||||||
|
|
||||||
def testCreateRecursiveDir(self):
|
def testCreateRecursiveDir(self):
|
||||||
dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2")
|
dir_path = os.path.join(self._base_dir, "temp_dir/temp_dir1/temp_dir2")
|
||||||
file_io.recursive_create_dir(dir_path)
|
file_io.recursive_create_dir(dir_path)
|
||||||
|
Loading…
Reference in New Issue
Block a user