Marked functions in str_util.h with absl equivalents as deprecated. Changed their implementations to call their absl equivalents.

PiperOrigin-RevId: 242163502
2019-04-05 11:42:54 -07:00 · 2019-04-05 11:42:54 -07:00 · 878c50a904
commit 878c50a904
parent eb0b6872f1
3 changed files with 66 additions and 290 deletions
--- a/tensorflow/core/lib/strings/proto_text_util.h
+++ b/tensorflow/core/lib/strings/proto_text_util.h
@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_
 #define TENSORFLOW_CORE_LIB_STRINGS_PROTO_TEXT_UTIL_H_

+#include "absl/strings/str_cat.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/scanner.h"
 #include "tensorflow/core/lib/strings/str_util.h"
@ -101,8 +102,8 @@ class ProtoTextOutput {

 private:
  void AppendFieldAndValue(const char field_name[], StringPiece value_text) {
-    StrAppend(output_, level_empty_ ? "" : field_separator_, indent_,
-              field_name, kColonSeparator, value_text);
+    // No separator is emitted while level_empty_ is still set.
+    const auto leading_separator = level_empty_ ? "" : field_separator_;
+    absl::StrAppend(output_, leading_separator, indent_, field_name,
+                    kColonSeparator, value_text);
    level_empty_ = false;
  }

--- a/tensorflow/core/lib/strings/str_util.cc
+++ b/tensorflow/core/lib/strings/str_util.cc
@ -19,6 +19,10 @@ limitations under the License.
 #include <algorithm>
 #include <cstring>
 #include <vector>
+#include "absl/strings/ascii.h"
+#include "absl/strings/escaping.h"
+#include "absl/strings/match.h"
+#include "absl/strings/strip.h"
 #include "tensorflow/core/lib/strings/numbers.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
@ -26,196 +30,10 @@ limitations under the License.
 namespace tensorflow {
 namespace str_util {

-static char hex_char[] = "0123456789abcdef";
-
-string CEscape(StringPiece src) {
-  string dest;
-
-  for (unsigned char c : src) {
-    switch (c) {
-      case '\n':
-        dest.append("\\n");
-        break;
-      case '\r':
-        dest.append("\\r");
-        break;
-      case '\t':
-        dest.append("\\t");
-        break;
-      case '\"':
-        dest.append("\\\"");
-        break;
-      case '\'':
-        dest.append("\\'");
-        break;
-      case '\\':
-        dest.append("\\\\");
-        break;
-      default:
-        // Note that if we emit \xNN and the src character after that is a hex
-        // digit then that digit must be escaped too to prevent it being
-        // interpreted as part of the character code by C.
-        if ((c >= 0x80) || !isprint(c)) {
-          dest.append("\\");
-          dest.push_back(hex_char[c / 64]);
-          dest.push_back(hex_char[(c % 64) / 8]);
-          dest.push_back(hex_char[c % 8]);
-        } else {
-          dest.push_back(c);
-          break;
-        }
-    }
-  }
-
-  return dest;
-}
+string CEscape(StringPiece src) { return absl::CEscape(src); }

-namespace {  // Private helpers for CUnescape().
+namespace {  // Private helpers.

-inline bool is_octal_digit(unsigned char c) { return c >= '0' && c <= '7'; }
-
-inline bool ascii_isxdigit(unsigned char c) {
-  return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') ||
-         (c >= 'A' && c <= 'F');
-}
-
-inline int hex_digit_to_int(char c) {
-  int x = static_cast<unsigned char>(c);
-  if (x > '9') {
-    x += 9;
-  }
-  return x & 0xf;
-}
-
-bool CUnescapeInternal(StringPiece source, string* dest,
-                       string::size_type* dest_len, string* error) {
-  const char* p = source.data();
-  const char* end = source.end();
-  const char* last_byte = end - 1;
-
-  // We are going to write the result to dest with its iterator. If our string
-  // implementation uses copy-on-write, this will trigger a copy-on-write of
-  // dest's buffer; that is, dest will be assigned a new buffer.
-  //
-  // Note that the following way is NOT a legal way to modify a string's
-  // content:
-  //
-  //  char* d = const_cast<char*>(dest->data());
-  //
-  // This won't trigger copy-on-write of the string, and so is dangerous when
-  // the buffer is shared.
-  auto d = dest->begin();
-
-  // Small optimization for case where source = dest and there's no escaping
-  if (source.data() == dest->data()) {
-    while (p < end && *p != '\\') {
-      p++;
-      d++;
-    }
-  }
-
-  while (p < end) {
-    if (*p != '\\') {
-      *d++ = *p++;
-    } else {
-      if (++p > last_byte) {  // skip past the '\\'
-        if (error) *error = "String cannot end with \\";
-        return false;
-      }
-      switch (*p) {
-        case 'a':
-          *d++ = '\a';
-          break;
-        case 'b':
-          *d++ = '\b';
-          break;
-        case 'f':
-          *d++ = '\f';
-          break;
-        case 'n':
-          *d++ = '\n';
-          break;
-        case 'r':
-          *d++ = '\r';
-          break;
-        case 't':
-          *d++ = '\t';
-          break;
-        case 'v':
-          *d++ = '\v';
-          break;
-        case '\\':
-          *d++ = '\\';
-          break;
-        case '?':
-          *d++ = '\?';
-          break;  // \?  Who knew?
-        case '\'':
-          *d++ = '\'';
-          break;
-        case '"':
-          *d++ = '\"';
-          break;
-        case '0':
-        case '1':
-        case '2':
-        case '3':  // octal digit: 1 to 3 digits
-        case '4':
-        case '5':
-        case '6':
-        case '7': {
-          const char* octal_start = p;
-          unsigned int ch = *p - '0';
-          if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0';
-          if (p < last_byte && is_octal_digit(p[1]))
-            ch = ch * 8 + *++p - '0';  // now points at last digit
-          if (ch > 0xff) {
-            if (error) {
-              *error = "Value of \\" +
-                       string(octal_start, p + 1 - octal_start) +
-                       " exceeds 0xff";
-            }
-            return false;
-          }
-          *d++ = ch;
-          break;
-        }
-        case 'x':
-        case 'X': {
-          if (p >= last_byte) {
-            if (error) *error = "String cannot end with \\x";
-            return false;
-          } else if (!ascii_isxdigit(p[1])) {
-            if (error) *error = "\\x cannot be followed by a non-hex digit";
-            return false;
-          }
-          unsigned int ch = 0;
-          const char* hex_start = p;
-          while (p < last_byte && ascii_isxdigit(p[1]))
-            // Arbitrarily many hex digits
-            ch = (ch << 4) + hex_digit_to_int(*++p);
-          if (ch > 0xFF) {
-            if (error) {
-              *error = "Value of \\" + string(hex_start, p + 1 - hex_start) +
-                       " exceeds 0xff";
-            }
-            return false;
-          }
-          *d++ = ch;
-          break;
-        }
-        default: {
-          if (error) *error = string("Unknown escape sequence: \\") + *p;
-          return false;
-        }
-      }
-      p++;  // read past letter we escaped
-    }
-  }
-  *dest_len = d - dest->begin();
-  return true;
-}
-
 template <typename T>
 bool SplitAndParseAsInts(StringPiece text, char delim,
                         std::function<bool(StringPiece, T*)> converter,
@ -233,39 +51,18 @@ bool SplitAndParseAsInts(StringPiece text, char delim,
 }  // namespace

 bool CUnescape(StringPiece source, string* dest, string* error) {
-  dest->resize(source.size());
-  string::size_type dest_size;
-  if (!CUnescapeInternal(source, dest, &dest_size, error)) {
-    return false;
-  }
-  dest->erase(dest_size);
-  return true;
+  return absl::CUnescape(source, dest, error);
 }

 void StripTrailingWhitespace(string* s) {
-  string::size_type i;
-  for (i = s->size(); i > 0 && isspace((*s)[i - 1]); --i) {
-  }
-  s->resize(i);
+  absl::StripTrailingAsciiWhitespace(s);
 }

 // Return lower-cased version of s.
-string Lowercase(StringPiece s) {
-  string result(s.data(), s.size());
-  for (char& c : result) {
-    c = tolower(c);
-  }
-  return result;
-}
+string Lowercase(StringPiece s) { return absl::AsciiStrToLower(s); }

 // Return upper-cased version of s.
-string Uppercase(StringPiece s) {
-  string result(s.data(), s.size());
-  for (char& c : result) {
-    c = toupper(c);
-  }
-  return result;
-}
+string Uppercase(StringPiece s) { return absl::AsciiStrToUpper(s); }

 string ArgDefCase(StringPiece s) {
  const size_t n = s.size();
@ -349,46 +146,32 @@ string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,
 }

 size_t RemoveLeadingWhitespace(StringPiece* text) {
-  size_t count = 0;
-  const char* ptr = text->data();
-  while (count < text->size() && isspace(*ptr)) {
-    count++;
-    ptr++;
-  }
-  text->remove_prefix(count);
+  // Trim in place, then derive the number of dropped characters from how
+  // much shorter the view became.
+  const size_t size_before = text->size();
+  *text = absl::StripLeadingAsciiWhitespace(*text);
+  const size_t count = size_before - text->size();
  return count;
 }

 size_t RemoveTrailingWhitespace(StringPiece* text) {
-  size_t count = 0;
-  const char* ptr = text->data() + text->size() - 1;
-  while (count < text->size() && isspace(*ptr)) {
-    ++count;
-    --ptr;
-  }
-  text->remove_suffix(count);
+  // Trim in place, then derive the number of dropped characters from how
+  // much shorter the view became.
+  const size_t size_before = text->size();
+  *text = absl::StripTrailingAsciiWhitespace(*text);
+  const size_t count = size_before - text->size();
  return count;
 }

 size_t RemoveWhitespaceContext(StringPiece* text) {
-  // use RemoveLeadingWhitespace() and RemoveTrailingWhitespace() to do the job
-  return (RemoveLeadingWhitespace(text) + RemoveTrailingWhitespace(text));
+  // Strip both ends in one pass; the size difference is the total number of
+  // leading plus trailing characters removed.
+  const size_t size_before = text->size();
+  *text = absl::StripAsciiWhitespace(*text);
+  return size_before - text->size();
 }

 bool ConsumePrefix(StringPiece* s, StringPiece expected) {
-  if (StartsWith(*s, expected)) {
-    s->remove_prefix(expected.size());
-    return true;
-  }
-  return false;
+  return absl::ConsumePrefix(s, expected);
 }

 bool ConsumeSuffix(StringPiece* s, StringPiece expected) {
-  if (EndsWith(*s, expected)) {
-    s->remove_suffix(expected.size());
-    return true;
-  }
-  return false;
+  return absl::ConsumeSuffix(s, expected);
 }

 bool ConsumeLeadingDigits(StringPiece* s, uint64* val) {
@ -447,11 +230,12 @@ bool SplitAndParseAsInts(StringPiece text, char delim,

 bool SplitAndParseAsFloats(StringPiece text, char delim,
                           std::vector<float>* result) {
-  return SplitAndParseAsInts<float>(text, delim,
-                                    [](StringPiece str, float* value) {
-                                      return strings::safe_strtof(str, value);
-                                    },
-                                    result);
+  return SplitAndParseAsInts<float>(
+      text, delim,
+      [](StringPiece str, float* value) {
+        return strings::safe_strtof(str, value);
+      },
+      result);
 }

 size_t Strnlen(const char* str, const size_t string_max_len) {
@ -463,20 +247,15 @@ size_t Strnlen(const char* str, const size_t string_max_len) {
 }

 bool StrContains(StringPiece haystack, StringPiece needle) {
-  return std::search(haystack.begin(), haystack.end(), needle.begin(),
-                     needle.end()) != haystack.end();
+  return absl::StrContains(haystack, needle);
 }

 bool StartsWith(StringPiece text, StringPiece prefix) {
-  return prefix.empty() ||
-         (text.size() >= prefix.size() &&
-          memcmp(text.data(), prefix.data(), prefix.size()) == 0);
+  return absl::StartsWith(text, prefix);
 }

 bool EndsWith(StringPiece text, StringPiece suffix) {
-  return suffix.empty() || (text.size() >= suffix.size() &&
-                            memcmp(text.data() + (text.size() - suffix.size()),
-                                   suffix.data(), suffix.size()) == 0);
+  return absl::EndsWith(text, suffix);
 }

 }  // namespace str_util
--- a/tensorflow/core/lib/strings/str_util.h
+++ b/tensorflow/core/lib/strings/str_util.h
@ -19,6 +19,10 @@ limitations under the License.
 #include <functional>
 #include <string>
 #include <vector>
+#include "absl/base/macros.h"
+#include "absl/strings/ascii.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_split.h"
 #include "tensorflow/core/lib/core/stringpiece.h"
 #include "tensorflow/core/lib/strings/strcat.h"
 #include "tensorflow/core/platform/types.h"
@ -29,6 +33,7 @@ namespace str_util {

 // Returns a version of 'src' where unprintable characters have been
 // escaped using C-style escape sequences.
+ABSL_DEPRECATED("Use absl::CEscape instead.")
 string CEscape(StringPiece src);

 // Copies "source" to "dest", rewriting C-style escape sequences --
@ -38,21 +43,26 @@ string CEscape(StringPiece src);
 // 'error'. To disable error reporting, set 'error' to NULL.
 //
-// NOTE: Does not support \u or \U!
+// NOTE: This now forwards to absl::CUnescape, which also accepts \u and \U
+// escapes.
+ABSL_DEPRECATED("Use absl::CUnescape instead.")
 bool CUnescape(StringPiece source, string* dest, string* error);

 // Removes any trailing whitespace from "*s".
+ABSL_DEPRECATED("Use absl::StripTrailingAsciiWhitespace instead.")
 void StripTrailingWhitespace(string* s);

 // Removes leading ascii_isspace() characters.
 // Returns number of characters removed.
+ABSL_DEPRECATED("Use absl::StripLeadingAsciiWhitespace instead.")
 size_t RemoveLeadingWhitespace(StringPiece* text);

 // Removes trailing ascii_isspace() characters.
 // Returns number of characters removed.
+ABSL_DEPRECATED("Use absl::StripTrailingAsciiWhitespace instead.")
 size_t RemoveTrailingWhitespace(StringPiece* text);

 // Removes leading and trailing ascii_isspace() chars.
 // Returns number of chars removed.
+ABSL_DEPRECATED("Use absl::StripAsciiWhitespace instead.")
 size_t RemoveWhitespaceContext(StringPiece* text);

 // Consume a leading positive integer value.  If any digits were
@ -68,16 +78,20 @@ bool ConsumeNonWhitespace(StringPiece* s, StringPiece* val);

 // If "*s" starts with "expected", consume it and return true.
 // Otherwise, return false.
+ABSL_DEPRECATED("Use absl::ConsumePrefix instead.")
 bool ConsumePrefix(StringPiece* s, StringPiece expected);

 // If "*s" ends with "expected", remove it and return true.
 // Otherwise, return false.
+ABSL_DEPRECATED("Use absl::ConsumeSuffix instead.")
 bool ConsumeSuffix(StringPiece* s, StringPiece expected);

 // Return lower-cased version of s.
+ABSL_DEPRECATED("Use absl::AsciiStrToLower instead.")
 string Lowercase(StringPiece s);

 // Return upper-cased version of s.
+ABSL_DEPRECATED("Use absl::AsciiStrToUpper instead.")
 string Uppercase(StringPiece s);

 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
@ -102,12 +116,14 @@ string StringReplace(StringPiece s, StringPiece oldsub, StringPiece newsub,

 // Join functionality
 template <typename T>
+ABSL_DEPRECATED("Use absl::StrJoin instead.")
 string Join(const T& s, const char* sep);

 // A variant of Join where for each element of "s", f(&dest_string, elem)
 // is invoked (f is often constructed with a lambda of the form:
 //   [](string* result, ElemType elem)
 template <typename T, typename Formatter>
+ABSL_DEPRECATED("Use absl::StrJoin instead.")
 string Join(const T& s, const char* sep, Formatter f);

 struct AllowEmpty {
@ -118,16 +134,17 @@ struct SkipEmpty {
 };
 struct SkipWhitespace {
  bool operator()(StringPiece sp) const {
-    RemoveTrailingWhitespace(&sp);
-    return !sp.empty();
+    // Accept the token unless it is empty once trailing ASCII whitespace has
+    // been removed.
+    const auto trimmed = absl::StripTrailingAsciiWhitespace(sp);
+    return !trimmed.empty();
  }
 };

 // Split strings using any of the supplied delimiters. For example:
 // Split("a,b.c,d", ".,") would return {"a", "b", "c", "d"}.
+ABSL_DEPRECATED("Use absl::StrSplit instead.")
 std::vector<string> Split(StringPiece text, StringPiece delims);

 template <typename Predicate>
+ABSL_DEPRECATED("Use absl::StrSplit instead.")
 std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p);

 // Split "text" at "delim" characters, and parse each component as
@ -143,29 +160,26 @@ bool SplitAndParseAsFloats(StringPiece text, char delim,
 // StartsWith()
 //
 // Returns whether a given string `text` begins with `prefix`.
+ABSL_DEPRECATED("Use absl::StartsWith instead.")
 bool StartsWith(StringPiece text, StringPiece prefix);

 // EndsWith()
 //
 // Returns whether a given string `text` ends with `suffix`.
+ABSL_DEPRECATED("Use absl::EndsWith instead.")
 bool EndsWith(StringPiece text, StringPiece suffix);

 // StrContains()
 //
 // Returns whether a given string `haystack` contains the substring `needle`.
+ABSL_DEPRECATED("Use absl::StrContains instead.")
 bool StrContains(StringPiece haystack, StringPiece needle);

 // ------------------------------------------------------------------
 // Implementation details below
 template <typename T>
 string Join(const T& s, const char* sep) {
-  string result;
-  bool first = true;
-  for (const auto& x : s) {
-    tensorflow::strings::StrAppend(&result, (first ? "" : sep), x);
-    first = false;
-  }
-  return result;
+  return absl::StrJoin(s, sep);
 }

 template <typename T>
@ -180,47 +194,29 @@ class Formatter {

 template <typename T, typename Formatter>
 string Join(const T& s, const char* sep, Formatter f) {
-  string result;
-  bool first = true;
-  for (const auto& x : s) {
-    if (!first) {
-      result.append(sep);
-    }
-    f(&result, x);
-    first = false;
-  }
-  return result;
+  return absl::StrJoin(s, sep, f);
 }

 inline std::vector<string> Split(StringPiece text, StringPiece delims) {
-  return Split(text, delims, AllowEmpty());
+  // Empty input yields no tokens at all.
+  if (text.empty()) return {};
+  // absl::ByAnyChar("") splits between every character, whereas the loop
+  // this replaces never split on an empty delimiter set and returned the
+  // whole text as its single token. Keep that behavior.
+  if (delims.empty()) return {string(text)};
+  return absl::StrSplit(text, absl::ByAnyChar(delims));
 }

 template <typename Predicate>
 std::vector<string> Split(StringPiece text, StringPiece delims, Predicate p) {
-  std::vector<string> result;
-  size_t token_start = 0;
-  if (!text.empty()) {
-    for (size_t i = 0; i < text.size() + 1; i++) {
-      if ((i == text.size()) || (delims.find(text[i]) != StringPiece::npos)) {
-        StringPiece token(text.data() + token_start, i - token_start);
-        if (p(token)) {
-          result.emplace_back(token);
-        }
-        token_start = i + 1;
-      }
-    }
-  }
-  return result;
+  // Empty input yields no tokens at all (the predicate is never consulted).
+  if (text.empty()) return {};
+  if (delims.empty()) {
+    // absl::ByAnyChar("") splits between every character, whereas the loop
+    // this replaces never split on an empty delimiter set: the whole text
+    // was the only token offered to the predicate. Keep that behavior.
+    std::vector<string> whole;
+    if (p(text)) whole.emplace_back(text);
+    return whole;
+  }
+  return absl::StrSplit(text, absl::ByAnyChar(delims), p);
 }

+ABSL_DEPRECATED("Use absl::StrSplit instead.")
 inline std::vector<string> Split(StringPiece text, char delim) {
-  return Split(text, StringPiece(&delim, 1));
+  // Empty input produces no tokens; everything else is split on `delim`.
+  if (text.empty()) return {};
+  return absl::StrSplit(text, delim);
 }

 template <typename Predicate>
-std::vector<string> Split(StringPiece text, char delims, Predicate p) {
-  return Split(text, StringPiece(&delims, 1), p);
+ABSL_DEPRECATED("Use absl::StrSplit instead.")
+std::vector<string> Split(StringPiece text, char delim, Predicate p) {
+  // Empty input produces no tokens; otherwise split on `delim` and keep only
+  // the pieces accepted by `p`.
+  if (text.empty()) return {};
+  return absl::StrSplit(text, delim, p);
 }

 // Returns the length of the given null-terminated byte string 'str'.