From a616fc34ebdb0bd2438f9cb8854611f9f11322e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 27 Jul 2020 16:10:58 -0700 Subject: [PATCH] Adds support for string separators to AddJoinedString Also fixes a bug where an initial empty string would not generate a separator. PiperOrigin-RevId: 323462049 Change-Id: I99703ff6b641bcca1b1a72b234ab4766b05a5ae5 --- RELEASE.md | 2 ++ tensorflow/lite/string_util.cc | 27 +++++++++++-------- tensorflow/lite/string_util.h | 2 ++ tensorflow/lite/string_util_test.cc | 42 +++++++++++++++++++++++------ 4 files changed, 54 insertions(+), 19 deletions(-) diff --git a/RELEASE.md b/RELEASE.md index 6c7562a80df..7182846a805 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -86,6 +86,8 @@ True, the function may use type annotations to optimize the tracing performance. * `tf.lite`: + * `DynamicBuffer::AddJoinedString()` will now add a separator if the first + string to be joined is empty. * * `tf.random`: * diff --git a/tensorflow/lite/string_util.cc b/tensorflow/lite/string_util.cc index 44719858f2a..799a850a0d4 100644 --- a/tensorflow/lite/string_util.cc +++ b/tensorflow/lite/string_util.cc @@ -35,27 +35,32 @@ void DynamicBuffer::AddString(const StringRef& string) { void DynamicBuffer::AddJoinedString(const std::vector& strings, char separator) { + StringRef ref; + ref.str = &separator; + ref.len = 1; + AddJoinedString(strings, ref); +} + +void DynamicBuffer::AddJoinedString(const std::vector& strings, + StringRef separator) { // Resize the data buffer. - int total_len = strings.size() - 1; + int total_len = (strings.size() - 1) * separator.len; for (StringRef ref : strings) { total_len += ref.len; } data_.resize(data_.size() + total_len); - int current_idx = 0; - for (StringRef ref : strings) { - char* dst = data_.data() + offset_.back() + current_idx; - + char* dst = data_.data() + offset_.back(); + for (int i = 0; i < strings.size(); ++i) { // Fill separator if not first string. - if (current_idx != 0) { - *dst = separator; - ++dst; - ++current_idx; + if (i != 0) { + memcpy(dst, separator.str, separator.len); + dst += separator.len; } // Fill content of the string. - memcpy(dst, ref.str, ref.len); - current_idx += ref.len; + memcpy(dst, strings[i].str, strings[i].len); + dst += strings[i].len; } offset_.push_back(offset_.back() + total_len); } diff --git a/tensorflow/lite/string_util.h b/tensorflow/lite/string_util.h index 879aa76b83b..2086f9badbf 100644 --- a/tensorflow/lite/string_util.h +++ b/tensorflow/lite/string_util.h @@ -69,6 +69,8 @@ class DynamicBuffer { // Join a list of string with separator, and add as a single string to the // buffer. void AddJoinedString(const std::vector& strings, char separator); + void AddJoinedString(const std::vector& strings, + StringRef separator); // Fill content into a buffer and returns the number of bytes stored. // The function allocates space for the buffer but does NOT take ownership. diff --git a/tensorflow/lite/string_util_test.cc b/tensorflow/lite/string_util_test.cc index 28d93840c56..d5c4909fcad 100644 --- a/tensorflow/lite/string_util_test.cc +++ b/tensorflow/lite/string_util_test.cc @@ -97,27 +97,53 @@ TEST(StringUtil, TestStringUtil) { ASSERT_EQ(t2->bytes, 15); } -TEST(StringUtil, TestAddJoinedString) { +TEST(StringUtil, TestAddJoinedStringCharSeparator) { Interpreter interpreter; interpreter.AddTensors(1); TfLiteTensor* t0 = interpreter.tensor(0); t0->type = kTfLiteString; t0->allocation_type = kTfLiteDynamic; - char s0[] = "ABC"; - char s1[] = "DEFG"; - char s2[] = ""; - char s3[] = "XYZ"; + char s0[] = ""; + char s1[] = "ABC"; + char s2[] = "DEFG"; + char s3[] = ""; + char s4[] = "XYZ"; DynamicBuffer buf; - buf.AddJoinedString({{s0, 3}, {s1, 4}, {s2, 0}, {s3, 3}}, ' '); + buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, ' '); buf.WriteToTensorAsVector(t0); ASSERT_EQ(GetStringCount(t0), 1); StringRef str_ref; str_ref = GetString(t0, 0); - ASSERT_EQ(string(str_ref.str, str_ref.len), "ABC DEFG XYZ"); - ASSERT_EQ(t0->bytes, 25); + ASSERT_EQ(string(str_ref.str, str_ref.len), " ABC DEFG XYZ"); + ASSERT_EQ(t0->bytes, 26); +} + +TEST(StringUtil, TestAddJoinedStringStringRefSeparator) { + Interpreter interpreter; + interpreter.AddTensors(1); + TfLiteTensor* t0 = interpreter.tensor(0); + t0->type = kTfLiteString; + t0->allocation_type = kTfLiteDynamic; + + char s[] = " - "; + char s0[] = ""; + char s1[] = "ABC"; + char s2[] = "DEFG"; + char s3[] = ""; + char s4[] = "XYZ"; + + DynamicBuffer buf; + buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, {s, 3}); + buf.WriteToTensorAsVector(t0); + + ASSERT_EQ(GetStringCount(t0), 1); + StringRef str_ref; + str_ref = GetString(t0, 0); + ASSERT_EQ(string(str_ref.str, str_ref.len), " - ABC - DEFG - - XYZ"); + ASSERT_EQ(t0->bytes, 34); } TEST(StringUtil, TestEmptyList) {