Adds support for string separators to AddJoinedString

Also fixes a bug where an initial empty string would not generate a separator.

PiperOrigin-RevId: 323462049
Change-Id: I99703ff6b641bcca1b1a72b234ab4766b05a5ae5
This commit is contained in:
A. Unique TensorFlower 2020-07-27 16:10:58 -07:00 committed by TensorFlower Gardener
parent 11ae78cc4d
commit a616fc34eb
4 changed files with 54 additions and 19 deletions

View File

@ -86,6 +86,8 @@
True, the function may use type annotations to optimize the tracing
performance.
* `tf.lite`:
* `DynamicBuffer::AddJoinedString()` will now add a separator if the first
string to be joined is empty.
* <ADD RELEASE NOTES HERE>
* `tf.random`:
* <ADD RELEASE NOTES HERE>

View File

@ -35,27 +35,32 @@ void DynamicBuffer::AddString(const StringRef& string) {
void DynamicBuffer::AddJoinedString(const std::vector<StringRef>& strings,
char separator) {
StringRef ref;
ref.str = &separator;
ref.len = 1;
AddJoinedString(strings, ref);
}
void DynamicBuffer::AddJoinedString(const std::vector<StringRef>& strings,
StringRef separator) {
// Resize the data buffer.
int total_len = strings.size() - 1;
int total_len = (strings.size() - 1) * separator.len;
for (StringRef ref : strings) {
total_len += ref.len;
}
data_.resize(data_.size() + total_len);
int current_idx = 0;
for (StringRef ref : strings) {
char* dst = data_.data() + offset_.back() + current_idx;
char* dst = data_.data() + offset_.back();
for (int i = 0; i < strings.size(); ++i) {
// Fill separator if not first string.
if (current_idx != 0) {
*dst = separator;
++dst;
++current_idx;
if (i != 0) {
memcpy(dst, separator.str, separator.len);
dst += separator.len;
}
// Fill content of the string.
memcpy(dst, ref.str, ref.len);
current_idx += ref.len;
memcpy(dst, strings[i].str, strings[i].len);
dst += strings[i].len;
}
offset_.push_back(offset_.back() + total_len);
}

View File

@ -69,6 +69,8 @@ class DynamicBuffer {
// Join a list of strings with a single-character separator, and add the
// result as a single string to the buffer.
void AddJoinedString(const std::vector<StringRef>& strings, char separator);
// Same as above, but the separator is an arbitrary StringRef (it may be
// longer than one character).
void AddJoinedString(const std::vector<StringRef>& strings,
StringRef separator);
// Fill content into a buffer and returns the number of bytes stored.
// The function allocates space for the buffer but does NOT take ownership.

View File

@ -97,27 +97,53 @@ TEST(StringUtil, TestStringUtil) {
ASSERT_EQ(t2->bytes, 15);
}
// Joins five strings (including a leading and an interior empty string) with
// a single-character separator and checks the tensor contents.
TEST(StringUtil, TestAddJoinedStringCharSeparator) {
  Interpreter interpreter;
  interpreter.AddTensors(1);
  TfLiteTensor* t0 = interpreter.tensor(0);
  t0->type = kTfLiteString;
  t0->allocation_type = kTfLiteDynamic;

  char s0[] = "";
  char s1[] = "ABC";
  char s2[] = "DEFG";
  char s3[] = "";
  char s4[] = "XYZ";

  DynamicBuffer buf;
  buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, ' ');
  buf.WriteToTensorAsVector(t0);

  ASSERT_EQ(GetStringCount(t0), 1);
  StringRef str_ref;
  str_ref = GetString(t0, 0);
  // The leading empty string yields a leading separator, and the empty string
  // between "DEFG" and "XYZ" yields two adjacent separators: 14 characters.
  ASSERT_EQ(string(str_ref.str, str_ref.len), " ABC DEFG  XYZ");
  ASSERT_EQ(t0->bytes, 26);
}
// Joins five strings (including a leading and an interior empty string) with
// a multi-character StringRef separator and checks the tensor contents.
TEST(StringUtil, TestAddJoinedStringStringRefSeparator) {
  Interpreter interpreter;
  interpreter.AddTensors(1);
  TfLiteTensor* t0 = interpreter.tensor(0);
  t0->type = kTfLiteString;
  t0->allocation_type = kTfLiteDynamic;

  char s[] = " - ";
  char s0[] = "";
  char s1[] = "ABC";
  char s2[] = "DEFG";
  char s3[] = "";
  char s4[] = "XYZ";

  DynamicBuffer buf;
  buf.AddJoinedString({{s0, 0}, {s1, 3}, {s2, 4}, {s3, 0}, {s4, 3}}, {s, 3});
  buf.WriteToTensorAsVector(t0);

  ASSERT_EQ(GetStringCount(t0), 1);
  StringRef str_ref;
  str_ref = GetString(t0, 0);
  // Four 3-character separators plus 10 characters of content: 22 characters.
  // The empty string between "DEFG" and "XYZ" leaves two separators
  // back-to-back (" -  - ").
  ASSERT_EQ(string(str_ref.str, str_ref.len), " - ABC - DEFG -  - XYZ");
  ASSERT_EQ(t0->bytes, 34);
}
TEST(StringUtil, TestEmptyList) {