Support user-specified string input in tflite benchmark tools.
- Users can supply them with the flag "--input_layer_value_files". For example, if there are 2 input tensors: --input_layer_value_files=input1:input1_file_path,input2:input2_file.
- The strings in the file are separated by the delimiter '\0'. For example, if the tensor shape is (1x3xSTRING), the file content should be "first_string_value\0second_string_value\0third_string_value\0".

PiperOrigin-RevId: 299272934
Change-Id: Iab5951c903fb4976e0dcf43dd09ba4695653dab3
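As a minimal sketch of producing such a '\0'-separated value file in C++ (the file name "input1_values.bin" and the values are only illustrative, not part of the commit):

// Sketch: write a '\0'-separated value file for a (1x3xSTRING) input tensor.
#include <fstream>
#include <string>
#include <vector>

int main() {
  const std::vector<std::string> values = {"first_string_value",
                                           "second_string_value",
                                           "third_string_value"};
  std::ofstream file("input1_values.bin", std::ios::binary);
  for (const std::string& v : values) {
    // length() + 1 also writes the terminating '\0', which is the delimiter.
    file.write(v.c_str(), v.length() + 1);
  }
  return 0;
}

The resulting file could then be passed as --input_layer_value_files=input1:input1_values.bin, assuming the model has an input tensor named "input1".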
parent 8f50df8872
commit 70fab67be3

BIN tensorflow/lite/testdata/string_input_model.bin (new file; binary file not shown)
tensorflow/lite/tools/benchmark/BUILD
@@ -90,10 +90,12 @@ cc_test(
     args = [
         "--fp32_graph=$(location //tensorflow/lite:testdata/multi_add.bin)",
         "--int8_graph=$(location //tensorflow/lite:testdata/add_quantized_int8.bin)",
+        "--string_graph=$(location //tensorflow/lite:testdata/string_input_model.bin)",
     ],
     data = [
         "//tensorflow/lite:testdata/add_quantized_int8.bin",
         "//tensorflow/lite:testdata/multi_add.bin",
+        "//tensorflow/lite:testdata/string_input_model.bin",
     ],
     tags = [
         "tflite_not_portable_android",
@@ -103,6 +105,7 @@ cc_test(
         ":benchmark_performance_options",
         ":benchmark_tflite_model_lib",
         "//tensorflow/lite:framework",
+        "//tensorflow/lite:string_util",
         "//tensorflow/lite/testing:util",
         "//tensorflow/lite/tools:command_line_flags",
         "@com_google_absl//absl/algorithm",
tensorflow/lite/tools/benchmark/benchmark_test.cc
@@ -22,6 +22,7 @@ limitations under the License.
 #include "absl/algorithm/algorithm.h"
 #include "absl/strings/str_format.h"
 #include "tensorflow/lite/interpreter.h"
+#include "tensorflow/lite/string_util.h"
 #include "tensorflow/lite/testing/util.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_performance_options.h"
 #include "tensorflow/lite/tools/benchmark/benchmark_tflite_model.h"
@@ -30,13 +31,14 @@
 namespace {
 const std::string* g_fp32_model_path = nullptr;
 const std::string* g_int8_model_path = nullptr;
+const std::string* g_string_model_path = nullptr;
 }  // namespace

 namespace tflite {
 namespace benchmark {
 namespace {

-enum class ModelGraphType { FP32, INT8 };
+enum class ModelGraphType { FP32, INT8, STRING };

 BenchmarkParams CreateParams(int32_t num_runs, float min_secs, float max_secs,
                              ModelGraphType graph_type = ModelGraphType::FP32) {
@@ -53,6 +55,9 @@ BenchmarkParams CreateParams(int32_t num_runs, float min_secs, float max_secs,
   if (graph_type == ModelGraphType::INT8) {
     params.AddParam("graph",
                     BenchmarkParam::Create<std::string>(*g_int8_model_path));
+  } else if (graph_type == ModelGraphType::STRING) {
+    params.AddParam("graph",
+                    BenchmarkParam::Create<std::string>(*g_string_model_path));
   } else {
     // by default, simply use the fp32 one.
     params.AddParam("graph",
@@ -97,6 +102,9 @@ BenchmarkParams CreateFp32Params() {
 BenchmarkParams CreateInt8Params() {
   return CreateParams(2, 1.0f, 150.0f, ModelGraphType::INT8);
 }
+BenchmarkParams CreateStringParams() {
+  return CreateParams(2, 1.0f, 150.0f, ModelGraphType::STRING);
+}

 std::string CreateFilePath(const std::string& file_name) {
   return std::string(getenv("TEST_TMPDIR")) + file_name;
@@ -126,11 +134,20 @@ void WriteInputLayerValueFile(const std::string& file_path,
 }

 void CheckInputTensorValue(const TfLiteTensor* input_tensor,
-                           char tensor_value) {
+                           char expected_value) {
   ASSERT_THAT(input_tensor, testing::NotNull());
   EXPECT_TRUE(std::all_of(
       input_tensor->data.raw, input_tensor->data.raw + input_tensor->bytes,
-      [tensor_value](char c) { return c == tensor_value; }));
+      [expected_value](char c) { return c == expected_value; }));
 }

+void CheckInputTensorValue(const TfLiteTensor* input_tensor,
+                           int tensor_dim_index,
+                           const std::string& expected_value) {
+  StringRef tensor_value = GetString(input_tensor, tensor_dim_index);
+  EXPECT_TRUE(absl::equal(tensor_value.str, tensor_value.str + tensor_value.len,
+                          expected_value.c_str(),
+                          expected_value.c_str() + expected_value.length()));
+}
+
 class TestBenchmark : public BenchmarkTfLiteModel {
@@ -165,6 +182,13 @@ TEST(BenchmarkTest, DoesntCrashInt8Model) {
   benchmark.Run();
 }

+TEST(BenchmarkTest, DoesntCrashStringModel) {
+  ASSERT_THAT(g_string_model_path, testing::NotNull());
+
+  TestBenchmark benchmark(CreateStringParams());
+  benchmark.Run();
+}
+
 TEST(BenchmarkTest, DoesntCrashMultiPerfOptions) {
   ASSERT_THAT(g_fp32_model_path, testing::NotNull());

@@ -267,6 +291,38 @@ TEST(BenchmarkTest, DoesntCrashWithExplicitInputValueFilesInt8Model) {
   CheckInputTensorValue(benchmark.GetInputTensor(0), file_value);
 }

+TEST(BenchmarkTest, DoesntCrashWithExplicitInputValueFilesStringModel) {
+  ASSERT_THAT(g_string_model_path, testing::NotNull());
+  const std::string file_path = CreateFilePath("string_binary");
+  const std::string string_value_0 = "abcd";
+  const std::string string_value_1 = "12345";
+  const std::string string_value_2 = "a1b2c3d4e5";
+  std::ofstream file(file_path);
+  // Also store the terminating null character ('\0') at the end of the
+  // value returned by std::string::c_str(); it acts as the delimiter.
+  file.write(string_value_0.c_str(), string_value_0.length() + 1);
+  file.write(string_value_1.c_str(), string_value_1.length() + 1);
+  file.write(string_value_2.c_str(), string_value_2.length() + 1);
+  file.close();
+
+  // Note: the following input-related params are *specific* to the model
+  // 'g_string_model_path', which is specified as
+  // 'lite:testdata/string_input_model.bin' for the test.
+  BenchmarkParams params = CreateStringParams();
+  params.Set<std::string>("input_layer", "a");
+  params.Set<std::string>("input_layer_shape", "1,3");
+  params.Set<std::string>("input_layer_value_files", "a:" + file_path);
+  TestBenchmark benchmark(std::move(params));
+  benchmark.Run();
+
+  auto input_tensor = benchmark.GetInputTensor(0);
+  ASSERT_THAT(input_tensor, testing::NotNull());
+  EXPECT_EQ(GetStringCount(input_tensor), 3);
+  CheckInputTensorValue(input_tensor, 0, string_value_0);
+  CheckInputTensorValue(input_tensor, 1, string_value_1);
+  CheckInputTensorValue(input_tensor, 2, string_value_2);
+}
+
 class MaxDurationWorksTestListener : public BenchmarkListener {
   void OnBenchmarkEnd(const BenchmarkResults& results) override {
     const int64_t num_actul_runs = results.inference_time_us().count();
@@ -316,16 +372,19 @@ TEST(BenchmarkTest, ParametersArePopulatedWhenInputShapeIsNotSpecified) {
 }  // namespace tflite

 int main(int argc, char** argv) {
-  std::string fp32_model_path, int8_model_path;
+  std::string fp32_model_path, int8_model_path, string_model_path;
   std::vector<tflite::Flag> flags = {
       tflite::Flag::CreateFlag("fp32_graph", &fp32_model_path,
                                "Path to a fp32 model file."),
       tflite::Flag::CreateFlag("int8_graph", &int8_model_path,
                                "Path to a int8 model file."),
+      tflite::Flag::CreateFlag("string_graph", &string_model_path,
+                               "Path to a string model file."),
   };

   g_fp32_model_path = &fp32_model_path;
   g_int8_model_path = &int8_model_path;
+  g_string_model_path = &string_model_path;

   const bool parse_result =
       tflite::Flags::Parse(&argc, const_cast<const char**>(argv), flags);
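For context on the string_util helpers the new test exercises: GetStringCount returns the number of strings packed into a kTfLiteString tensor, and GetString returns a StringRef (a pointer plus a length; the bytes are not null-terminated). A minimal sketch of reading the values back, assuming 't' is a populated string tensor obtained from an interpreter as in the test above:

// Sketch: dump every string stored in a populated kTfLiteString tensor.
#include <iostream>
#include <string>
#include "tensorflow/lite/string_util.h"

void DumpStringTensor(const TfLiteTensor* t) {
  const int count = tflite::GetStringCount(t);
  for (int i = 0; i < count; ++i) {
    const tflite::StringRef ref = tflite::GetString(t, i);
    // StringRef is not null-terminated; copy with an explicit length.
    std::cout << i << ": " << std::string(ref.str, ref.len) << "\n";
  }
}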
tensorflow/lite/tools/benchmark/benchmark_tflite_model.cc
@@ -101,13 +101,18 @@ std::vector<std::string> Split(const std::string& str, const char delim) {
   return results;
 }

-void FillRandomString(tflite::DynamicBuffer* buffer,
-                      const std::vector<int>& sizes,
-                      const std::function<std::string()>& random_func) {
+int GetNumElements(const TfLiteIntArray* dim_array) {
   int num_elements = 1;
-  for (int dim : sizes) {
-    num_elements *= dim;
+  for (size_t i = 0; i < dim_array->size; i++) {
+    num_elements *= dim_array->data[i];
   }
+  return num_elements;
+}
+
+void FillRandomString(tflite::DynamicBuffer* buffer,
+                      const TfLiteIntArray* dim_array,
+                      const std::function<std::string()>& random_func) {
+  int num_elements = GetNumElements(dim_array);
   for (int i = 0; i < num_elements; ++i) {
     auto str = random_func();
     buffer->AddString(str.data(), str.length());
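The new GetNumElements helper simply multiplies the entries of a TfLiteIntArray, so a (1x3) shape yields 3 elements. A hedged usage sketch (the shape values are illustrative, and the call assumes the file-local helper above is visible):

// Sketch: element count for a tensor of shape {1, 3}.
#include "tensorflow/lite/c/common.h"

int ExampleNumElements() {
  TfLiteIntArray* dims = TfLiteIntArrayCreate(2);  // rank-2 shape
  dims->data[0] = 1;
  dims->data[1] = 3;
  const int n = GetNumElements(dims);  // 1 * 3 == 3
  TfLiteIntArrayFree(dims);
  return n;
}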
@@ -233,15 +238,6 @@ TfLiteStatus PopulateInputLayerInfo(
   return kTfLiteOk;
 }

-std::vector<int> TfLiteIntArrayToVector(const TfLiteIntArray* int_array) {
-  std::vector<int> values;
-  values.reserve(int_array->size);
-  for (size_t i = 0; i < int_array->size; i++) {
-    values.push_back(int_array->data[i]);
-  }
-  return values;
-}
-
 std::shared_ptr<profiling::ProfileSummaryFormatter>
 CreateProfileSummaryFormatter(bool format_as_csv) {
   return format_as_csv
@@ -317,7 +313,9 @@ std::vector<Flag> BenchmarkTfLiteModel::GetFlags() {
           "of input layer name and value file path separated by ':', e.g. "
           "input1:file_path1,input2:file_path2. If the input_name appears both "
           "in input_layer_value_range and input_layer_value_files, "
-          "input_layer_value_range of the input_name will be ignored."),
+          "input_layer_value_range of the input_name will be ignored. The file "
+          "format is binary and it should be array format or null separated "
+          "strings format."),
       CreateFlag<bool>("use_legacy_nnapi", &params_, "use legacy nnapi api"),
       CreateFlag<bool>("allow_fp16", &params_, "allow fp16"),
       CreateFlag<bool>("require_full_delegation", &params_,
@@ -416,25 +414,41 @@ int64_t BenchmarkTfLiteModel::MayGetModelFileSize() {

 BenchmarkTfLiteModel::InputTensorData BenchmarkTfLiteModel::LoadInputTensorData(
     const TfLiteTensor& t, const std::string& input_file_path) {
+  std::ifstream value_file(input_file_path, std::ios::binary);
+  if (!value_file.good()) {
+    TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
+                      << input_file_path;
+  }
   InputTensorData t_data;
   if (t.type == kTfLiteString) {
-    // TODO(b/149184079): Will update string type logic.
-  } else {
-    t_data.bytes = t.bytes;
-    std::ifstream value_file(input_file_path, std::ios::binary | std::ios::ate);
-    if (!value_file.good()) {
-      TFLITE_LOG(FATAL) << "Failed to read the input_layer_value_file:"
-                        << input_file_path;
-    }
+    t_data.data = VoidUniquePtr(
+        static_cast<void*>(new tflite::DynamicBuffer()),
+        [](void* ptr) { delete static_cast<DynamicBuffer*>(ptr); });
+    std::string line;
+    size_t num_line = 0;
+    // Read the line with the delimiter '\0'.
+    while (std::getline(value_file, line, '\0')) {
+      num_line++;
+      static_cast<DynamicBuffer*>(t_data.data.get())
+          ->AddString(line.data(), line.length());
+    }
+    int num_elements = GetNumElements(t.dims);
+    if (num_line != num_elements) {
+      TFLITE_LOG(FATAL) << "The number of string in the input_layer_value_file("
+                        << input_file_path << ") is " << num_line
+                        << ". It should be " << num_elements << ".";
+    }
+  } else {
+    value_file.seekg(0, std::ios_base::end);
     if (value_file.tellg() != t.bytes) {
       TFLITE_LOG(FATAL) << "The size of " << input_file_path << " is "
                         << value_file.tellg() << " bytes. It should be "
                         << t.bytes << " bytes.";
     }
-    // Now initialize the type-erased unique_ptr (with custom deleter).
-    t_data.data = std::unique_ptr<void, void (*)(void*)>(
-        static_cast<void*>(new char[t.bytes]),
-        [](void* ptr) { delete[] static_cast<char*>(ptr); });
+    t_data.bytes = t.bytes;
+    t_data.data =
+        VoidUniquePtr(static_cast<void*>(new char[t.bytes]),
+                      [](void* ptr) { delete[] static_cast<char*>(ptr); });
     value_file.clear();
     value_file.seekg(0, std::ios_base::beg);
     value_file.read(static_cast<char*>(t_data.data.get()), t.bytes);
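The parsing loop above leans on the lesser-known third argument of std::getline, which swaps the usual '\n' delimiter for '\0'. A self-contained sketch of the same technique on an in-memory stream (the values are illustrative):

// Sketch: split a '\0'-delimited buffer the way the loader above does.
#include <iostream>
#include <sstream>
#include <string>

int main() {
  // Simulate a value file holding "abcd\0" "12345\0" "a1b2c3d4e5\0".
  const std::string blob("abcd\0" "12345\0" "a1b2c3d4e5\0", 22);
  std::istringstream value_file(blob);
  std::string line;
  while (std::getline(value_file, line, '\0')) {
    std::cout << line << "\n";  // prints abcd, then 12345, then a1b2c3d4e5
  }
  return 0;
}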
@@ -453,11 +467,7 @@ BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
     low_range = layer_info->low;
     high_range = layer_info->high;
   }
-  std::vector<int> sizes = TfLiteIntArrayToVector(t.dims);
-  int num_elements = 1;
-  for (int i = 0; i < sizes.size(); ++i) {
-    num_elements *= sizes[i];
-  }
+  int num_elements = GetNumElements(t.dims);
   switch (t.type) {
     case kTfLiteFloat32: {
       return CreateInputTensorData<float>(
@@ -564,12 +574,17 @@ TfLiteStatus BenchmarkTfLiteModel::ResetInputsAndOutputs() {
     int i = interpreter_inputs[j];
     TfLiteTensor* t = interpreter_->tensor(i);
     if (t->type == kTfLiteString) {
-      tflite::DynamicBuffer buffer;
-      std::vector<int> sizes = TfLiteIntArrayToVector(t->dims);
-      FillRandomString(&buffer, sizes, []() {
-        return "we're have some friends over saturday to hang out in the yard";
-      });
-      buffer.WriteToTensor(t, /*new_shape=*/nullptr);
+      if (inputs_data_[j].data) {
+        static_cast<DynamicBuffer*>(inputs_data_[j].data.get())
+            ->WriteToTensor(t, /*new_shape=*/nullptr);
+      } else {
+        tflite::DynamicBuffer buffer;
+        FillRandomString(&buffer, t->dims, []() {
+          return "we're have some friends over saturday to hang out in the "
+                 "yard";
+        });
+        buffer.WriteToTensor(t, /*new_shape=*/nullptr);
+      }
     } else {
       std::memcpy(t->data.raw, inputs_data_[j].data.get(),
                   inputs_data_[j].bytes);
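DynamicBuffer is TFLite's builder for the packed string-tensor format: AddString appends entries and WriteToTensor serializes the offset table plus payload into the tensor. A hedged standalone sketch of the fill path used above (tensor setup is omitted; 'input' is assumed to be a kTfLiteString tensor obtained from an interpreter):

// Sketch: populate a kTfLiteString tensor from a list of values,
// mirroring the branch above that consumes inputs_data_.
#include <string>
#include <vector>
#include "tensorflow/lite/string_util.h"

void FillStringTensor(TfLiteTensor* input,
                      const std::vector<std::string>& values) {
  tflite::DynamicBuffer buffer;
  for (const std::string& v : values) {
    buffer.AddString(v.data(), v.length());
  }
  // Keep the tensor's existing shape; pass a TfLiteIntArray* to reshape.
  buffer.WriteToTensor(input, /*new_shape=*/nullptr);
}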
tensorflow/lite/tools/benchmark/benchmark_tflite_model.h
@@ -89,10 +89,13 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
   std::unique_ptr<tflite::Interpreter> interpreter_;

  private:
+  // Implement type erasure with unique_ptr with custom deleter.
+  using VoidUniquePtr = std::unique_ptr<void, void (*)(void*)>;
+
   struct InputTensorData {
     InputTensorData() : data(nullptr, nullptr) {}

-    std::unique_ptr<void, void (*)(void*)> data;
+    VoidUniquePtr data;
     size_t bytes;
   };

@@ -105,11 +108,8 @@ class BenchmarkTfLiteModel : public BenchmarkModel {
     std::generate_n(raw, num_elements, [&]() {
       return static_cast<T>(distribution(random_engine_));
     });
-    // Now initialize the type-erased unique_ptr (with custom deleter) from
-    // 'raw'.
-    tmp.data = std::unique_ptr<void, void (*)(void*)>(
-        static_cast<void*>(raw),
-        [](void* ptr) { delete[] static_cast<T*>(ptr); });
+    tmp.data = VoidUniquePtr(static_cast<void*>(raw),
+                             [](void* ptr) { delete[] static_cast<T*>(ptr); });
     return tmp;
   }
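The VoidUniquePtr alias is what lets a single InputTensorData member own either a raw char[] buffer or a DynamicBuffer: the deleter, captured at construction time, remembers how to destroy the payload. A minimal standalone sketch of the pattern (the Blob type is illustrative):

// Sketch: one type-erased owner, two payload types, correct cleanup.
#include <iostream>
#include <memory>

using VoidUniquePtr = std::unique_ptr<void, void (*)(void*)>;

struct Blob { ~Blob() { std::cout << "Blob destroyed\n"; } };

int main() {
  // Owns a raw char array; the deleter knows to use delete[].
  VoidUniquePtr bytes(static_cast<void*>(new char[16]),
                      [](void* p) { delete[] static_cast<char*>(p); });
  // Owns a Blob; the deleter casts back before delete so ~Blob runs.
  VoidUniquePtr blob(static_cast<void*>(new Blob()),
                     [](void* p) { delete static_cast<Blob*>(p); });
  return 0;  // both deleters run here, in reverse order of declaration
}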