From 06d25a7e62465fd2e663b1b9e79280d972475ac8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Thu, 6 Jul 2017 11:24:47 -0700
Subject: [PATCH] Add a HVX graph execution test for quantized inception v3
 model with quantized input

PiperOrigin-RevId: 161105730
---
 .../hexagon/hexagon_graph_execution_test.cc   | 88 ++++++++++++++++---
 1 file changed, 75 insertions(+), 13 deletions(-)
diff --git a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
index 130109b813f..9dcdfd6aa45 100644
--- a/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
+++ b/tensorflow/core/kernels/hexagon/hexagon_graph_execution_test.cc
@@ -24,6 +24,9 @@ https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_grap
 adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
 */
 
+// define EIGEN_USE_THREADS to include quantization_utils.h
+#define EIGEN_USE_THREADS
+
 #include <memory>
 
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -34,6 +37,7 @@ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
 #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
 #include "tensorflow/core/kernels/hexagon/i_graph_transfer_ops_definitions.h"
 #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -52,6 +56,10 @@ using ByteArray = HexagonControlWrapper::ByteArray;
 constexpr const char* const IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
 constexpr const char* const MODEL_FILENAME =
     "/data/local/tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb";
+constexpr const char* const MODEL_WITH_QUANTIZED_INPUT_FILENAME =
+    "/data/local/tmp/"
+    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input."
+    "pb";
 constexpr const char* const FUSED_MODEL_FILENAME =
     "/data/local/tmp/"
     "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";
@@ -64,7 +72,7 @@ const int WIDTH = 299;
 const int HEIGHT = 299;
 const int DEPTH = 3;
 const int EXPECTED_FIRST_RESULT_ID = 59;
-const int EXECUTION_REPEAT_COUNT = 3;
+const int EXECUTION_REPEAT_COUNT = 10;
 
 static void CheckHexagonControllerVersion() {
   HexagonControlWrapper hexagon_control_wrapper;
@@ -165,8 +173,16 @@ static void LoadImage(std::vector<float>* img_floats_ptr) {
   }
 }
 
+static void QuantizeImage(const std::vector<float>& float_vec,
+                          std::vector<quint8>* quint8_vec) {
+  quint8_vec->resize(float_vec.size());
+  for (int i = 0; i < float_vec.size(); ++i) {
+    quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
+  }
+}
+
 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
-  LOG(INFO) << "Ioading image finished.";
+  LOG(INFO) << "Loading image finished.";
   Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
   CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
   CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
@@ -176,6 +192,18 @@ static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
   return img_tensor;
 }
 
+static Tensor BuildQuantizedImageTensor(
+    const std::vector<quint8>& quantized_img) {
+  LOG(INFO) << "Loading image finished.";
+  Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
+  CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
+  CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
+  LOG(INFO) << "Copy data to tensor.";
+  std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
+              img_tensor.TotalBytes());
+  return img_tensor;
+}
+
 /* static */ RemoteFusedGraphExecuteInfo
 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
     const GraphTransferInfo& graph_transfer_info) {
@@ -210,10 +238,8 @@ BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
   return execute_info;
 }
 
-static void RunInferenceByHexagonControlWrapper(
-    const GraphTransferer& gt, const std::vector<float>& img_floats) {
-  const Tensor img_tensor = BuildImageTensor(img_floats);
-
+static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
+                                                const Tensor& img_tensor) {
   const RemoteFusedGraphExecuteInfo execute_info =
       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
           gt.GetGraphTransferInfo());
@@ -229,13 +255,11 @@ static void RunInferenceByHexagonControlWrapper(
   hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
 
   // 4. Execute graph
-  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
-  ClockCycleProfiler prof;
+  const int64 start_time_us = Env::Default()->NowMicros();
   for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
-    prof.Start();
     hexagon_control_wrapper.ExecuteGraph();
-    prof.Stop();
   }
+  const int64 end_time_us = Env::Default()->NowMicros();
 
   // 5-1. Read output node's outputs
   std::vector<ByteArray> outputs;
@@ -244,7 +268,8 @@ static void RunInferenceByHexagonControlWrapper(
   // 5-2. Dump results
   DumpTop10Results(outputs);
   CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
-  prof.DumpStatistics("Graph Execution");
+  LOG(INFO) << "Average execution time = "
+            << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
 
   // 6. Teardown graph in hexagon
   hexagon_control_wrapper.TeardownGraph();
@@ -405,7 +430,43 @@ TEST(GraphTransferer,
 
   std::vector<float> img_floats;
   LoadImage(&img_floats);
-  RunInferenceByHexagonControlWrapper(gt, img_floats);
+  const Tensor img_tensor = BuildImageTensor(img_floats);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
+}
+
+TEST(GraphTransferer,
+     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
+  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
+            << "with quantized input";
+  CheckHexagonControllerVersion();
+
+  const IGraphTransferOpsDefinitions* ops_definitions =
+      &HexagonOpsDefinitions::getInstance();
+  std::vector<std::pair<string, Tensor>> inputs;
+  inputs.emplace_back("Mul", Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));
+  std::vector<string> output_node_names = {"softmax"};
+
+  GraphTransferer gt;
+  gt.EnableStrictCheckMode(false);
+  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
+  ClockCycleProfiler prof;
+  prof.Start();
+  Status status = gt.LoadGraphFromProtoFile(
+      *ops_definitions, MODEL_WITH_QUANTIZED_INPUT_FILENAME, inputs,
+      output_node_names,
+      /*is_text_proto=*/false,
+      /*shape_inference_for_unknown_shape=*/false,
+      /*dry_run_for_unknown_shape=*/true);
+  ASSERT_TRUE(status.ok()) << status;
+  prof.Stop();
+  prof.DumpStatistics("LoadGraphFromProtoFile");
+
+  std::vector<float> img_floats;
+  LoadImage(&img_floats);
+  std::vector<quint8> quantized_img;
+  QuantizeImage(img_floats, &quantized_img);
+  const Tensor img_tensor = BuildQuantizedImageTensor(quantized_img);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
 }
 
 TEST(GraphTransferer,
@@ -436,7 +497,8 @@ TEST(GraphTransferer,
 
   std::vector<float> img_floats;
   LoadImage(&img_floats);
-  RunInferenceByHexagonControlWrapper(gt, img_floats);
+  const Tensor img_tensor = BuildImageTensor(img_floats);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
 }
 
 TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {