Add an HVX graph execution test for the quantized Inception v3 model with quantized input
PiperOrigin-RevId: 161105730
parent 2559fda880
commit 06d25a7e62
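The new test quantizes the 299x299 RGB input itself before feeding it to the graph: the QuantizeImage() helper added below maps each float pixel to quint8 via FloatToQuantized<quint8>(v, -1.0f, 1.0f). The standalone sketch that follows illustrates that affine mapping, assuming pixels already scaled to [-1, 1]; QuantizeToUint8 is a hypothetical stand-in whose rounding and clamping only approximate TensorFlow's quantization_utils.h, not the actual implementation.

// Illustrative sketch only: maps a float in [min_range, max_range] onto the
// full quint8 range [0, 255], the same kind of affine mapping QuantizeImage()
// relies on. Rounding/clamping details are an approximation.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

static std::uint8_t QuantizeToUint8(float v, float min_range, float max_range) {
  const float scale = 255.0f / (max_range - min_range);
  const float q = std::round((v - min_range) * scale);
  return static_cast<std::uint8_t>(std::min(255.0f, std::max(0.0f, q)));
}

int main() {
  // Pixels are assumed to be pre-scaled to [-1, 1], matching the
  // (-1.0f, 1.0f) range the test passes to FloatToQuantized.
  const std::vector<float> pixels = {-1.0f, -0.5f, 0.0f, 0.5f, 1.0f};
  for (float p : pixels) {
    std::printf("%+.2f -> %u\n", p,
                static_cast<unsigned>(QuantizeToUint8(p, -1.0f, 1.0f)));
  }
  return 0;
}

Under this mapping -1.0 lands on 0, 0.0 near 128, and 1.0 on 255, which is the value layout the DT_QUINT8 "Mul" input tensor built by BuildQuantizedImageTensor() carries into the Hexagon graph.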
@@ -24,6 +24,9 @@ https://storage.googleapis.com/download.tensorflow.org/models/imagenet_comp_grap
 adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
 */
 
+// define EIGEN_USE_THREADS to include quantization_utils.h
+#define EIGEN_USE_THREADS
+
 #include <memory>
 
 #include "tensorflow/core/framework/tensor_shape.pb.h"
@@ -34,6 +37,7 @@ adb push /tmp/imagenet_comp_graph_label_strings.txt /data/local/tmp
 #include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
 #include "tensorflow/core/kernels/hexagon/i_graph_transfer_ops_definitions.h"
 #include "tensorflow/core/kernels/i_remote_fused_graph_executor.h"
+#include "tensorflow/core/kernels/quantization_utils.h"
 #include "tensorflow/core/lib/core/casts.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/core/status_test_util.h"
@@ -52,6 +56,10 @@ using ByteArray = HexagonControlWrapper::ByteArray;
 constexpr const char* const IMAGE_FILENAME = "/data/local/tmp/img_299x299.bmp";
 constexpr const char* const MODEL_FILENAME =
     "/data/local/tmp/tensorflow_inception_v3_stripped_optimized_quantized.pb";
+constexpr const char* const MODEL_WITH_QUANTIZED_INPUT_FILENAME =
+    "/data/local/tmp/"
+    "tensorflow_inception_v3_stripped_optimized_quantized_with_quantized_input."
+    "pb";
 constexpr const char* const FUSED_MODEL_FILENAME =
     "/data/local/tmp/"
     "tensorflow_inception_v3_stripped_optimized_quantized_fused_hexagon.pb";
@@ -64,7 +72,7 @@ const int WIDTH = 299;
 const int HEIGHT = 299;
 const int DEPTH = 3;
 const int EXPECTED_FIRST_RESULT_ID = 59;
-const int EXECUTION_REPEAT_COUNT = 3;
+const int EXECUTION_REPEAT_COUNT = 10;
 
 static void CheckHexagonControllerVersion() {
   HexagonControlWrapper hexagon_control_wrapper;
@@ -165,8 +173,16 @@ static void LoadImage(std::vector<float>* img_floats_ptr) {
   }
 }
 
+static void QuantizeImage(const std::vector<float>& float_vec,
+                          std::vector<quint8>* quint8_vec) {
+  quint8_vec->resize(float_vec.size());
+  for (int i = 0; i < float_vec.size(); ++i) {
+    quint8_vec->at(i) = FloatToQuantized<quint8>(float_vec[i], -1.0f, 1.0f);
+  }
+}
+
 static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
-  LOG(INFO) << "Ioading image finished.";
+  LOG(INFO) << "Loading image finished.";
   Tensor img_tensor(DT_FLOAT, {1, WIDTH, HEIGHT, DEPTH});
   CHECK_EQ(WIDTH * HEIGHT * DEPTH, img_floats.size());
   CHECK_EQ(img_tensor.TotalBytes(), img_floats.size() * sizeof(float));
@@ -176,6 +192,18 @@ static Tensor BuildImageTensor(const std::vector<float>& img_floats) {
   return img_tensor;
 }
 
+static Tensor BuildQuantizedImageTensor(
+    const std::vector<quint8>& quantized_img) {
+  LOG(INFO) << "Loading image finished.";
+  Tensor img_tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH});
+  CHECK_EQ(WIDTH * HEIGHT * DEPTH, quantized_img.size());
+  CHECK_EQ(img_tensor.TotalBytes(), quantized_img.size() * sizeof(quint8));
+  LOG(INFO) << "Copy data to tensor.";
+  std::memcpy(img_tensor.flat<quint8>().data(), quantized_img.data(),
+              img_tensor.TotalBytes());
+  return img_tensor;
+}
+
 /* static */ RemoteFusedGraphExecuteInfo
 BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
     const GraphTransferInfo& graph_transfer_info) {
@@ -210,10 +238,8 @@ BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
   return execute_info;
 }
 
-static void RunInferenceByHexagonControlWrapper(
-    const GraphTransferer& gt, const std::vector<float>& img_floats) {
-  const Tensor img_tensor = BuildImageTensor(img_floats);
-
+static void RunInferenceByHexagonControlWrapper(const GraphTransferer& gt,
+                                                const Tensor& img_tensor) {
   const RemoteFusedGraphExecuteInfo execute_info =
       BuildRemoteFusedGraphExecuteInfoWithGraphTransferInfo(
           gt.GetGraphTransferInfo());
@@ -229,13 +255,11 @@ static void RunInferenceByHexagonControlWrapper(
   hexagon_control_wrapper.FillInputNode("Mul", img_tensor);
 
   // 4. Execute graph
-  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
-  ClockCycleProfiler prof;
+  const int64 start_time_us = Env::Default()->NowMicros();
   for (int i = 0; i < EXECUTION_REPEAT_COUNT; ++i) {
-    prof.Start();
     hexagon_control_wrapper.ExecuteGraph();
-    prof.Stop();
   }
+  const int64 end_time_us = Env::Default()->NowMicros();
 
   // 5-1. Read output node's outputs
   std::vector<ByteArray> outputs;
@@ -244,7 +268,8 @@ static void RunInferenceByHexagonControlWrapper(
   // 5-2. Dump results
   DumpTop10Results(outputs);
   CheckFirstResult(outputs, EXPECTED_FIRST_RESULT_ID);
-  prof.DumpStatistics("Graph Execution");
+  LOG(INFO) << "Average execution time = "
+            << (end_time_us - start_time_us) / EXECUTION_REPEAT_COUNT << "us";
 
   // 6. Teardown graph in hexagon
   hexagon_control_wrapper.TeardownGraph();
@@ -405,7 +430,43 @@ TEST(GraphTransferer,
 
   std::vector<float> img_floats;
   LoadImage(&img_floats);
-  RunInferenceByHexagonControlWrapper(gt, img_floats);
+  const Tensor img_tensor = BuildImageTensor(img_floats);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
 }
 
+TEST(GraphTransferer,
+     DISABLED_RunInceptionV3OnHexagonExampleWithHexagonWrapperQuantizedInput) {
+  LOG(INFO) << "Run inception v3 on hexagon with hexagon controller "
+            << "with quantized input";
+  CheckHexagonControllerVersion();
+
+  const IGraphTransferOpsDefinitions* ops_definitions =
+      &HexagonOpsDefinitions::getInstance();
+  std::vector<std::pair<string, Tensor>> inputs;
+  inputs.emplace_back("Mul", Tensor(DT_QUINT8, {1, WIDTH, HEIGHT, DEPTH}));
+  std::vector<string> output_node_names = {"softmax"};
+
+  GraphTransferer gt;
+  gt.EnableStrictCheckMode(false);
+  profile_utils::CpuUtils::EnableClockCycleProfiling(true);
+  ClockCycleProfiler prof;
+  prof.Start();
+  Status status = gt.LoadGraphFromProtoFile(
+      *ops_definitions, MODEL_WITH_QUANTIZED_INPUT_FILENAME, inputs,
+      output_node_names,
+      /*is_text_proto=*/false,
+      /*shape_inference_for_unknown_shape=*/false,
+      /*dry_run_for_unknown_shape=*/true);
+  ASSERT_TRUE(status.ok()) << status;
+  prof.Stop();
+  prof.DumpStatistics("LoadGraphFromProtoFile");
+
+  std::vector<float> img_floats;
+  LoadImage(&img_floats);
+  std::vector<quint8> quantized_img;
+  QuantizeImage(img_floats, &quantized_img);
+  const Tensor img_tensor = BuildQuantizedImageTensor(quantized_img);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
+}
+
 TEST(GraphTransferer,
@@ -436,7 +497,8 @@ TEST(GraphTransferer,
 
   std::vector<float> img_floats;
   LoadImage(&img_floats);
-  RunInferenceByHexagonControlWrapper(gt, img_floats);
+  const Tensor img_tensor = BuildImageTensor(img_floats);
+  RunInferenceByHexagonControlWrapper(gt, img_tensor);
 }
 
 TEST(GraphTransferer, RunInceptionV3OnHexagonExampleWithTfRuntime) {