Merge pull request #41757 from tg-at-google:wsign-compare-semi-final-lite-python-stream-executor
PiperOrigin-RevId: 323633899
Change-Id: I671d46d1fc3db5a3d597dce88af4c017fd1883aa
Commit: 33e709ba69
Changed paths (as listed in the commit's file tree):
- tensorflow / lite / delegates/nnapi
- tensorflow / lite / toco / graph_transformations: convert_trivial_transpose_to_reshape.cc, dequantize.cc, drop_fake_quant.cc, ensure_uint8_weights_safe_for_fast_int8_kernels.cc
- tflite, tools, python, stream_executor (additional directories touched)
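The changes below all target -Wsign-compare warnings: comparisons between a signed index (int, int64) and an unsigned size (size_t, uint64) returned by container size() methods or flatbuffer accessors. The fixes fall into a few recurring patterns: caching the size in a signed local before the comparison, hoisting the loop bound into the for-initializer with the same type as the index, switching the index type to match the bound, or adding an explicit static_cast on one side. As a standalone illustration (not TensorFlow code), the following sketch shows the form that triggers the warning and the cached-size fix applied throughout this commit:

#include <cstdint>
#include <vector>

// Illustrative only: the warning-prone form and the cached-size fix.
int LookupBefore(const std::vector<int>& table, int index) {
  // table.size() returns an unsigned size_t, so `index < table.size()`
  // compares signed with unsigned and warns under -Wsign-compare.
  if (index >= 0 && index < table.size()) return table[index];
  return -1;
}

int LookupAfter(const std::vector<int>& table, int index) {
  // Cache the size in a signed 64-bit local first; the comparison is then
  // signed vs. signed and the warning goes away.
  const int64_t max_size = table.size();
  if (index >= 0 && index < max_size) return table[index];
  return -1;
}

int64_t is wide enough for any realistic container size here, so the implicit size_t-to-int64_t conversion is safe in these contexts.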
@@ -39,7 +39,8 @@ class OperandMapping {
   // Given a TFLite index return the ANN index. If it doesn't exist
   // return -1.
   int lite_index_to_ann(int index) const {
-    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
+    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
+    if (index >= 0 && index < max_size)
       return lite_tensor_to_ann_tensor_[index];
     else
       return -1;
@@ -60,7 +61,8 @@ class OperandMapping {

   // Add a new mapping from `tflite_index` and return the NN API tensor index.
   int add_new_ann_tensor_index(int tflite_index) {
-    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
+    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
+    if (tflite_index >= current_size) {
       lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
     }
     const int new_tensor_index = next_ann_tensor_index_++;
@@ -72,7 +74,8 @@ class OperandMapping {
   // converted during copying the data to the memory allocated for NN API.
   // kTfLiteNoType means no conversion is needed.
   TfLiteType lite_index_to_ann_type_conversion(int index) const {
-    if (index >= 0 && index < index_to_type_conversion_.size())
+    const int64_t max_size = index_to_type_conversion_.size();
+    if (index >= 0 && index < max_size)
       return index_to_type_conversion_[index];
     else
       return kTfLiteNoType;
@@ -80,7 +83,8 @@ class OperandMapping {

   // Add a new mapping from TFLite index to a type conversion.
   void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
-    if (tflite_index >= index_to_type_conversion_.size()) {
+    const int64_t current_size = index_to_type_conversion_.size();
+    if (tflite_index >= current_size) {
       index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
     }
     index_to_type_conversion_[tflite_index] = tflite_type;
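The four OperandMapping hunks above all apply that cached-size pattern to bounds checks and resize guards. A hypothetical miniature of the same idiom (names and members are illustrative, not the actual TFLite NNAPI delegate class):

#include <cstdint>
#include <vector>

class IndexMap {
 public:
  // Returns the mapped value for `index`, or -1 if the index is out of range.
  int Lookup(int index) const {
    // size() is unsigned; store it in a signed 64-bit local so the comparison
    // with the signed `index` does not trip -Wsign-compare.
    const int64_t max_size = mapping_.size();
    if (index >= 0 && index < max_size) return mapping_[index];
    return -1;
  }

  // Grows the table as needed before writing, mirroring add_type_conversion().
  void Set(int index, int value) {
    const int64_t current_size = mapping_.size();
    if (index >= current_size) mapping_.resize(index + 1, -1);
    mapping_[index] = value;
  }

 private:
  std::vector<int> mapping_;
};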
@@ -36,7 +36,7 @@ void ExtractQuantLstmWeightsSubmatrix(const TfLiteIntArray* submatrix_dims,

   submatrix->resize(NumElements(submatrix_dims));

-  for (uint32_t i = 0; i < submatrix_rows * submatrix_cols; ++i) {
+  for (uint32_t i = 0, end = submatrix_rows * submatrix_cols; i < end; ++i) {
     const uint32_t row = i / submatrix_cols;
     const uint32_t column = i % submatrix_cols;
     (*submatrix)[i] =
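Most of the loop fixes in the remaining files use the hoisted-bound form: the size is evaluated once in the for-initializer and stored alongside the index, with the index's type. A minimal sketch of the idiom, assuming the container is not resized inside the loop body (otherwise the cached bound would go stale):

#include <vector>

// Illustrative sketch of the hoisted-bound loop idiom, not TensorFlow code.
int SumAll(const std::vector<int>& values) {
  int sum = 0;
  // `end` is initialized from values.size() once and shares the index's int
  // type, so `i < end` is a signed-vs-signed comparison. This assumes the
  // size fits in int and that `values` is not modified inside the loop.
  for (int i = 0, end = values.size(); i < end; ++i) {
    sum += values[i];
  }
  return sum;
}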
@@ -31,7 +31,7 @@ bool TransposeAffectsMemoryOrder(std::vector<int> perm,
   // just the shape) then the flat buffer representation shouldn't change.
   std::vector<int> old_major_index_ordering;
   std::vector<int> new_major_index_ordering;
-  for (int i = 0, iter_limit = in_shape.size(); i < iter_limit; i++) {
+  for (int i = 0, end = in_shape.size(); i < end; i++) {
     if (in_shape[i] != 1) {
       old_major_index_ordering.push_back(i);
     }
@@ -35,7 +35,7 @@ void DequantizeBuffer(Array* array) {
   auto& new_data = array->GetMutableBuffer<ArrayDataType::kFloat>().data;
   new_data.resize(old_data.size());
   const auto& qparams = array->GetQuantizationParams();
-  for (int i = 0, iter_limit = old_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = old_data.size(); i < end; i++) {
     new_data[i] = qparams.scale * (old_data[i] - qparams.zero_point);
   }
 }
@@ -45,8 +45,7 @@ namespace toco {
   }

   // Drop min/max inputs
-  for (int i = 1, iter_limit = fakequant_op->inputs.size(); i < iter_limit;
-       i++) {
+  for (int i = 1, end = fakequant_op->inputs.size(); i < end; i++) {
     if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) {
       model->EraseArray(fakequant_op->inputs[i]);
     }
@@ -166,7 +166,7 @@ namespace toco {
   int index_of_previous_bad_value = 0;
   bool changed = false;

-  for (int i = 0, iter_limit = buffer_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = buffer_data.size(); i < end; i++) {
     if (buffer_data[i] == 0) {
       count_bad++;
       if (count_bad > 1) {
@@ -157,7 +157,7 @@ void ImportOperators(
       }
     }
     auto outputs = input_op->outputs();
-    for (int i = 0; i < outputs->Length(); i++) {
+    for (int i = 0, end = outputs->Length(); i < end; i++) {
       auto output_index = outputs->Get(i);
       const std::string& output_name = tensors_table.at(output_index);
       op->outputs.push_back(output_name);
@@ -125,7 +125,7 @@ bool HasMinMax(const TensorT* tensor) {
 }

 void SetOperatorCodeVersion(ModelT* model) {
-  for (int subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
+  for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end;
        subgraph_idx++) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     // Iterate backward to avoid messing with index.
@@ -259,7 +259,7 @@ TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,

   // Per channel quantization
   if (channel_dim_size > 1) {
-    for (size_t i = 0; i < channel_dim_size; ++i) {
+    for (int i = 0; i < channel_dim_size; ++i) {
       // Current scale is not compatible with bias. Adjust max/min values.
       if (std::abs(bias_data[i]) >=
           0.5 * input_scale * weight_scales[i] * kScale) {
@@ -636,7 +636,7 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                              ErrorReporter* error_reporter) {
   // Compute scales.
   std::vector<float> scales(number_of_dimension);
-  for (size_t i = 0; i < number_of_dimension; i++) {
+  for (int i = 0; i < number_of_dimension; i++) {
     scales[i] = input_scale * weight_scales[i];
   }

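The two quantization-utils loops above go the other way: the bound (channel_dim_size, number_of_dimension) already appears to be a signed integer, so the loop index is switched from size_t to int rather than casting the bound. A hedged sketch with an assumed int parameter, not the actual TFLite quantization API:

#include <vector>

// Illustrative sketch: a signed loop index for a signed bound.
std::vector<float> ScaleAll(const std::vector<float>& weight_scales,
                            float input_scale, int number_of_dimension) {
  std::vector<float> scales(number_of_dimension);
  // With an int bound, an int index keeps the comparison signed vs. signed;
  // a size_t index here would compare unsigned with signed and warn.
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }
  return scales;
}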
@@ -703,19 +703,19 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                         std::vector<float> factors) {
   float scale = 1.0f;
   OperatorT* op = subgraph->operators[op_idx].get();
-  for (int i = 0; i < input_index.size(); ++i) {
+  for (int i = 0, end = input_index.size(); i < end; ++i) {
     const int index_local = input_index[i];
     const int index_global = op->inputs[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < intermediate_index.size(); ++i) {
+  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
     const int index_local = intermediate_index[i];
     const int index_global = op->intermediates[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < factors.size(); ++i) {
+  for (int i = 0, end = factors.size(); i < end; ++i) {
     scale *= factors[i];
   }
   return scale;
@@ -329,7 +329,7 @@ void MakeTensor(const string& name, const std::vector<int32_t>& shape,

 // Updates operator code versions for the operators with INT8 inputs.
 void UpdateInt8OperatorVersions(ModelT* model, bool use_updated_hybrid_scheme) {
-  for (int i = 0; i < model->operator_codes.size(); ++i) {
+  for (int i = 0, end = model->operator_codes.size(); i < end; ++i) {
     const BuiltinOperator& op_code = model->operator_codes[i]->builtin_code;
     if (op_code == BuiltinOperator_RNN ||
         op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN ||
@@ -414,8 +414,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());

-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
-       ++subgraph_index) {
+  for (int subgraph_index = 0, end = model->subgraphs.size();
+       subgraph_index < end; ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();

     absl::flat_hash_map<int32_t, TensorPerChannel> tensor_map;
@@ -538,12 +538,12 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());

-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
-       ++subgraph_index) {
+  for (int subgraph_index = 0, end = model->subgraphs.size();
+       subgraph_index < end; ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();

     absl::flat_hash_map<int32_t, TensorT*> tensor_map;
-    for (int i = 0; i < subgraph->operators.size(); ++i) {
+    for (int i = 0, sub_end = subgraph->operators.size(); i < sub_end; ++i) {
       OperatorT* op = subgraph->operators[i].get();
       for (auto tensor_idx : op->inputs) {
         // Skip optional tensors.
@@ -106,9 +106,10 @@ bool VerifyStringTensorBuffer(const Tensor& tensor, const Buffer& buffer,
     return false;
   }
   offset += sizeof(int32_t);
-  for (int i = 1; i <= num_strings; i++, offset += sizeof(int32_t)) {
+  for (int i = 1, end = num_strings; i <= end; i++, offset += sizeof(int32_t)) {
     int string_offset = *GetIntPtr(buffer_ptr + offset);
-    if (string_offset < prev_ptr || string_offset > buffer_size) {
+    if (string_offset < static_cast<int>(prev_ptr) ||
+        string_offset > static_cast<int>(buffer_size)) {
       ReportError(error_reporter,
                   "String tensor %s buffer is invalid: index %d",
                   NameOrEmptyString(tensor.name()), i);
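Where the two operands genuinely carry different types, as in the verifier checks above, the fix is an explicit static_cast on one side. This silences the warning but quietly assumes the casted value fits in the destination type. An illustrative sketch of that trade-off (not the actual TFLite verifier):

#include <cstddef>
#include <cstdint>

// Returns true if the signed `offset` lies within [0, buffer_size].
bool OffsetInRange(int32_t offset, size_t buffer_size) {
  // Casting buffer_size to int32_t makes the comparison signed vs. signed;
  // this assumes the buffer size fits in a 32-bit int, much as the verifier
  // code above implicitly does.
  return offset >= 0 && offset <= static_cast<int32_t>(buffer_size);
}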
@@ -221,7 +222,7 @@ absl::optional<uint64_t> VerifyAndCountElements(
     }
   }

-  if (num_elements != array_segments_size - 1) {
+  if (static_cast<int>(num_elements) != array_segments_size - 1) {
     return absl::nullopt;
   }

@@ -254,15 +255,15 @@ absl::optional<uint64_t> VerifyAndCountSparseElements(const Tensor& tensor) {

   const int total_dims = sparsity->traversal_order()->size();
   const int original_rank = tensor.shape()->size();

-  if (total_dims < original_rank ||
-      sparsity->dim_metadata()->size() != total_dims) {
+  const int sparsity_dim_metadata_size = sparsity->dim_metadata()->size();
+  if (total_dims < original_rank || sparsity_dim_metadata_size != total_dims) {
     return absl::nullopt;
   }

   const int block_rank = total_dims - original_rank;
+  const int sparsity_block_map_size = sparsity->block_map()->size();
   if (block_rank > 0 && (sparsity->block_map() == nullptr ||
-                         sparsity->block_map()->size() != block_rank)) {
+                         sparsity_block_map_size != block_rank)) {
     return absl::nullopt;
   }

@@ -446,7 +447,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   absl::flat_hash_set<int> subgraph_input_tensors, constant_tensors,
       variable_tensors, output_tensors;
   if (subgraph.tensors()) {
-    for (int i = 0; i < subgraph.tensors()->size(); ++i) {
+    for (int i = 0, end = subgraph.tensors()->size(); i < end; ++i) {
       const auto* tensor = subgraph.tensors()->Get(i);
       if (IsConstantTensor(*tensor, model)) {
         constant_tensors.insert(i);
@@ -462,7 +463,8 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   }

   if (subgraph.operators()) {
-    for (int op_idx = 0; op_idx < subgraph.operators()->size(); ++op_idx) {
+    for (int op_idx = 0, end = subgraph.operators()->size(); op_idx < end;
+         ++op_idx) {
       const auto* op = subgraph.operators()->Get(op_idx);
       if (!model.operator_codes() ||
           (op->opcode_index() >= model.operator_codes()->size())) {
@@ -146,8 +146,7 @@ class SessionLogger {
     // Build an index from fetch tensor name to first index in
     // output_tensor_names.
     std::unordered_map<string, int> output_name_to_offset;
-    for (int i = 0, iter_limit = output_tensor_names.size(); i < iter_limit;
-         ++i) {
+    for (int i = 0, end = output_tensor_names.size(); i < end; ++i) {
       const string& name = output_tensor_names[i];
       if (output_name_to_offset.insert(std::make_pair(name, i)).second) {
         req->add_fetch(name);
@@ -271,7 +271,7 @@ Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems,
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst)));
-  for (int64 i = 0; i < nelems; ++i) {
+  for (int64 i = 0; i < static_cast<int64>(nelems); ++i) {
     const tstring& tstr_i = tstr[i];
     auto py_string =
         make_safe(PyBytes_FromStringAndSize(tstr_i.data(), tstr_i.size()));
@@ -125,9 +125,10 @@ bool DeviceDescription::rocm_amdgpu_isa_version(int *version) const {

 bool ThreadDimOk(const DeviceDescription &device_description,
                  const ThreadDim &thread_dim) {
-  auto total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
-  auto threads_per_block_limit = device_description.threads_per_block_limit();
-  if (total_threads > static_cast<uint64>(threads_per_block_limit)) {
+  const int64 total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
+  const int64 threads_per_block_limit =
+      device_description.threads_per_block_limit();
+  if (total_threads > threads_per_block_limit) {
     VLOG(2) << "exceeded total-thread-per-block limit: " << total_threads
             << " vs limit " << threads_per_block_limit;
     return false;
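The ThreadDimOk change above takes yet another route: both quantities are stored in signed 64-bit locals up front, so every comparison is signed-to-signed and the earlier one-sided static_cast<uint64> disappears. A rough equivalent using int64_t in place of TensorFlow's int64 alias (the ThreadDim field types here are an assumption, not the real stream_executor declaration):

#include <cstdint>

struct Dim3 {
  uint32_t x, y, z;
};

// Illustrative sketch: widen both sides of the limit check to signed 64-bit.
bool WithinThreadLimit(const Dim3& thread_dim,
                       int64_t threads_per_block_limit) {
  // Casting the first factor makes the whole product a signed 64-bit
  // computation, so the comparison below is signed vs. signed.
  const int64_t total_threads =
      static_cast<int64_t>(thread_dim.x) * thread_dim.y * thread_dim.z;
  return total_threads <= threads_per_block_limit;
}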
@@ -1886,7 +1886,7 @@ Stream *Stream::GetOrCreateSubStream() {

   // Look for the first reusable sub_stream that is ok, dropping !ok sub_streams
   // we encounter along the way.
-  for (int64 index = 0; index < sub_streams_.size();) {
+  for (size_t index = 0; index < sub_streams_.size();) {
     std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
     if (pair.second) {
       // The sub_stream is reusable.
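In GetOrCreateSubStream the bound is deliberately not hoisted, presumably because sub_streams_ can shrink while the loop drops !ok sub-streams, so size() must be re-evaluated each iteration; instead the index type is changed to size_t to match it. A small sketch of that situation (illustrative, not the actual Stream code):

#include <cstddef>
#include <vector>

// Remove negative entries in place while iterating.
void DropNegatives(std::vector<int>& values) {
  // The container shrinks inside the loop, so values.size() must be checked
  // every iteration; a cached bound would go stale. A size_t index matches
  // the type of size() and keeps the comparison unsigned vs. unsigned.
  for (size_t index = 0; index < values.size();) {
    if (values[index] < 0) {
      // Swap-and-pop: overwrite with the last element, then drop it.
      values[index] = values.back();
      values.pop_back();
    } else {
      ++index;
    }
  }
}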
@@ -1937,7 +1937,7 @@ void Stream::ReturnSubStream(Stream *sub_stream) {
   absl::MutexLock lock(&mu_);

   // Look for the sub-stream.
-  for (int64 index = 0; index < sub_streams_.size(); ++index) {
+  for (int64 index = 0, end = sub_streams_.size(); index < end; ++index) {
     std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
     if (pair.first.get() != sub_stream) {
       continue;
@@ -478,7 +478,7 @@ port::Status StreamExecutor::GetStatus(Stream *stream) {

 DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) {
   if (memory_limit_bytes_ > 0 &&
-      mem_alloc_bytes_ + size > memory_limit_bytes_) {
+      static_cast<int64>(mem_alloc_bytes_ + size) > memory_limit_bytes_) {
     LOG(WARNING) << "Not enough memory to allocate " << size << " on device "
                  << device_ordinal_
                  << " within provided limit. [used=" << mem_alloc_bytes_