Merge pull request #41757 from tg-at-google:wsign-compare-semi-final-lite-python-stream-executor
PiperOrigin-RevId: 323633899
Change-Id: I671d46d1fc3db5a3d597dce88af4c017fd1883aa
Commit: 33e709ba69
Changed paths (as listed in the commit's file tree):
- tensorflow / lite / delegates/nnapi
- tensorflow / lite / toco / graph_transformations: convert_trivial_transpose_to_reshape.cc, dequantize.cc, drop_fake_quant.cc, ensure_uint8_weights_safe_for_fast_int8_kernels.cc
- tflite, tools, python, stream_executor (additional directories touched)
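The changes below all target -Wsign-compare warnings: comparisons between a signed index (int, int64) and an unsigned size (size_t, uint64) returned by container size() methods or flatbuffer accessors. The fixes fall into a few recurring patterns: caching the size in a signed local before the comparison, hoisting the loop bound into the for-initializer with the same type as the index, switching the index type to match the bound, or adding an explicit static_cast on one side. As a standalone illustration (not TensorFlow code), the following sketch shows the form that triggers the warning and the cached-size fix applied throughout this commit:

#include <cstdint>
#include <vector>

// Illustrative only: the warning-prone form and the cached-size fix.
int LookupBefore(const std::vector<int>& table, int index) {
  // table.size() returns an unsigned size_t, so `index < table.size()`
  // compares signed with unsigned and warns under -Wsign-compare.
  if (index >= 0 && index < table.size()) return table[index];
  return -1;
}

int LookupAfter(const std::vector<int>& table, int index) {
  // Cache the size in a signed 64-bit local first; the comparison is then
  // signed vs. signed and the warning goes away.
  const int64_t max_size = table.size();
  if (index >= 0 && index < max_size) return table[index];
  return -1;
}

int64_t is wide enough for any realistic container size here, so the implicit size_t-to-int64_t conversion is safe in these contexts.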
@@ -39,7 +39,8 @@ class OperandMapping {
   // Given a TFLite index return the ANN index. If it doesn't exist
   // return -1.
   int lite_index_to_ann(int index) const {
-    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
+    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
+    if (index >= 0 && index < max_size)
       return lite_tensor_to_ann_tensor_[index];
     else
       return -1;
@@ -60,7 +61,8 @@ class OperandMapping {

   // Add a new mapping from `tflite_index` and return the NN API tensor index.
   int add_new_ann_tensor_index(int tflite_index) {
-    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
+    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
+    if (tflite_index >= current_size) {
       lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
     }
     const int new_tensor_index = next_ann_tensor_index_++;
@@ -72,7 +74,8 @@ class OperandMapping {
   // converted during copying the data to the memory allocated for NN API.
   // kTfLiteNoType means no conversion is needed.
   TfLiteType lite_index_to_ann_type_conversion(int index) const {
-    if (index >= 0 && index < index_to_type_conversion_.size())
+    const int64_t max_size = index_to_type_conversion_.size();
+    if (index >= 0 && index < max_size)
       return index_to_type_conversion_[index];
     else
       return kTfLiteNoType;
@@ -80,7 +83,8 @@ class OperandMapping {

   // Add a new mapping from TFLite index to a type conversion.
   void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
-    if (tflite_index >= index_to_type_conversion_.size()) {
+    const int64_t current_size = index_to_type_conversion_.size();
+    if (tflite_index >= current_size) {
       index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
     }
     index_to_type_conversion_[tflite_index] = tflite_type;
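The four OperandMapping hunks above all apply that cached-size pattern to bounds checks and resize guards. A hypothetical miniature of the same idiom (names and members are illustrative, not the actual TFLite NNAPI delegate class):

#include <cstdint>
#include <vector>

class IndexMap {
 public:
  // Returns the mapped value for `index`, or -1 if the index is out of range.
  int Lookup(int index) const {
    // size() is unsigned; store it in a signed 64-bit local so the comparison
    // with the signed `index` does not trip -Wsign-compare.
    const int64_t max_size = mapping_.size();
    if (index >= 0 && index < max_size) return mapping_[index];
    return -1;
  }

  // Grows the table as needed before writing, mirroring add_type_conversion().
  void Set(int index, int value) {
    const int64_t current_size = mapping_.size();
    if (index >= current_size) mapping_.resize(index + 1, -1);
    mapping_[index] = value;
  }

 private:
  std::vector<int> mapping_;
};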
@@ -36,7 +36,7 @@ void ExtractQuantLstmWeightsSubmatrix(const TfLiteIntArray* submatrix_dims,

   submatrix->resize(NumElements(submatrix_dims));

-  for (uint32_t i = 0; i < submatrix_rows * submatrix_cols; ++i) {
+  for (uint32_t i = 0, end = submatrix_rows * submatrix_cols; i < end; ++i) {
     const uint32_t row = i / submatrix_cols;
     const uint32_t column = i % submatrix_cols;
     (*submatrix)[i] =
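Most of the loop fixes in the remaining files use the hoisted-bound form: the size is evaluated once in the for-initializer and stored alongside the index, with the index's type. A minimal sketch of the idiom, assuming the container is not resized inside the loop body (otherwise the cached bound would go stale):

#include <vector>

// Illustrative sketch of the hoisted-bound loop idiom, not TensorFlow code.
int SumAll(const std::vector<int>& values) {
  int sum = 0;
  // `end` is initialized from values.size() once and shares the index's int
  // type, so `i < end` is a signed-vs-signed comparison. This assumes the
  // size fits in int and that `values` is not modified inside the loop.
  for (int i = 0, end = values.size(); i < end; ++i) {
    sum += values[i];
  }
  return sum;
}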
@@ -31,7 +31,7 @@ bool TransposeAffectsMemoryOrder(std::vector<int> perm,
   // just the shape) then the flat buffer representation shouldn't change.
   std::vector<int> old_major_index_ordering;
   std::vector<int> new_major_index_ordering;
-  for (int i = 0, iter_limit = in_shape.size(); i < iter_limit; i++) {
+  for (int i = 0, end = in_shape.size(); i < end; i++) {
     if (in_shape[i] != 1) {
       old_major_index_ordering.push_back(i);
     }
@@ -35,7 +35,7 @@ void DequantizeBuffer(Array* array) {
   auto& new_data = array->GetMutableBuffer<ArrayDataType::kFloat>().data;
   new_data.resize(old_data.size());
   const auto& qparams = array->GetQuantizationParams();
-  for (int i = 0, iter_limit = old_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = old_data.size(); i < end; i++) {
     new_data[i] = qparams.scale * (old_data[i] - qparams.zero_point);
   }
 }
@@ -45,8 +45,7 @@ namespace toco {
   }

   // Drop min/max inputs
-  for (int i = 1, iter_limit = fakequant_op->inputs.size(); i < iter_limit;
-       i++) {
+  for (int i = 1, end = fakequant_op->inputs.size(); i < end; i++) {
     if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) {
       model->EraseArray(fakequant_op->inputs[i]);
     }
@@ -166,7 +166,7 @@ namespace toco {
   int index_of_previous_bad_value = 0;
   bool changed = false;

-  for (int i = 0, iter_limit = buffer_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = buffer_data.size(); i < end; i++) {
     if (buffer_data[i] == 0) {
       count_bad++;
       if (count_bad > 1) {
@@ -157,7 +157,7 @@ void ImportOperators(
       }
     }
     auto outputs = input_op->outputs();
-    for (int i = 0; i < outputs->Length(); i++) {
+    for (int i = 0, end = outputs->Length(); i < end; i++) {
       auto output_index = outputs->Get(i);
       const std::string& output_name = tensors_table.at(output_index);
       op->outputs.push_back(output_name);
@@ -125,7 +125,7 @@ bool HasMinMax(const TensorT* tensor) {
 }

 void SetOperatorCodeVersion(ModelT* model) {
-  for (int subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
+  for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end;
        subgraph_idx++) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     // Iterate backward to avoid messing with index.
@@ -259,7 +259,7 @@ TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,

   // Per channel quantization
   if (channel_dim_size > 1) {
-    for (size_t i = 0; i < channel_dim_size; ++i) {
+    for (int i = 0; i < channel_dim_size; ++i) {
       // Current scale is not compatible with bias. Adjust max/min values.
       if (std::abs(bias_data[i]) >=
           0.5 * input_scale * weight_scales[i] * kScale) {
@@ -636,7 +636,7 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                              ErrorReporter* error_reporter) {
   // Compute scales.
   std::vector<float> scales(number_of_dimension);
-  for (size_t i = 0; i < number_of_dimension; i++) {
+  for (int i = 0; i < number_of_dimension; i++) {
     scales[i] = input_scale * weight_scales[i];
   }

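The two quantization-utils loops above go the other way: the bound (channel_dim_size, number_of_dimension) already appears to be a signed integer, so the loop index is switched from size_t to int rather than casting the bound. A hedged sketch with an assumed int parameter, not the actual TFLite quantization API:

#include <vector>

// Illustrative sketch: a signed loop index for a signed bound.
std::vector<float> ScaleAll(const std::vector<float>& weight_scales,
                            float input_scale, int number_of_dimension) {
  std::vector<float> scales(number_of_dimension);
  // With an int bound, an int index keeps the comparison signed vs. signed;
  // a size_t index here would compare unsigned with signed and warn.
  for (int i = 0; i < number_of_dimension; i++) {
    scales[i] = input_scale * weight_scales[i];
  }
  return scales;
}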
@@ -703,19 +703,19 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                         std::vector<float> factors) {
   float scale = 1.0f;
   OperatorT* op = subgraph->operators[op_idx].get();
-  for (int i = 0; i < input_index.size(); ++i) {
+  for (int i = 0, end = input_index.size(); i < end; ++i) {
     const int index_local = input_index[i];
     const int index_global = op->inputs[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < intermediate_index.size(); ++i) {
+  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
     const int index_local = intermediate_index[i];
     const int index_global = op->intermediates[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < factors.size(); ++i) {
+  for (int i = 0, end = factors.size(); i < end; ++i) {
     scale *= factors[i];
   }
   return scale;
@@ -329,7 +329,7 @@ void MakeTensor(const string& name, const std::vector<int32_t>& shape,

 // Updates operator code versions for the operators with INT8 inputs.
 void UpdateInt8OperatorVersions(ModelT* model, bool use_updated_hybrid_scheme) {
-  for (int i = 0; i < model->operator_codes.size(); ++i) {
+  for (int i = 0, end = model->operator_codes.size(); i < end; ++i) {
     const BuiltinOperator& op_code = model->operator_codes[i]->builtin_code;
     if (op_code == BuiltinOperator_RNN ||
         op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN ||
@@ -414,8 +414,8 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());

-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
-       ++subgraph_index) {
+  for (int subgraph_index = 0, end = model->subgraphs.size();
+       subgraph_index < end; ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();

     absl::flat_hash_map<int32_t, TensorPerChannel> tensor_map;
@@ -538,12 +538,12 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());

-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
-       ++subgraph_index) {
+  for (int subgraph_index = 0, end = model->subgraphs.size();
+       subgraph_index < end; ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();

     absl::flat_hash_map<int32_t, TensorT*> tensor_map;
-    for (int i = 0; i < subgraph->operators.size(); ++i) {
+    for (int i = 0, sub_end = subgraph->operators.size(); i < sub_end; ++i) {
       OperatorT* op = subgraph->operators[i].get();
       for (auto tensor_idx : op->inputs) {
         // Skip optional tensors.
@@ -106,9 +106,10 @@ bool VerifyStringTensorBuffer(const Tensor& tensor, const Buffer& buffer,
     return false;
   }
   offset += sizeof(int32_t);
-  for (int i = 1; i <= num_strings; i++, offset += sizeof(int32_t)) {
+  for (int i = 1, end = num_strings; i <= end; i++, offset += sizeof(int32_t)) {
     int string_offset = *GetIntPtr(buffer_ptr + offset);
-    if (string_offset < prev_ptr || string_offset > buffer_size) {
+    if (string_offset < static_cast<int>(prev_ptr) ||
+        string_offset > static_cast<int>(buffer_size)) {
       ReportError(error_reporter,
                   "String tensor %s buffer is invalid: index %d",
                   NameOrEmptyString(tensor.name()), i);
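Where the two operands genuinely carry different types, as in the verifier checks above, the fix is an explicit static_cast on one side. This silences the warning but quietly assumes the casted value fits in the destination type. An illustrative sketch of that trade-off (not the actual TFLite verifier):

#include <cstddef>
#include <cstdint>

// Returns true if the signed `offset` lies within [0, buffer_size].
bool OffsetInRange(int32_t offset, size_t buffer_size) {
  // Casting buffer_size to int32_t makes the comparison signed vs. signed;
  // this assumes the buffer size fits in a 32-bit int, much as the verifier
  // code above implicitly does.
  return offset >= 0 && offset <= static_cast<int32_t>(buffer_size);
}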
@@ -221,7 +222,7 @@ absl::optional<uint64_t> VerifyAndCountElements(
     }
   }

-  if (num_elements != array_segments_size - 1) {
+  if (static_cast<int>(num_elements) != array_segments_size - 1) {
     return absl::nullopt;
   }

@@ -254,15 +255,15 @@ absl::optional<uint64_t> VerifyAndCountSparseElements(const Tensor& tensor) {

   const int total_dims = sparsity->traversal_order()->size();
   const int original_rank = tensor.shape()->size();

-  if (total_dims < original_rank ||
-      sparsity->dim_metadata()->size() != total_dims) {
+  const int sparsity_dim_metadata_size = sparsity->dim_metadata()->size();
+  if (total_dims < original_rank || sparsity_dim_metadata_size != total_dims) {
     return absl::nullopt;
   }

   const int block_rank = total_dims - original_rank;
+  const int sparsity_block_map_size = sparsity->block_map()->size();
   if (block_rank > 0 && (sparsity->block_map() == nullptr ||
-                         sparsity->block_map()->size() != block_rank)) {
+                         sparsity_block_map_size != block_rank)) {
     return absl::nullopt;
   }

@@ -446,7 +447,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   absl::flat_hash_set<int> subgraph_input_tensors, constant_tensors,
       variable_tensors, output_tensors;
   if (subgraph.tensors()) {
-    for (int i = 0; i < subgraph.tensors()->size(); ++i) {
+    for (int i = 0, end = subgraph.tensors()->size(); i < end; ++i) {
       const auto* tensor = subgraph.tensors()->Get(i);
       if (IsConstantTensor(*tensor, model)) {
         constant_tensors.insert(i);
@@ -462,7 +463,8 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   }

   if (subgraph.operators()) {
-    for (int op_idx = 0; op_idx < subgraph.operators()->size(); ++op_idx) {
+    for (int op_idx = 0, end = subgraph.operators()->size(); op_idx < end;
+         ++op_idx) {
       const auto* op = subgraph.operators()->Get(op_idx);
       if (!model.operator_codes() ||
           (op->opcode_index() >= model.operator_codes()->size())) {
@@ -146,8 +146,7 @@ class SessionLogger {
     // Build an index from fetch tensor name to first index in
     // output_tensor_names.
     std::unordered_map<string, int> output_name_to_offset;
-    for (int i = 0, iter_limit = output_tensor_names.size(); i < iter_limit;
-         ++i) {
+    for (int i = 0, end = output_tensor_names.size(); i < end; ++i) {
       const string& name = output_tensor_names[i];
       if (output_name_to_offset.insert(std::make_pair(name, i)).second) {
         req->add_fetch(name);
@@ -271,7 +271,7 @@ Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems,
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst)));
-  for (int64 i = 0; i < nelems; ++i) {
+  for (int64 i = 0; i < static_cast<int64>(nelems); ++i) {
     const tstring& tstr_i = tstr[i];
     auto py_string =
         make_safe(PyBytes_FromStringAndSize(tstr_i.data(), tstr_i.size()));
@@ -125,9 +125,10 @@ bool DeviceDescription::rocm_amdgpu_isa_version(int *version) const {

 bool ThreadDimOk(const DeviceDescription &device_description,
                  const ThreadDim &thread_dim) {
-  auto total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
-  auto threads_per_block_limit = device_description.threads_per_block_limit();
-  if (total_threads > static_cast<uint64>(threads_per_block_limit)) {
+  const int64 total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
+  const int64 threads_per_block_limit =
+      device_description.threads_per_block_limit();
+  if (total_threads > threads_per_block_limit) {
     VLOG(2) << "exceeded total-thread-per-block limit: " << total_threads
             << " vs limit " << threads_per_block_limit;
     return false;
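The ThreadDimOk change above takes yet another route: both quantities are stored in signed 64-bit locals up front, so every comparison is signed-to-signed and the earlier one-sided static_cast<uint64> disappears. A rough equivalent using int64_t in place of TensorFlow's int64 alias (the ThreadDim field types here are an assumption, not the real stream_executor declaration):

#include <cstdint>

struct Dim3 {
  uint32_t x, y, z;
};

// Illustrative sketch: widen both sides of the limit check to signed 64-bit.
bool WithinThreadLimit(const Dim3& thread_dim,
                       int64_t threads_per_block_limit) {
  // Casting the first factor makes the whole product a signed 64-bit
  // computation, so the comparison below is signed vs. signed.
  const int64_t total_threads =
      static_cast<int64_t>(thread_dim.x) * thread_dim.y * thread_dim.z;
  return total_threads <= threads_per_block_limit;
}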
@@ -1886,7 +1886,7 @@ Stream *Stream::GetOrCreateSubStream() {

   // Look for the first reusable sub_stream that is ok, dropping !ok sub_streams
   // we encounter along the way.
-  for (int64 index = 0; index < sub_streams_.size();) {
+  for (size_t index = 0; index < sub_streams_.size();) {
     std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
     if (pair.second) {
       // The sub_stream is reusable.
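In GetOrCreateSubStream the bound is deliberately not hoisted, presumably because sub_streams_ can shrink while the loop drops !ok sub-streams, so size() must be re-evaluated each iteration; instead the index type is changed to size_t to match it. A small sketch of that situation (illustrative, not the actual Stream code):

#include <cstddef>
#include <vector>

// Remove negative entries in place while iterating.
void DropNegatives(std::vector<int>& values) {
  // The container shrinks inside the loop, so values.size() must be checked
  // every iteration; a cached bound would go stale. A size_t index matches
  // the type of size() and keeps the comparison unsigned vs. unsigned.
  for (size_t index = 0; index < values.size();) {
    if (values[index] < 0) {
      // Swap-and-pop: overwrite with the last element, then drop it.
      values[index] = values.back();
      values.pop_back();
    } else {
      ++index;
    }
  }
}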
@@ -1937,7 +1937,7 @@ void Stream::ReturnSubStream(Stream *sub_stream) {
   absl::MutexLock lock(&mu_);

   // Look for the sub-stream.
-  for (int64 index = 0; index < sub_streams_.size(); ++index) {
+  for (int64 index = 0, end = sub_streams_.size(); index < end; ++index) {
     std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
     if (pair.first.get() != sub_stream) {
       continue;
@@ -478,7 +478,7 @@ port::Status StreamExecutor::GetStatus(Stream *stream) {

 DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) {
   if (memory_limit_bytes_ > 0 &&
-      mem_alloc_bytes_ + size > memory_limit_bytes_) {
+      static_cast<int64>(mem_alloc_bytes_ + size) > memory_limit_bytes_) {
     LOG(WARNING) << "Not enough memory to allocate " << size << " on device "
                  << device_ordinal_
                  << " within provided limit. [used=" << mem_alloc_bytes_