resolutions
commit 99686bb275
parent 3bb28df8d4
Changed paths:
  tensorflow
  lite
  delegates/nnapi
  toco
  graph_transformations
  convert_trivial_transpose_to_reshape.cc
  dequantize.cc
  drop_fake_quant.cc
  ensure_uint8_weights_safe_for_fast_int8_kernels.cc
  tflite
  tools
  python
  stream_executor
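
The hunks below follow two recurring patterns for resolving signed/unsigned comparison warnings: hoist the container's unsigned size() into a suitably typed local (end, max_size, current_size) before the loop or bounds check, and, where the two sides genuinely differ in signedness, make the conversion explicit with a static_cast. A minimal sketch of the first pattern, using hypothetical names (FindFirstNegative, values) that do not appear in the commit:

#include <cstdint>
#include <vector>

// Illustrative only: capture the unsigned size once in a signed local and
// compare like against like, instead of comparing a signed index with size_t
// on every iteration.
int FindFirstNegative(const std::vector<int>& values) {
  // Before: for (int i = 0; i < values.size(); ++i)  // mixed-sign comparison
  const int64_t end = values.size();  // one well-defined conversion point
  for (int64_t i = 0; i < end; ++i) {
    if (values[i] < 0) return static_cast<int>(i);
  }
  return -1;
}
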
@@ -39,7 +39,8 @@ class OperandMapping {
   // Given a TFLite index return the ANN index. If it doesn't exist
   // return -1.
   int lite_index_to_ann(int index) const {
-    if (index >= 0 && index < lite_tensor_to_ann_tensor_.size())
+    const int64_t max_size = lite_tensor_to_ann_tensor_.size();
+    if (index >= 0 && index < max_size)
       return lite_tensor_to_ann_tensor_[index];
     else
       return -1;
@@ -60,7 +61,8 @@ class OperandMapping {
 
   // Add a new mapping from `tflite_index` and return the NN API tensor index.
   int add_new_ann_tensor_index(int tflite_index) {
-    if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
+    const int64_t current_size = lite_tensor_to_ann_tensor_.size();
+    if (tflite_index >= current_size) {
       lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
     }
     const int new_tensor_index = next_ann_tensor_index_++;
@@ -72,7 +74,8 @@ class OperandMapping {
   // converted during copying the data to the memory allocated for NN API.
   // kTfLiteNoType means no conversion is needed.
   TfLiteType lite_index_to_ann_type_conversion(int index) const {
-    if (index >= 0 && index < index_to_type_conversion_.size())
+    const int64_t max_size = index_to_type_conversion_.size();
+    if (index >= 0 && index < max_size)
       return index_to_type_conversion_[index];
     else
       return kTfLiteNoType;
@@ -80,7 +83,8 @@ class OperandMapping {
 
   // Add a new mapping from TFLite index to a type conversion.
   void add_type_conversion(int tflite_index, TfLiteType tflite_type) {
-    if (tflite_index >= index_to_type_conversion_.size()) {
+    const int64_t current_size = index_to_type_conversion_.size();
+    if (tflite_index >= current_size) {
       index_to_type_conversion_.resize(tflite_index + 1, kTfLiteNoType);
     }
     index_to_type_conversion_[tflite_index] = tflite_type;
@@ -36,7 +36,7 @@ void ExtractQuantLstmWeightsSubmatrix(const TfLiteIntArray* submatrix_dims,
 
   submatrix->resize(NumElements(submatrix_dims));
 
-  for (uint32_t i = 0; i < submatrix_rows * submatrix_cols; ++i) {
+  for (uint32_t i = 0, end = submatrix_rows * submatrix_cols; i < end; ++i) {
     const uint32_t row = i / submatrix_cols;
     const uint32_t column = i % submatrix_cols;
     (*submatrix)[i] =
@@ -31,7 +31,7 @@ bool TransposeAffectsMemoryOrder(std::vector<int> perm,
   // just the shape) then the flat buffer representation shouldn't change.
   std::vector<int> old_major_index_ordering;
   std::vector<int> new_major_index_ordering;
-  for (int i = 0, iter_limit = in_shape.size(); i < iter_limit; i++) {
+  for (int i = 0, end = in_shape.size(); i < end; i++) {
     if (in_shape[i] != 1) {
       old_major_index_ordering.push_back(i);
     }
@@ -35,7 +35,7 @@ void DequantizeBuffer(Array* array) {
   auto& new_data = array->GetMutableBuffer<ArrayDataType::kFloat>().data;
   new_data.resize(old_data.size());
   const auto& qparams = array->GetQuantizationParams();
-  for (int i = 0, iter_limit = old_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = old_data.size(); i < end; i++) {
     new_data[i] = qparams.scale * (old_data[i] - qparams.zero_point);
   }
 }
@@ -45,8 +45,7 @@ namespace toco {
   }
 
   // Drop min/max inputs
-  for (int i = 1, iter_limit = fakequant_op->inputs.size(); i < iter_limit;
-       i++) {
+  for (int i = 1, end = fakequant_op->inputs.size(); i < end; i++) {
     if (CountOpsWithInput(*model, fakequant_op->inputs[i]) == 1) {
       model->EraseArray(fakequant_op->inputs[i]);
     }
@@ -166,7 +166,7 @@ namespace toco {
   int index_of_previous_bad_value = 0;
   bool changed = false;
 
-  for (int i = 0, iter_limit = buffer_data.size(); i < iter_limit; i++) {
+  for (int i = 0, end = buffer_data.size(); i < end; i++) {
     if (buffer_data[i] == 0) {
       count_bad++;
       if (count_bad > 1) {
@@ -157,7 +157,7 @@ void ImportOperators(
       }
     }
     auto outputs = input_op->outputs();
-    for (int i = 0; i < outputs->Length(); i++) {
+    for (int i = 0, end = outputs->Length(); i < end; i++) {
       auto output_index = outputs->Get(i);
       const std::string& output_name = tensors_table.at(output_index);
       op->outputs.push_back(output_name);
@@ -125,7 +125,7 @@ bool HasMinMax(const TensorT* tensor) {
 }
 
 void SetOperatorCodeVersion(ModelT* model) {
-  for (int subgraph_idx = 0; subgraph_idx < model->subgraphs.size();
+  for (int subgraph_idx = 0, end = model->subgraphs.size(); subgraph_idx < end;
        subgraph_idx++) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_idx).get();
     // Iterate backward to avoid messing with index.
@@ -259,7 +259,7 @@ TfLiteStatus AdjustWeightsForBiasScale(QuantizationParametersT* quant_params,
 
   // Per channel quantization
   if (channel_dim_size > 1) {
-    for (size_t i = 0; i < channel_dim_size; ++i) {
+    for (int i = 0; i < channel_dim_size; ++i) {
       // Current scale is not compatible with bias. Adjust max/min values.
       if (std::abs(bias_data[i]) >=
           0.5 * input_scale * weight_scales[i] * kScale) {
@@ -636,7 +636,7 @@ TfLiteStatus SymmetricPerChannelBiasQuantize(ModelT* model, TensorT* tensor,
                                              ErrorReporter* error_reporter) {
   // Compute scales.
   std::vector<float> scales(number_of_dimension);
-  for (size_t i = 0; i < number_of_dimension; i++) {
+  for (int i = 0; i < number_of_dimension; i++) {
     scales[i] = input_scale * weight_scales[i];
   }
 
@@ -703,19 +703,19 @@ float GetEffectiveScale(ModelT* model, SubGraphT* subgraph, int op_idx,
                         std::vector<float> factors) {
   float scale = 1.0f;
   OperatorT* op = subgraph->operators[op_idx].get();
-  for (int i = 0; i < input_index.size(); ++i) {
+  for (int i = 0, end = input_index.size(); i < end; ++i) {
     const int index_local = input_index[i];
     const int index_global = op->inputs[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < intermediate_index.size(); ++i) {
+  for (int i = 0, end = intermediate_index.size(); i < end; ++i) {
     const int index_local = intermediate_index[i];
     const int index_global = op->intermediates[index_local];
     const TensorT* tensor = subgraph->tensors[index_global].get();
     scale *= tensor->quantization->scale[0];
   }
-  for (int i = 0; i < factors.size(); ++i) {
+  for (int i = 0, end = factors.size(); i < end; ++i) {
     scale *= factors[i];
   }
   return scale;
@@ -324,7 +324,7 @@ void MakeTensor(const string& name, const std::vector<int32_t>& shape,
 
 // Updates operator code versions for the operators with INT8 inputs.
 void UpdateInt8OperatorVersions(ModelT* model) {
-  for (int i = 0; i < model->operator_codes.size(); ++i) {
+  for (int i = 0, end = model->operator_codes.size(); i < end; ++i) {
     const BuiltinOperator& op_code = model->operator_codes[i]->builtin_code;
     if (op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM ||
         op_code == BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN ||
@@ -406,7 +406,7 @@ TfLiteStatus QuantizeWeightsInt8(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());
 
-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
+  for (int subgraph_index = 0, end = model->subgraphs.size(); subgraph_index < end;
        ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();
 
@@ -530,12 +530,13 @@ TfLiteStatus QuantizeWeightsFloat16(flatbuffers::FlatBufferBuilder* builder,
   std::unique_ptr<ModelT> model;
   model.reset(input_model->UnPack());
 
-  for (int subgraph_index = 0; subgraph_index < model->subgraphs.size();
+  for (int subgraph_index = 0, end = model->subgraphs.size(); subgraph_index < end
+       ;
        ++subgraph_index) {
     SubGraphT* subgraph = model->subgraphs.at(subgraph_index).get();
 
     absl::flat_hash_map<int32_t, TensorT*> tensor_map;
-    for (int i = 0; i < subgraph->operators.size(); ++i) {
+    for (int i = 0, sub_end = subgraph->operators.size(); i < sub_end; ++i) {
       OperatorT* op = subgraph->operators[i].get();
       for (auto tensor_idx : op->inputs) {
         // Skip optional tensors.
@@ -106,9 +106,9 @@ bool VerifyStringTensorBuffer(const Tensor& tensor, const Buffer& buffer,
     return false;
   }
   offset += sizeof(int32_t);
-  for (int i = 1; i <= num_strings; i++, offset += sizeof(int32_t)) {
+  for (int i = 1, end = num_strings; i <= end; i++, offset += sizeof(int32_t)) {
     int string_offset = *GetIntPtr(buffer_ptr + offset);
-    if (string_offset < prev_ptr || string_offset > buffer_size) {
+    if (string_offset < static_cast<int>(prev_ptr) || string_offset > static_cast<int>(buffer_size)) {
       ReportError(error_reporter,
                   "String tensor %s buffer is invalid: index %d",
                   NameOrEmptyString(tensor.name()), i);
@@ -221,7 +221,7 @@ absl::optional<uint64_t> VerifyAndCountElements(
     }
   }
 
-  if (num_elements != array_segments_size - 1) {
+  if (static_cast<int>(num_elements) != array_segments_size - 1) {
     return absl::nullopt;
   }
 
@@ -254,15 +254,16 @@ absl::optional<uint64_t> VerifyAndCountSparseElements(const Tensor& tensor) {
 
   const int total_dims = sparsity->traversal_order()->size();
   const int original_rank = tensor.shape()->size();
+  const int sparsity_dim_metadata_size = sparsity->dim_metadata()->size();
   if (total_dims < original_rank ||
-      sparsity->dim_metadata()->size() != total_dims) {
+      sparsity_dim_metadata_size != total_dims) {
     return absl::nullopt;
   }
 
   const int block_rank = total_dims - original_rank;
+  const int sparsity_block_map_size = sparsity->block_map()->size();
   if (block_rank > 0 && (sparsity->block_map() == nullptr ||
-                         sparsity->block_map()->size() != block_rank)) {
+                         sparsity_block_map_size != block_rank)) {
     return absl::nullopt;
   }
 
@@ -446,7 +447,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   absl::flat_hash_set<int> subgraph_input_tensors, constant_tensors,
       variable_tensors, output_tensors;
   if (subgraph.tensors()) {
-    for (int i = 0; i < subgraph.tensors()->size(); ++i) {
+    for (int i = 0, end = subgraph.tensors()->size(); i < end; ++i) {
       const auto* tensor = subgraph.tensors()->Get(i);
       if (IsConstantTensor(*tensor, model)) {
         constant_tensors.insert(i);
@@ -462,7 +463,7 @@ bool VerifySubGraphConsistency(const Model& model, const SubGraph& subgraph,
   }
 
   if (subgraph.operators()) {
-    for (int op_idx = 0; op_idx < subgraph.operators()->size(); ++op_idx) {
+    for (int op_idx = 0, end = subgraph.operators()->size(); op_idx < end; ++op_idx) {
       const auto* op = subgraph.operators()->Get(op_idx);
       if (!model.operator_codes() ||
           (op->opcode_index() >= model.operator_codes()->size())) {
@@ -146,8 +146,7 @@ class SessionLogger {
     // Build an index from fetch tensor name to first index in
     // output_tensor_names.
     std::unordered_map<string, int> output_name_to_offset;
-    for (int i = 0, iter_limit = output_tensor_names.size(); i < iter_limit;
-         ++i) {
+    for (int i = 0, end = output_tensor_names.size(); i < end; ++i) {
      const string& name = output_tensor_names[i];
      if (output_name_to_offset.insert(std::make_pair(name, i)).second) {
        req->add_fetch(name);
@@ -271,7 +271,7 @@ Status CopyTF_TensorStringsToPyArray(const TF_Tensor* src, uint64 nelems,
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   auto iter = make_safe(PyArray_IterNew(reinterpret_cast<PyObject*>(dst)));
-  for (int64 i = 0; i < nelems; ++i) {
+  for (int64 i = 0; i < static_cast<int64>(nelems); ++i) {
     const tstring& tstr_i = tstr[i];
     auto py_string =
         make_safe(PyBytes_FromStringAndSize(tstr_i.data(), tstr_i.size()));
@@ -125,9 +125,9 @@ bool DeviceDescription::rocm_amdgpu_isa_version(int *version) const {
 
 bool ThreadDimOk(const DeviceDescription &device_description,
                  const ThreadDim &thread_dim) {
-  auto total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
-  auto threads_per_block_limit = device_description.threads_per_block_limit();
-  if (total_threads > static_cast<uint64>(threads_per_block_limit)) {
+  const int64 total_threads = thread_dim.x * thread_dim.y * thread_dim.z;
+  const int64 threads_per_block_limit = device_description.threads_per_block_limit();
+  if (total_threads > threads_per_block_limit) {
     VLOG(2) << "exceeded total-thread-per-block limit: " << total_threads
             << " vs limit " << threads_per_block_limit;
     return false;
@@ -1886,7 +1886,7 @@ Stream *Stream::GetOrCreateSubStream() {
 
   // Look for the first reusable sub_stream that is ok, dropping !ok sub_streams
   // we encounter along the way.
-  for (int64 index = 0; index < sub_streams_.size();) {
+  for (size_t index = 0; index < sub_streams_.size();) {
     std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
     if (pair.second) {
       // The sub_stream is reusable.
|
|||||||
absl::MutexLock lock(&mu_);
|
absl::MutexLock lock(&mu_);
|
||||||
|
|
||||||
// Look for the sub-stream.
|
// Look for the sub-stream.
|
||||||
for (int64 index = 0; index < sub_streams_.size(); ++index) {
|
for (int64 index = 0, end = sub_streams_.size(); index < end; ++index) {
|
||||||
std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
|
std::pair<std::unique_ptr<Stream>, bool> &pair = sub_streams_[index];
|
||||||
if (pair.first.get() != sub_stream) {
|
if (pair.first.get() != sub_stream) {
|
||||||
continue;
|
continue;
|
||||||
|
@@ -478,7 +478,7 @@ port::Status StreamExecutor::GetStatus(Stream *stream) {
 
 DeviceMemoryBase StreamExecutor::Allocate(uint64 size, int64 memory_space) {
   if (memory_limit_bytes_ > 0 &&
-      mem_alloc_bytes_ + size > memory_limit_bytes_) {
+      static_cast<int64>(mem_alloc_bytes_ + size) > memory_limit_bytes_) {
     LOG(WARNING) << "Not enough memory to allocate " << size << " on device "
                  << device_ordinal_
                  << " within provided limit. [used=" << mem_alloc_bytes_
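
Several hunks above (the verifier and StreamExecutor changes, for example) use the second pattern: when one side of a comparison is inherently unsigned, such as an accumulated byte count, and the other is a signed limit, the conversion is spelled out with static_cast so the intent is visible. A hypothetical sketch of that idea; WithinLimit and its parameters are invented for illustration and are not part of the commit:

#include <cstdint>

// Illustrative only: the explicit cast documents that the unsigned total is
// deliberately compared against a signed limit, rather than relying on an
// implicit conversion that a sign-compare warning would flag.
bool WithinLimit(uint64_t used_bytes, uint64_t request_bytes, int64_t limit_bytes) {
  if (limit_bytes <= 0) return true;  // treat a non-positive limit as "no limit"
  return static_cast<int64_t>(used_bytes + request_bytes) <= limit_bytes;
}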