Add shape annotation statistics.
PiperOrigin-RevId: 250780619
This commit is contained in:
parent
fc399d6cd7
commit
e8489233f5
@ -569,6 +569,9 @@ class SymbolicShapeRefiner {
|
|||||||
std::vector<const TensorProto*> input_tensor_protos;
|
std::vector<const TensorProto*> input_tensor_protos;
|
||||||
std::vector<const TensorProto*> output_tensor_protos;
|
std::vector<const TensorProto*> output_tensor_protos;
|
||||||
std::vector<ShapeHandle> output_tensors_as_shapes;
|
std::vector<ShapeHandle> output_tensors_as_shapes;
|
||||||
|
|
||||||
|
// Output shapes incompatible between annotation and shape inference.
|
||||||
|
bool shape_incompatible = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
NodeContext* GetNodeContext(const NodeDef* node) {
|
NodeContext* GetNodeContext(const NodeDef* node) {
|
||||||
@ -1036,6 +1039,28 @@ class SymbolicShapeRefiner {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the inferred and the annotated shape are the same.
//
// The shapes are treated as the same when both arguments are the identical
// handle, or when their ranks are equal and InferenceContext::Value() yields
// equal values for the dimensions at every index below that rank.
bool SameShapes(ShapeHandle inferred_shape,
                ShapeHandle annotated_shape) const {
  // Fast path: the very same handle needs no per-dimension comparison.
  if (inferred_shape.SameHandle(annotated_shape)) return true;

  const int rank = InferenceContext::Rank(inferred_shape);
  if (rank != InferenceContext::Rank(annotated_shape)) return false;

  for (int dim_index = 0; dim_index < rank; ++dim_index) {
    const int64 inferred_value = InferenceContext::Value(
        InferenceContext::DimKnownRank(inferred_shape, dim_index));
    const int64 annotated_value = InferenceContext::Value(
        InferenceContext::DimKnownRank(annotated_shape, dim_index));
    if (inferred_value != annotated_value) return false;
  }
  return true;
}
|
||||||
|
|
||||||
bool EquivalentShapesAndTypes(const std::vector<ShapeAndType>& st1,
|
bool EquivalentShapesAndTypes(const std::vector<ShapeAndType>& st1,
|
||||||
const std::vector<ShapeAndType>& st2) const {
|
const std::vector<ShapeAndType>& st2) const {
|
||||||
if (st1.size() != st2.size()) {
|
if (st1.size() != st2.size()) {
|
||||||
@ -1380,9 +1405,26 @@ class SymbolicShapeRefiner {
|
|||||||
|
|
||||||
const TensorShapeProto& shape =
|
const TensorShapeProto& shape =
|
||||||
attr.at(kOutputShapes).list().shape(shape_index);
|
attr.at(kOutputShapes).list().shape(shape_index);
|
||||||
|
if (shape.dim().empty()) continue;
|
||||||
|
|
||||||
ShapeHandle output_shape;
|
ShapeHandle output_shape;
|
||||||
TF_RETURN_IF_ERROR(ic->MakeShapeFromShapeProto(shape, &output_shape));
|
TF_RETURN_IF_ERROR(ic->MakeShapeFromShapeProto(shape, &output_shape));
|
||||||
|
|
||||||
|
// Check if annotated shapes are incompatible with inferred shapes.
|
||||||
|
if ((ic->FullyDefined(ic->output(i)) &&
|
||||||
|
!SameShapes(ic->output(i), output_shape)) ||
|
||||||
|
(!ic->FullyDefined(ic->output(i)) &&
|
||||||
|
!CompatibleShapes(ic->output(i), output_shape))) {
|
||||||
|
LOG(WARNING)
|
||||||
|
<< "UpdateOutputShapesUsingAnnotatedInformation() -- node: "
|
||||||
|
<< node.name() << ", inferred output shape "
|
||||||
|
<< "doesn't match for i=" << i << ": "
|
||||||
|
<< "ic->output(k): " << ic->DebugString(ic->output(i))
|
||||||
|
<< ", annotated output shape: " << ic->DebugString(output_shape)
|
||||||
|
<< " -- " << node.DebugString();
|
||||||
|
c->shape_incompatible = true;
|
||||||
|
}
|
||||||
|
|
||||||
// Only use annotated shapes if the inference shape is unknown and
|
// Only use annotated shapes if the inference shape is unknown and
|
||||||
// compatible with annotated shapes.
|
// compatible with annotated shapes.
|
||||||
if (!ic->FullyDefined(ic->output(i)) &&
|
if (!ic->FullyDefined(ic->output(i)) &&
|
||||||
@ -2282,8 +2324,15 @@ Status GraphProperties::InferStatically(bool assume_valid_feeds,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (aggressive_shape_inference && ctx->shape_incompatible)
|
||||||
|
incompatible_shape_nodes_.insert(node.name());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (aggressive_shape_inference && !incompatible_shape_nodes_.empty())
|
||||||
|
LOG(WARNING) << incompatible_shape_nodes_.size()
|
||||||
|
<< " nodes have incompatible output shapes.";
|
||||||
|
|
||||||
// Help trace the unknown dimensions to their origins.
|
// Help trace the unknown dimensions to their origins.
|
||||||
VerboseLogUnknownDimensionSources(item_.graph, input_properties_,
|
VerboseLogUnknownDimensionSources(item_.graph, input_properties_,
|
||||||
output_properties_);
|
output_properties_);
|
||||||
|
@ -17,7 +17,9 @@ limitations under the License.
|
|||||||
#define TENSORFLOW_CORE_GRAPPLER_COSTS_GRAPH_PROPERTIES_H_
|
#define TENSORFLOW_CORE_GRAPPLER_COSTS_GRAPH_PROPERTIES_H_
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "tensorflow/core/framework/shape_inference.h"
|
#include "tensorflow/core/framework/shape_inference.h"
|
||||||
#include "tensorflow/core/grappler/clusters/cluster.h"
|
#include "tensorflow/core/grappler/clusters/cluster.h"
|
||||||
#include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
|
#include "tensorflow/core/grappler/costs/op_performance_data.pb.h"
|
||||||
@ -129,7 +131,12 @@ class GraphProperties {
|
|||||||
void ClearOutputProperties(const string& node_name);
|
void ClearOutputProperties(const string& node_name);
|
||||||
// Returns true if we have *any* properties.
|
// Returns true if we have *any* properties.
|
||||||
bool has_properties() const {
|
bool has_properties() const {
|
||||||
return input_properties_.size() > 0 || output_properties_.size() > 0;
|
return !input_properties_.empty() || !output_properties_.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CheckShapeIncompatible(const string& node_name) const {
|
||||||
|
return incompatible_shape_nodes_.find(node_name) !=
|
||||||
|
incompatible_shape_nodes_.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -181,6 +188,10 @@ class GraphProperties {
|
|||||||
std::unordered_map<string, std::vector<OpInfo::TensorProperties>>
|
std::unordered_map<string, std::vector<OpInfo::TensorProperties>>
|
||||||
output_properties_;
|
output_properties_;
|
||||||
const std::vector<OpInfo::TensorProperties> missing_properties_;
|
const std::vector<OpInfo::TensorProperties> missing_properties_;
|
||||||
|
|
||||||
|
// Nodes with output shape incompatible between shape inference and
|
||||||
|
// annotation.
|
||||||
|
std::unordered_set<string> incompatible_shape_nodes_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace grappler
|
} // end namespace grappler
|
||||||
|
@ -89,6 +89,25 @@ struct RecvNodeDescriptorEqual {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void UpdateDeviceAnnotationState(const NodeDef* node,
|
||||||
|
const NodeState& node_state,
|
||||||
|
DeviceState* device) {
|
||||||
|
bool annotated = node->attr().count(kExecutionCount) > 0;
|
||||||
|
int64 execution_count = annotated ? node->attr().at(kExecutionCount).i() : 1;
|
||||||
|
|
||||||
|
if (annotated) {
|
||||||
|
auto& shape_annotation_stats = device->shape_annotation_stats;
|
||||||
|
shape_annotation_stats.num_ops_annotated += 1;
|
||||||
|
shape_annotation_stats.num_ops_executed += execution_count;
|
||||||
|
shape_annotation_stats.num_ops_executed_more_than_once +=
|
||||||
|
execution_count > 1 ? 1 : 0;
|
||||||
|
shape_annotation_stats.num_ops_with_incompatible_shapes +=
|
||||||
|
node_state.shape_incompatible ? 1 : 0;
|
||||||
|
shape_annotation_stats.num_ops_with_dynamic_shapes +=
|
||||||
|
(execution_count > 1 && node->attr().count(kOutputSame) == 0) ? 1 : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
const NodeDef* LIFOManager::GetCurrNode() {
|
const NodeDef* LIFOManager::GetCurrNode() {
|
||||||
@ -714,6 +733,8 @@ NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
|
|||||||
graph_properties_->GetInputProperties(node->name());
|
graph_properties_->GetInputProperties(node->name());
|
||||||
node_state.output_properties =
|
node_state.output_properties =
|
||||||
graph_properties_->GetOutputProperties(node->name());
|
graph_properties_->GetOutputProperties(node->name());
|
||||||
|
node_state.shape_incompatible =
|
||||||
|
graph_properties_->CheckShapeIncompatible(node->name());
|
||||||
|
|
||||||
// Some ops may need further processing to the input / output properties:
|
// Some ops may need further processing to the input / output properties:
|
||||||
// _Send and _Recv.
|
// _Send and _Recv.
|
||||||
@ -791,6 +812,7 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
|
|||||||
node_state.execution_count = node->attr().count(kExecutionCount) == 0
|
node_state.execution_count = node->attr().count(kExecutionCount) == 0
|
||||||
? 1
|
? 1
|
||||||
: node->attr().at(kExecutionCount).i();
|
: node->attr().at(kExecutionCount).i();
|
||||||
|
|
||||||
Costs total_node_costs =
|
Costs total_node_costs =
|
||||||
MultiplyCosts(node_costs, node_state.execution_count);
|
MultiplyCosts(node_costs, node_state.execution_count);
|
||||||
graph_costs_ = CombineCosts(graph_costs_, total_node_costs);
|
graph_costs_ = CombineCosts(graph_costs_, total_node_costs);
|
||||||
@ -824,6 +846,9 @@ bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
|
|||||||
auto curr_time = device.GetCurrTime();
|
auto curr_time = device.GetCurrTime();
|
||||||
node_state.time_finished = curr_time;
|
node_state.time_finished = curr_time;
|
||||||
|
|
||||||
|
// Update shape annotation states.
|
||||||
|
UpdateDeviceAnnotationState(node, node_state, &device);
|
||||||
|
|
||||||
// Update device memory usage.
|
// Update device memory usage.
|
||||||
if (!IsPersistent(*node)) {
|
if (!IsPersistent(*node)) {
|
||||||
for (const auto& port_num_output_pair : node_state.outputs) {
|
for (const auto& port_num_output_pair : node_state.outputs) {
|
||||||
@ -973,6 +998,21 @@ Costs VirtualScheduler::Summary() const {
|
|||||||
<< state.device_costs.num_ops_with_unknown_shapes
|
<< state.device_costs.num_ops_with_unknown_shapes
|
||||||
<< " having unknown shapes";
|
<< " having unknown shapes";
|
||||||
|
|
||||||
|
// Device shape annotation statistics.
|
||||||
|
const auto& device_annotation_stats = state.shape_annotation_stats;
|
||||||
|
if (device_annotation_stats.num_ops_annotated > 0) {
|
||||||
|
VLOG(1) << device_annotation_stats.num_ops_annotated
|
||||||
|
<< " ops with shape annotation, with "
|
||||||
|
<< device_annotation_stats.num_ops_executed_more_than_once
|
||||||
|
<< " executed more than once, "
|
||||||
|
<< device_annotation_stats.num_ops_with_dynamic_shapes
|
||||||
|
<< " with dynamic shapes, "
|
||||||
|
<< device_annotation_stats.num_ops_with_incompatible_shapes
|
||||||
|
<< " with incompatible shapes, "
|
||||||
|
<< device_annotation_stats.num_ops_executed
|
||||||
|
<< " ops executed in total.";
|
||||||
|
}
|
||||||
|
|
||||||
VLOG(1) << "Per-op execution time / compute time / memory time "
|
VLOG(1) << "Per-op execution time / compute time / memory time "
|
||||||
<< " / intermediate memory time"
|
<< " / intermediate memory time"
|
||||||
<< " (and memory usage at peak memory usage):";
|
<< " (and memory usage at peak memory usage):";
|
||||||
|
@ -73,12 +73,16 @@ struct NodeState {
|
|||||||
// How many times this node has been executed, e.g. in a while loop.
|
// How many times this node has been executed, e.g. in a while loop.
|
||||||
int execution_count;
|
int execution_count;
|
||||||
|
|
||||||
|
// Output shape incompatible between shape annotation and shape inference.
|
||||||
|
bool shape_incompatible;
|
||||||
|
|
||||||
NodeState() {
|
NodeState() {
|
||||||
num_inputs_ready = 0;
|
num_inputs_ready = 0;
|
||||||
time_ready = Costs::Duration::max();
|
time_ready = Costs::Duration::max();
|
||||||
time_scheduled = Costs::Duration::max();
|
time_scheduled = Costs::Duration::max();
|
||||||
time_finished = Costs::Duration::max();
|
time_finished = Costs::Duration::max();
|
||||||
execution_count = 0;
|
execution_count = 0;
|
||||||
|
shape_incompatible = false;
|
||||||
// Note that num_outputs_executed and time_no_references are not initialized
|
// Note that num_outputs_executed and time_no_references are not initialized
|
||||||
// here, since we don't know the size (i.e., # outputs for this node).
|
// here, since we don't know the size (i.e., # outputs for this node).
|
||||||
}
|
}
|
||||||
@ -116,6 +120,21 @@ struct DeviceState {
|
|||||||
int64 memory_usage; // Current temporary memory usage
|
int64 memory_usage; // Current temporary memory usage
|
||||||
int64 max_memory_usage; // Max temporary memory usage
|
int64 max_memory_usage; // Max temporary memory usage
|
||||||
|
|
||||||
|
// Shape annotation statistics.
|
||||||
|
struct ShapeAnnotationStats {
|
||||||
|
// Number of ops with shape annotated.
|
||||||
|
int64 num_ops_annotated = 0;
|
||||||
|
// Number of ops executed multiple times (e.g. in a loop).
|
||||||
|
int64 num_ops_executed_more_than_once = 0;
|
||||||
|
// Number of ops executed: account for execution count.
|
||||||
|
int64 num_ops_executed = 0;
|
||||||
|
// Number of ops with dynamic shapes (e.g. shape changes in a loop).
|
||||||
|
int64 num_ops_with_dynamic_shapes = 0;
|
||||||
|
// Number of ops with incompatible shapes between annotation and shape
|
||||||
|
// inference.
|
||||||
|
int64 num_ops_with_incompatible_shapes = 0;
|
||||||
|
} shape_annotation_stats;
|
||||||
|
|
||||||
DeviceState() {
|
DeviceState() {
|
||||||
device_costs = Costs::ZeroCosts();
|
device_costs = Costs::ZeroCosts();
|
||||||
device_costs.num_ops_total = 0;
|
device_costs.num_ops_total = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user