diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index 0be1d5df616..8f3e24242eb 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -714,10 +714,13 @@ Status ExecutorImpl::Initialize(const Graph& graph) { used_outputs[e->src_output()] = true; } } + int i = 0; for (bool used_output : used_outputs) { if (!used_output) { metrics::RecordUnusedOutput(n->type_string()); + item->kernel->set_output_required(i, false); } + ++i; } } @@ -2093,9 +2096,9 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx, for (int i = 0; i < item.num_outputs; ++i) { const TensorValue val = ctx->release_output(i); if (val.tensor == nullptr) { - // Unless it's a Switch or a Recv, the node must produce a - // tensor value at i-th output. - if (!item.is_recv_or_switch) { + // Unless it's a Switch or a Recv, or the executor has marked the output + // as not required, the node must produce a tensor value at i-th output. 
+ if (!(item.is_recv_or_switch || !item.kernel->output_required(i))) { s.Update(errors::Internal("Missing ", i, "-th output from ", FormatNodeDefForError(item.kernel->def()))); } diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 38c56eb3b1c..e6fbdd1c113 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -105,7 +105,8 @@ OpKernel::OpKernel(OpKernelConstruction* context, bool is_deferred) type_string_view_(props_->node_def.op()), graph_def_version_(context->graph_def_version()), is_deferred_(is_deferred), - cost_estimate_(OpKernel::kInitialCostEstimateCycles) { + cost_estimate_(OpKernel::kInitialCostEstimateCycles), + outputs_required_(context->num_outputs(), true) { OP_REQUIRES_OK(context, NameRangesForNode(props_->node_def, *props_->op_def, &input_name_map_, &output_name_map_)); @@ -133,7 +134,8 @@ OpKernel::OpKernel(OpKernelConstruction* context, NodeDef&& custom_def, type_string_view_(props_->node_def.op()), graph_def_version_(context->graph_def_version()), is_deferred_(is_deferred), - cost_estimate_(OpKernel::kInitialCostEstimateCycles) { + cost_estimate_(OpKernel::kInitialCostEstimateCycles), + outputs_required_(context->num_outputs(), true) { OP_REQUIRES_OK(context, NameRangesForNode(props_->node_def, *props_->op_def, &input_name_map_, &output_name_map_)); diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 9e22321b42c..8f339e878b9 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -156,6 +156,18 @@ class OpKernel { // Returns a pointer to the tensor stored inside constant ops. virtual const Tensor* const_tensor() const { return nullptr; } + // Returns true if this kernel must produce its ith output. + // REQUIRES: 0 <= i < num_outputs(). 
+ bool output_required(int i) const { return outputs_required_[i]; } + + // Hints whether or not the ith output must be produced when running the + // kernel. By default, all outputs are required. The kernel implementation + // may ignore the hint. + // REQUIRES: 0 <= i < num_outputs(). + void set_output_required(int i, bool is_required) { + outputs_required_[i] = is_required; + } + // Updates the dynamic cost estimate, which is used to determine whether this // op is expensive. The new cost estimate is a weighted average of the old // cost estimate and the latest cost. @@ -223,6 +235,7 @@ class OpKernel { const bool is_deferred_; bool expensive_; std::atomic_uint_fast64_t cost_estimate_; + std::vector<bool> outputs_required_; TF_DISALLOW_COPY_AND_ASSIGN(OpKernel); }; @@ -941,10 +954,8 @@ class OpKernelContext { // should call allocate_output(index, ...), set_output(index, ...), // set_output_ref(index, ...), or set the status to a non-ok value. // If it returns false, it may output, but is not required to do so. - // TODO(mrry): Convert this to return Status, and implement a string - // name version. 
+ bool output_required(int index) const { - return true; // TODO(josh11b): implement + return op_kernel().output_required(index); } // Allocation of tensors during kernel execution inside the Compute diff --git a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc index a97c5cb47a2..8de93cf9b30 100644 --- a/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc +++ b/tensorflow/core/kernels/sparse_fill_empty_rows_op.cc @@ -78,16 +78,23 @@ class SparseFillEmptyRowsOp : public OpKernel { const int64 N = indices_t.shape().dim_size(0); const int64 dense_rows = dense_shape(0); - Tensor* empty_row_indicator_t; - OP_REQUIRES_OK(context, context->allocate_output(kEmptyRowIndicatorOutput, - TensorShape({dense_rows}), - &empty_row_indicator_t)); - auto empty_row_indicator = empty_row_indicator_t->vec<bool>(); - Tensor* reverse_index_map_t; - OP_REQUIRES_OK(context, context->allocate_output(kReverseIndexMapOutput, - TensorShape({N}), - &reverse_index_map_t)); - auto reverse_index_map = reverse_index_map_t->vec<int64>(); + bool* empty_row_indicator = nullptr; + if (context->output_required(kEmptyRowIndicatorOutput)) { + Tensor* empty_row_indicator_t = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(kEmptyRowIndicatorOutput, + TensorShape({dense_rows}), + &empty_row_indicator_t)); + empty_row_indicator = empty_row_indicator_t->vec<bool>().data(); + } + int64* reverse_index_map = nullptr; + if (context->output_required(kReverseIndexMapOutput)) { + Tensor* reverse_index_map_t = nullptr; + OP_REQUIRES_OK(context, context->allocate_output(kReverseIndexMapOutput, + TensorShape({N}), + &reverse_index_map_t)); + reverse_index_map = reverse_index_map_t->vec<int64>().data(); + } int rank = indices_t.shape().dim_size(1); @@ -122,8 +129,11 @@ class SparseFillEmptyRowsOp : public OpKernel { bool all_rows_full = true; for (int row = 0; row < dense_rows; ++row) { // csr_offset here describes the number of elements in this 
+ dense row - empty_row_indicator(row) = (csr_offset[row] == 0); - all_rows_full = all_rows_full & !empty_row_indicator(row); + bool row_empty = (csr_offset[row] == 0); + if (empty_row_indicator) { + empty_row_indicator[row] = row_empty; + } + all_rows_full = all_rows_full & !row_empty; // In filled version, each row has at least one element. csr_offset[row] = std::max(csr_offset[row], int64{1}); // Update csr_offset to represent the number of elements up to and @@ -140,8 +150,10 @@ class SparseFillEmptyRowsOp : public OpKernel { if (all_rows_full) { context->set_output(kOutputIndicesOutput, indices_t); context->set_output(kOutputValuesOutput, values_t); - for (int64 i = 0; i < N; ++i) { - reverse_index_map(i) = i; + if (reverse_index_map) { + for (int64 i = 0; i < N; ++i) { + reverse_index_map[i] = i; + } } } else { Tensor* output_indices_t; @@ -169,7 +181,9 @@ class SparseFillEmptyRowsOp : public OpKernel { std::copy_n(&indices(i, 0), rank, &output_indices(output_i, 0)); output_values(output_i) = values(i); // We'll need this reverse index map to backprop correctly. - reverse_index_map(i) = output_i; + if (reverse_index_map) { + reverse_index_map[i] = output_i; + } } // Fill in values for rows that are missing