Fix a bug in the gradients graph computation caused by an Assign node - C++ API (#12397)

* Fix bug on the gradients graph computation caused by an Assign node.
* Adding a reachable_nodes array to the gradient computation graph.
* Adding test cases when the Var has unreachable nodes.
This commit is contained in:
Courtial Florian 2017-09-05 18:22:22 +02:00 committed by Martin Wicke
parent 3f43cabbb1
commit c8981445c5
4 changed files with 127 additions and 2 deletions

View File

@ -78,6 +78,10 @@ class SymbolicGradientBuilder {
const std::vector<Output>& grad_inputs,
std::vector<Output>* grad_outputs);
// Returns a list mapping whether each node in the graph is reachable
// from outputs_. Keyed by node id.
std::vector<bool> GetReachableNodes();
const Scope& scope_;
const ops::GradOpRegistry* registry_;
const std::vector<Output>& outputs_;
@ -143,11 +147,36 @@ Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad,
return Status::OK();
}
// Computes which nodes of the graph can reach a node in `outputs_` by a
// backward BFS over data (non-control) input edges.
// Returns a vector keyed by node id: entry i is true iff node i is reachable.
std::vector<bool> SymbolicGradientBuilder::GetReachableNodes() {
  std::vector<bool> reachable_nodes(scope_.graph()->num_node_ids(), false);
  std::deque<Node*> queue;
  // Seed the BFS with the requested output nodes themselves.
  for (const Output& out : outputs_) {
    if (!reachable_nodes[out.node()->id()]) {
      queue.push_back(out.node());
      reachable_nodes[out.node()->id()] = true;
    }
  }
  while (!queue.empty()) {
    Node* n = queue.front();
    queue.pop_front();
    for (const Edge* e : n->in_edges()) {
      if (e->IsControlEdge()) continue;
      // Only enqueue a node the first time it is discovered. Without this
      // check a node with k reachable consumers is enqueued and re-expanded
      // k times, wasting O(edges * in-degree) work on dense graphs.
      if (!reachable_nodes[e->src()->id()]) {
        queue.push_back(e->src());
        reachable_nodes[e->src()->id()] = true;
      }
    }
  }
  return reachable_nodes;
}
Status SymbolicGradientBuilder::Initialize() {
if (outputs_.size() != grad_inputs_.size()) {
return errors::InvalidArgument(
"Must specify a gradient input for each output.");
}
std::vector<bool> reachable_nodes = GetReachableNodes();
// TODO(theflofly) Check that inputs_ are reachable from
// outputs_ using reachable_nodes
grad_outputs_->clear();
grad_outputs_->resize(inputs_.size());
// Populate `output_nodes_` from node ids in `outputs_`.
@ -188,12 +217,15 @@ Status SymbolicGradientBuilder::Initialize() {
if (output_nodes_.find(n->id()) == output_nodes_.end()) {
// Internal node: continue BFS along connected outputs.
for (const Edge* e : n->out_edges()) {
if (e->IsControlEdge()) continue;
++num_expected_backprops;
// If a node is not reachable from outputs_,
// we don't expect it to receive a backpropagated gradient.
// It will not be counted in num_expected_backprops.
if (e->IsControlEdge() || !reachable_nodes[e->dst()->id()]) continue;
if (visited.find(e->dst()) == visited.end()) {
queue.push_back(e->dst());
visited.insert(e->dst());
}
++num_expected_backprops;
}
} else {
// Output node: stop BFS and update `num_expected_backprops` for

View File

@ -364,6 +364,73 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) {
test::AsTensor<int>({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2}));
}
TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) {
  // Builds x, y, z variables with initializers. Gradients are requested only
  // for m1 = x*y; m2 = y*z is deliberately left unreachable from m1, so the
  // gradient builder must not wait for a backprop value along that edge.
  auto var_x = Variable(scope_test_, {2, 3}, DT_DOUBLE);
  auto init_x = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}});
  auto assign_x = Assign(scope_test_, var_x, init_x);

  auto var_y = Variable(scope_test_, {3, 1}, DT_DOUBLE);
  auto init_y = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
  auto assign_y = Assign(scope_test_, var_y, init_y);

  auto prod_xy = MatMul(scope_test_, var_x, var_y);

  auto var_z = Variable(scope_test_, {1, 3}, DT_DOUBLE);
  auto init_z = Const(scope_test_, {{9.0, 10.0, 11.0}});
  auto assign_z = Assign(scope_test_, var_z, init_z);

  // Unreachable from prod_xy; must not contribute a gradient.
  auto prod_yz = MatMul(scope_test_, var_y, var_z);

  auto grad_in = Const(scope_test_, {{0.5}, {0.5}});
  std::vector<Output> grad_outputs;
  TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {prod_xy}, {var_y}, {grad_in},
                                    &grad_outputs));

  std::vector<Tensor> outputs;
  test::GetTensors(scope_test_, {assign_x, assign_y, assign_z},
                   {grad_outputs[0]}, &outputs);
  // d(m1)/dy = x^T * grad_in
  test::ExpectTensorNear<double>(
      outputs[0], test::AsTensor<double>({2.5, 3.5, 4.5}, {3, 1}), 1e-5);
}
TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) {
  // Same graph as UnreachableEdgeGradOneOutput, but gradients are requested
  // for both m1 = x*y and m2 = y*z, so the contributions flowing through the
  // two MatMuls must be summed into the gradient w.r.t. y.
  auto var_x = Variable(scope_test_, {2, 3}, DT_DOUBLE);
  auto init_x = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}});
  auto assign_x = Assign(scope_test_, var_x, init_x);

  auto var_y = Variable(scope_test_, {3, 1}, DT_DOUBLE);
  auto init_y = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
  auto assign_y = Assign(scope_test_, var_y, init_y);

  auto prod_xy = MatMul(scope_test_, var_x, var_y);

  auto var_z = Variable(scope_test_, {1, 3}, DT_DOUBLE);
  auto init_z = Const(scope_test_, {{9.0, 10.0, 11.0}});
  auto assign_z = Assign(scope_test_, var_z, init_z);

  auto prod_yz = MatMul(scope_test_, var_y, var_z);

  auto grad_xy = Const(scope_test_, {{0.5}, {0.5}});
  auto grad_yz =
      Const(scope_test_, {{0.5, 0.5, 0.5}, {0.6, 0.7, 0.8}, {0.6, 0.7, 0.9}});
  std::vector<Output> grad_outputs;
  TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {prod_xy, prod_yz}, {var_y},
                                    {grad_xy, grad_yz}, &grad_outputs));

  std::vector<Tensor> outputs;
  test::GetTensors(scope_test_, {assign_x, assign_y, assign_z},
                   {grad_outputs[0]}, &outputs);
  // The gradients from m1 and m2 are summed to compute the gradient w.r.t y:
  // dL/dy = x^T * dm1 + dm2 * z^T
  test::ExpectTensorNear<double>(
      outputs[0], test::AsTensor<double>({17.5, 24.7, 26.8}, {3, 1}), 1e-5);
}
// StopGradientSingleOutputMultiEdgeTest tests combinations of valid and
// 'NoGradient' (induced by StopGradient op) returned along multiple edges from
// a single nodes output.

View File

@ -36,5 +36,19 @@ void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
*out = outputs[0];
}
void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars,
OutputList tensors, std::vector<Tensor>* out) {
ClientSession session(scope);
TF_CHECK_OK(session.Run(assign_vars, nullptr));
TF_CHECK_OK(session.Run(tensors, out));
}
// Runs `assign_vars` (e.g. variable initializers), then evaluates the single
// output `tensor`, returning its value in `*out`.
void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars,
               Output tensor, Tensor* out) {
  std::vector<Tensor> outputs;
  // A braced initializer list copies its elements regardless of std::move
  // (initializer_list elements are const), so pass `tensor` directly instead
  // of a misleading std::move that cannot actually move.
  GetTensors(scope, assign_vars, {tensor}, &outputs);
  *out = outputs[0];
}
} // end namespace test
} // end namespace tensorflow

View File

@ -26,9 +26,21 @@ namespace test {
void GetTensors(const Scope& scope, OutputList tensors,
                std::vector<Tensor>* out);
// Computes the outputs listed in 'tensors', returning the resulting tensors
// in 'out'. The operations in 'assign_vars' are run first — e.g. to assign
// initial values to variables — and their results are discarded.
void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars,
                OutputList tensors, std::vector<Tensor>* out);
// Computes the output 'tensor', returning the resulting tensor in 'out'.
void GetTensor(const Scope& scope, Output tensor, Tensor* out);
// Computes the output 'tensor', returning the resulting tensor in 'out'.
// The operations in 'assign_vars' are run first — e.g. to assign initial
// values to variables — and their results are discarded.
void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars,
               Output tensor, Tensor* out);
} // namespace test
} // namespace tensorflow