Fix bug on the gradients graph computation caused by an Assign node - C++ API (#12397)
* Fix a bug in the gradient graph computation caused by an Assign node.
* Add a reachable_nodes array to the gradient computation graph.
* Add test cases for when the Var has unreachable nodes.
This commit is contained in:
parent
3f43cabbb1
commit
c8981445c5
@ -77,6 +77,10 @@ class SymbolicGradientBuilder {
|
|||||||
Status CallGradFunction(const Operation& op,
|
Status CallGradFunction(const Operation& op,
|
||||||
const std::vector<Output>& grad_inputs,
|
const std::vector<Output>& grad_inputs,
|
||||||
std::vector<Output>* grad_outputs);
|
std::vector<Output>* grad_outputs);
|
||||||
|
|
||||||
|
// Returns a list mapping whether each node in the graph is reachable
|
||||||
|
// from outputs_. Keyed by node id.
|
||||||
|
std::vector<bool> GetReachableNodes();
|
||||||
|
|
||||||
const Scope& scope_;
|
const Scope& scope_;
|
||||||
const ops::GradOpRegistry* registry_;
|
const ops::GradOpRegistry* registry_;
|
||||||
@ -143,11 +147,36 @@ Status SymbolicGradientBuilder::BackpropAlongEdge(const Output& dst_grad,
|
|||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<bool> SymbolicGradientBuilder::GetReachableNodes() {
|
||||||
|
std::vector<bool> reachable_nodes(scope_.graph()->num_node_ids(), false);
|
||||||
|
std::deque<Node*> queue;
|
||||||
|
for (const Output& out : outputs_) {
|
||||||
|
if (!reachable_nodes[out.node()->id()]) {
|
||||||
|
queue.push_back(out.node());
|
||||||
|
reachable_nodes[out.node()->id()] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (!queue.empty()) {
|
||||||
|
Node* n = queue.front();
|
||||||
|
queue.pop_front();
|
||||||
|
for (const Edge* e : n->in_edges()) {
|
||||||
|
if (e->IsControlEdge()) continue;
|
||||||
|
queue.push_back(e->src());
|
||||||
|
reachable_nodes[e->src()->id()] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return reachable_nodes;
|
||||||
|
}
|
||||||
|
|
||||||
Status SymbolicGradientBuilder::Initialize() {
|
Status SymbolicGradientBuilder::Initialize() {
|
||||||
if (outputs_.size() != grad_inputs_.size()) {
|
if (outputs_.size() != grad_inputs_.size()) {
|
||||||
return errors::InvalidArgument(
|
return errors::InvalidArgument(
|
||||||
"Must specify a gradient input for each output.");
|
"Must specify a gradient input for each output.");
|
||||||
}
|
}
|
||||||
|
std::vector<bool> reachable_nodes = GetReachableNodes();
|
||||||
|
// TODO(theflofly) Check that inputs_ are reachable from
|
||||||
|
// outputs_ using reachable_nodes
|
||||||
grad_outputs_->clear();
|
grad_outputs_->clear();
|
||||||
grad_outputs_->resize(inputs_.size());
|
grad_outputs_->resize(inputs_.size());
|
||||||
// Populate `output_nodes_` from node ids in `outputs_`.
|
// Populate `output_nodes_` from node ids in `outputs_`.
|
||||||
@ -188,12 +217,15 @@ Status SymbolicGradientBuilder::Initialize() {
|
|||||||
if (output_nodes_.find(n->id()) == output_nodes_.end()) {
|
if (output_nodes_.find(n->id()) == output_nodes_.end()) {
|
||||||
// Internal node: continue BFS along connected outputs.
|
// Internal node: continue BFS along connected outputs.
|
||||||
for (const Edge* e : n->out_edges()) {
|
for (const Edge* e : n->out_edges()) {
|
||||||
if (e->IsControlEdge()) continue;
|
// If a node is not reachable from outputs_,
|
||||||
++num_expected_backprops;
|
// we don't expect it to receive a backpropagated gradient.
|
||||||
|
// It will not be counted in num_expected_backprops.
|
||||||
|
if (e->IsControlEdge() || !reachable_nodes[e->dst()->id()]) continue;
|
||||||
if (visited.find(e->dst()) == visited.end()) {
|
if (visited.find(e->dst()) == visited.end()) {
|
||||||
queue.push_back(e->dst());
|
queue.push_back(e->dst());
|
||||||
visited.insert(e->dst());
|
visited.insert(e->dst());
|
||||||
}
|
}
|
||||||
|
++num_expected_backprops;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Output node: stop BFS and update `num_expected_backprops` for
|
// Output node: stop BFS and update `num_expected_backprops` for
|
||||||
|
@ -364,6 +364,73 @@ TEST_F(GradientsTest, MultipleNodeOutputGrads) {
|
|||||||
test::AsTensor<int>({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2}));
|
test::AsTensor<int>({60, 61, 62, 63, 66, 66, 66, 67}, {4, 2}));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Requests the gradient of m1 = x * y with respect to y while y also feeds
// nodes (its Assign op and the MatMul m2) that are not among the outputs
// passed to AddSymbolicGradients.
TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) {
  auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE);
  auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}});
  auto x_assign = Assign(scope_test_, x, x_const);

  auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE);
  auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
  auto y_assign = Assign(scope_test_, y, y_const);

  auto m1 = MatMul(scope_test_, x, y);

  auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE);
  auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}});
  auto z_assign = Assign(scope_test_, z, z_const);

  // m2 consumes y but is not listed as an output below.
  auto m2 = MatMul(scope_test_, y, z);

  // Incoming gradient for m1.
  auto dm1 = Const(scope_test_, {{0.5}, {0.5}});

  std::vector<Output> grads;
  TF_ASSERT_OK(AddSymbolicGradients(scope_test_, {m1}, {y}, {dm1}, &grads));

  std::vector<Tensor> fetched;
  test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, {grads[0]},
                   &fetched);

  // Gradient w.r.t. y: transpose(x) * dm1 = {2.5, 3.5, 4.5}.
  test::ExpectTensorNear<double>(
      fetched[0], test::AsTensor<double>({2.5, 3.5, 4.5}, {3, 1}), 1e-5);
}
|
||||||
|
|
||||||
|
// Requests the gradient with respect to y of two outputs, m1 = x * y and
// m2 = y * z, while y also feeds its Assign op, which is not among the
// outputs passed to AddSymbolicGradients.
TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) {
  auto x = Variable(scope_test_, {2, 3}, DT_DOUBLE);
  auto x_const = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}});
  auto x_assign = Assign(scope_test_, x, x_const);

  auto y = Variable(scope_test_, {3, 1}, DT_DOUBLE);
  auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
  auto y_assign = Assign(scope_test_, y, y_const);

  auto m1 = MatMul(scope_test_, x, y);

  auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE);
  auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}});
  auto z_assign = Assign(scope_test_, z, z_const);

  auto m2 = MatMul(scope_test_, y, z);

  // Incoming gradients for m1 and m2 respectively.
  auto dm1 = Const(scope_test_, {{0.5}, {0.5}});
  auto dm2 =
      Const(scope_test_, {{0.5, 0.5, 0.5}, {0.6, 0.7, 0.8}, {0.6, 0.7, 0.9}});

  std::vector<Output> grads;
  TF_ASSERT_OK(
      AddSymbolicGradients(scope_test_, {m1, m2}, {y}, {dm1, dm2}, &grads));

  std::vector<Tensor> fetched;
  test::GetTensors(scope_test_, {x_assign, y_assign, z_assign}, {grads[0]},
                   &fetched);

  // The contributions flowing back from m1 and m2 are summed:
  // gradient w.r.t. y = transpose(x) * dm1 + dm2 * transpose(z)
  //                   = {2.5, 3.5, 4.5} + {15.0, 21.2, 22.3}.
  test::ExpectTensorNear<double>(
      fetched[0], test::AsTensor<double>({17.5, 24.7, 26.8}, {3, 1}), 1e-5);
}
|
||||||
|
|
||||||
// StopGradientSingleOutputMultiEdgeTest tests combinations of valid and
|
// StopGradientSingleOutputMultiEdgeTest tests combinations of valid and
|
||||||
// 'NoGradient' (induced by StopGradient op) returned along multiple edges from
|
// 'NoGradient' (induced by StopGradient op) returned along multiple edges from
|
||||||
// a single nodes output.
|
// a single nodes output.
|
||||||
|
@ -36,5 +36,19 @@ void GetTensor(const Scope& scope, Output tensor, Tensor* out) {
|
|||||||
*out = outputs[0];
|
*out = outputs[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars,
|
||||||
|
OutputList tensors, std::vector<Tensor>* out) {
|
||||||
|
ClientSession session(scope);
|
||||||
|
TF_CHECK_OK(session.Run(assign_vars, nullptr));
|
||||||
|
TF_CHECK_OK(session.Run(tensors, out));
|
||||||
|
}
|
||||||
|
|
||||||
|
void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars,
|
||||||
|
Output tensor, Tensor* out) {
|
||||||
|
std::vector<Tensor> outputs;
|
||||||
|
GetTensors(scope, assign_vars, {std::move(tensor)}, &outputs);
|
||||||
|
*out = outputs[0];
|
||||||
|
}
|
||||||
|
|
||||||
} // end namespace test
|
} // end namespace test
|
||||||
} // end namespace tensorflow
|
} // end namespace tensorflow
|
||||||
|
@ -26,9 +26,21 @@ namespace test {
|
|||||||
void GetTensors(const Scope& scope, OutputList tensors,
|
void GetTensors(const Scope& scope, OutputList tensors,
|
||||||
std::vector<Tensor>* out);
|
std::vector<Tensor>* out);
|
||||||
|
|
||||||
|
/// Computes the outputs listed in 'tensors', returning the resulting tensors
/// in 'out'. 'assign_vars' are extra outputs that are run first, e.g. to
/// assign values to variables.
|
||||||
|
void GetTensors(const Scope& scope, const std::vector<Output>& assign_vars,
|
||||||
|
OutputList tensors, std::vector<Tensor>* out);
|
||||||
|
|
||||||
/// Computes the output 'tensor', returning the resulting tensor in 'out'.
|
/// Computes the output 'tensor', returning the resulting tensor in 'out'.
|
||||||
void GetTensor(const Scope& scope, Output tensor, Tensor* out);
|
void GetTensor(const Scope& scope, Output tensor, Tensor* out);
|
||||||
|
|
||||||
|
/// Computes the output 'tensor', returning the resulting tensor in 'out'.
/// 'assign_vars' are extra outputs that are run first, e.g. to assign values
/// to variables.
|
||||||
|
void GetTensor(const Scope& scope, const std::vector<Output>& assign_vars,
|
||||||
|
Output tensor, Tensor* out);
|
||||||
|
|
||||||
} // namespace test
|
} // namespace test
|
||||||
} // namespace tensorflow
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user