- The main change is to get rid of int64 tensor ids from Tape and directly use AbstractTensorHandles.
- Get rid of tensorflow::gradients::Context and directly use AbstractContext*.
- Get rid of DefaultGradientFunction and BackwardFunction (which was a wrapper for a GradientFunction and a DefaultGradientFunction). We had introduced DefaultGradientFunction to support existing Python gradient functions, which expect all necessary incoming grads to be non-None. This is only relevant for ops with more than one output, which are few; we could handle those by creating a wrapper GradientFunction that builds the zeros if needed. Getting rid of DefaultGradientFunction greatly simplifies the API.
- Introduce ForwardOperation::skip_input_indices. This will be filled in in a follow-up change. There is a bug tracking this.
- Introduce helpers for implementing the behavior of tf.no_gradient and tf.stop_gradient, i.e. RegisterNotDifferentiable and NotDifferentiableGradientFunction (a registration sketch follows below).
- One slight behavior change: currently, when an op does not have a GradientFunction registered, we silently record a nullptr GradientFunction on the tape, which sometimes leads to uninformative error messages. Now we loudly raise an error when GradientRegistry::Lookup fails in TapeContext::Execute, so any op executing under a TapeContext must have a registered GradientFunction. Non-differentiable ops need to be explicitly registered using RegisterNotDifferentiable, e.g. CheckNumerics in gradients_test.cc.

c/eager/tape.h: I changed the signatures of gradient functions to use `absl::Span<Gradient*>` instead of `vector<Gradient*>*` for the result grads (see the sketch below). This makes them consistent with the new Tape API and generally makes things cleaner.

PiperOrigin-RevId: 345534016
Change-Id: Ie1bf5dff88f87390e6b470acc379d3852ce68b5c
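To make the tape.h signature change concrete, here is a rough sketch of a gradient function written against the Span-based result grads. The class name PassThroughGradientFunction and the exact Compute() parameters are illustrative assumptions, not code from this change; they only loosely follow the GradientFunction interface in tensorflow/c/eager/gradients.h.

#include "absl/types/span.h"
#include "tensorflow/c/eager/abstract_context.h"
#include "tensorflow/c/eager/abstract_tensor_handle.h"
#include "tensorflow/c/eager/gradients.h"
#include "tensorflow/core/platform/status.h"

namespace tensorflow {
namespace gradients {

// Hypothetical gradient function for an op whose gradient is the identity.
// With the Span-based signature the caller allocates `grad_inputs` (one slot
// per op input) and the gradient function fills it in place, instead of
// appending to a vector<Gradient*>*.
class PassThroughGradientFunction : public GradientFunction {
 public:
  Status Compute(AbstractContext* ctx,
                 absl::Span<AbstractTensorHandle* const> grad_outputs,
                 absl::Span<AbstractTensorHandle*> grad_inputs) override {
    // Forward the incoming gradient unchanged; take a reference since the
    // caller owns the handles it gets back.
    grad_inputs[0] = grad_outputs[0];
    if (grad_inputs[0] != nullptr) grad_inputs[0]->Ref();
    return Status::OK();
  }
};

}  // namespace gradients
}  // namespace tensorflow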
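Similarly, a hedged sketch of the non-differentiable-op helper mentioned above: CheckNumerics is registered explicitly so that GradientRegistry::Lookup succeeds when the op runs under a TapeContext. The wrapper name RegisterTestGradients and the exact RegisterNotDifferentiable signature are assumptions for illustration.

// Hypothetical registration snippet: every op executed under a TapeContext
// must have *some* entry in the registry, so non-differentiable ops are
// marked explicitly instead of being silently recorded with a nullptr
// gradient function.
Status RegisterTestGradients(GradientRegistry* registry) {
  TF_RETURN_IF_ERROR(registry->Register("Exp", ExpRegisterer));
  // CheckNumerics has no gradient; mark it as not differentiable so that
  // GradientRegistry::Lookup does not fail in TapeContext::Execute.
  TF_RETURN_IF_ERROR(RegisterNotDifferentiable(registry, "CheckNumerics"));
  return Status::OK();
}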
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <pybind11/stl.h>

#include "pybind11/pybind11.h"
#include "tensorflow/c/eager/gradients.h"
#include "tensorflow/c/experimental/gradients/math_grad.h"
#include "tensorflow/c/experimental/gradients/nn_grad.h"
#include "tensorflow/c/experimental/gradients/tape/tape_context.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/python/lib/core/pybind11_status.h"

namespace py = pybind11;
namespace tensorflow {
namespace gradients {

// Populates `registry` with the gradient functions for the ops exercised by
// the Python tape bindings below.
Status RegisterGradients(GradientRegistry* registry) {
  // TODO(srbs): Rename ops::Add and AddRegisterer to AddV2.
  TF_RETURN_IF_ERROR(registry->Register("AddV2", AddRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Exp", ExpRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("MatMul", MatMulRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Relu", ReluRegisterer));
  TF_RETURN_IF_ERROR(
      registry->Register("SparseSoftmaxCrossEntropyWithLogits",
                         SparseSoftmaxCrossEntropyWithLogitsRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Neg", NegRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Sub", SubRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Mul", MulRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("Log1p", Log1pRegisterer));
  TF_RETURN_IF_ERROR(registry->Register("DivNoNan", DivNoNanRegisterer));
  return Status::OK();
}
// Exposes the eager gradient tape machinery to Python: `Tape` records watched
// tensors and computes gradients, `GradientRegistry` comes pre-populated via
// RegisterGradients above, and `TapeContext` wraps an AbstractContext so that
// ops executed through it are recorded on the given tape.
PYBIND11_MODULE(_tape, m) {
  py::class_<Tape>(m, "Tape")
      .def(py::init([](bool persistent) { return new Tape(persistent); }))
      .def("Watch", [](Tape* self, AbstractTensorHandle* t) { self->Watch(t); })
      .def("ComputeGradient",
           [](Tape* self, AbstractContext* ctx,
              std::vector<AbstractTensorHandle*> target_tensors,
              std::vector<AbstractTensorHandle*> source_tensors,
              std::vector<AbstractTensorHandle*> output_gradients) {
             std::vector<AbstractTensorHandle*> results(source_tensors.size());
             Status s = self->ComputeGradient(ctx, target_tensors,
                                              source_tensors, output_gradients,
                                              absl::MakeSpan(results));
             MaybeRaiseRegisteredFromStatus(s);
             return results;
           });
  py::class_<GradientRegistry>(m, "GradientRegistry").def(py::init([]() {
    auto registry = new GradientRegistry();
    MaybeRaiseRegisteredFromStatus(RegisterGradients(registry));
    return registry;
  }));
  py::class_<TapeContext, AbstractContext>(m, "TapeContext")
      .def(py::init(
          [](AbstractContext* ctx, Tape* tape, GradientRegistry* registry) {
            return new TapeContext(ctx, tape, *registry);
          }));
}

}  // namespace gradients
}  // namespace tensorflow