Merge pull request #45046 from vnvo2409:gradients

PiperOrigin-RevId: 345503187
Change-Id: I068e4c21802e618398fff3a78ea60a89e473a672
TensorFlower Gardener 2020-12-03 12:05:26 -08:00
commit c10504b9fa
10 changed files with 408 additions and 10 deletions

View File

@@ -66,7 +66,7 @@ void GetDims(const TF_Tensor* t, int64_t* out_dims) {
 // Runs model as is if output is a scalar,
 // else sums the output tensor before returning.
 Status RunAndMaybeSum(AbstractContext* ctx, Model forward,
-                      absl::Span<AbstractTensorHandle*> inputs,
+                      absl::Span<AbstractTensorHandle* const> inputs,
                       absl::Span<AbstractTensorHandle*> outputs,
                       bool use_function) {
   GradientRegistry registry;
@@ -99,19 +99,24 @@ Status RunAndMaybeSum(AbstractContext* ctx, Model forward,
   sum_inputs[0] = model_out;
   sum_inputs[1] = sum_dims.get();
-  TF_RETURN_IF_ERROR(ops::Sum(ctx, absl::MakeSpan(sum_inputs),
-                              absl::MakeSpan(model_outputs), "sum_output"));
+  TF_RETURN_IF_ERROR(
+      ops::Sum(ctx, sum_inputs, absl::MakeSpan(model_outputs), "sum_output"));
   outputs[0] = model_outputs[0];
   return Status::OK();
 }
 // ========================= End Helper Functions==============================
 Status CalcNumericalGrad(AbstractContext* ctx, Model forward,
-                         absl::Span<AbstractTensorHandle*> inputs,
+                         absl::Span<AbstractTensorHandle* const> inputs,
                          int input_index, bool use_function,
                          AbstractTensorHandle** numerical_grad) {
+  vector<AbstractTensorHandle*> theta_inputs(inputs.size());
+  for (int i{}; i < inputs.size(); ++i) {
+    theta_inputs[i] = inputs[i];
+  }
   AbstractTensorHandle* theta =
-      inputs[input_index];  // parameter we are grad checking
+      theta_inputs[input_index];  // parameter we are grad checking
   // Convert from AbstractTensor to TF_Tensor.
   TF_Tensor* theta_tensor;
@@ -159,14 +164,14 @@ Status CalcNumericalGrad(AbstractContext* ctx, Model forward,
       ctx, thetaMinus_data.data(), theta_dims.data(), num_dims);
   // Get f(theta + eps):
-  inputs[input_index] = thetaPlus.get();
-  TF_RETURN_IF_ERROR(RunAndMaybeSum(ctx, forward, inputs,
+  theta_inputs[input_index] = thetaPlus.get();
+  TF_RETURN_IF_ERROR(RunAndMaybeSum(ctx, forward, theta_inputs,
                                     absl::MakeSpan(f_outputs), use_function));
   AbstractTensorHandle* fPlus = f_outputs[0];
   // Get f(theta - eps):
-  inputs[input_index] = thetaMinus.get();
-  TF_RETURN_IF_ERROR(RunAndMaybeSum(ctx, forward, inputs,
+  theta_inputs[input_index] = thetaMinus.get();
+  TF_RETURN_IF_ERROR(RunAndMaybeSum(ctx, forward, theta_inputs,
                                     absl::MakeSpan(f_outputs), use_function));
   AbstractTensorHandle* fMinus = f_outputs[0];
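Context for the change above: with the parameter span now typed absl::Span<AbstractTensorHandle* const>, the checker can no longer reseat inputs[input_index] in place, so it copies the handles into a local, mutable theta_inputs vector before substituting the perturbed parameter. The quantity it estimates is the usual central difference; a sketch of the estimate for the checked element i (the epsilon value itself is defined elsewhere in gradient_checker.cc and is not part of this hunk):

    numerical_grad[i] ≈ (f(theta + eps * e_i) - f(theta - eps * e_i)) / (2 * eps)

where f is the (possibly Sum-reduced) forward model and e_i is the one-hot perturbation of element i of the checked parameter.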

View File

@@ -45,7 +45,7 @@ namespace gradients {
  * hold the numerical gradient data at the end of the function.
  */
 Status CalcNumericalGrad(AbstractContext* ctx, Model forward,
-                         absl::Span<AbstractTensorHandle*> inputs,
+                         absl::Span<AbstractTensorHandle* const> inputs,
                          int input_index, bool use_function,
                          AbstractTensorHandle** numerical_grad);
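The header-side signature change mirrors the one in the .cc file: taking absl::Span<AbstractTensorHandle* const> promises callers that the function will not swap their handle pointers, and it lets a std::vector<AbstractTensorHandle*> or a brace-initialized list convert to the parameter implicitly. A minimal, self-contained illustration of the distinction (plain int* stands in for AbstractTensorHandle*; the function names are illustrative only):

    #include "absl/types/span.h"

    // The elements of a Span<T* const> are const pointers: they can be read
    // and dereferenced, but the span cannot be used to reseat them.
    void TakesConstView(absl::Span<int* const> in) {
      // in[0] = nullptr;  // would not compile
    }

    // A Span<T*> exposes mutable elements, so the callee may overwrite the
    // caller's pointers -- which is what CalcNumericalGrad used to do in place.
    void TakesMutableView(absl::Span<int*> in) { in[0] = nullptr; }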

View File

@@ -1,6 +1,20 @@
 load("//tensorflow:tensorflow.bzl", "filegroup")
 load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
+# buildifier: disable=same-origin-load
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_cuda_cc_test",
+)
+load(
+    "//tensorflow/core/platform:build_config.bzl",
+    "tf_kernel_tests_linkstatic",
+)
+load(
+    "//tensorflow/core/platform:build_config_root.bzl",
+    "tf_cuda_tests_tags",
+)
 # Library of gradient functions.
 package(
     licenses = ["notice"],  # Apache 2.0
@@ -95,3 +109,38 @@ filegroup(
         "//tensorflow/python:__pkg__",
     ],
 )
+cc_library(
+    name = "grad_test_helper",
+    testonly = True,
+    srcs = ["grad_test_helper.cc"],
+    hdrs = ["grad_test_helper.h"],
+    visibility = [
+        "//tensorflow:internal",
+    ],
+    deps = [
+        "//tensorflow/c/eager:gradient_checker",
+        "//tensorflow/c/eager:gradients_util",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+tf_cuda_cc_test(
+    name = "nn_grad_test",
+    size = "small",
+    srcs = [
+        "nn_grad_test.cc",
+    ],
+    args = ["--heap_check=local"],
+    linkstatic = tf_kernel_tests_linkstatic(),
+    tags = tf_cuda_tests_tags() + ["no_cuda_asan"],  # b/173654156,
+    deps = [
+        ":grad_test_helper",
+        ":nn_grad",
+        "//tensorflow/c/eager:c_api_test_util",
+        "//tensorflow/c/experimental/gradients/tape:tape_context",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
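The two new targets wire the gradient checker into a reusable test helper and a GPU-capable test: grad_test_helper is testonly so other gradient tests can link it, while nn_grad_test is declared as a tf_cuda_cc_test so it runs under both CPU and CUDA configurations (with cuda_asan excluded per the referenced bug). Assuming this BUILD file lives in tensorflow/c/experimental/gradients, as the include paths in the new sources suggest, the test could be run locally with something like:

    bazel test //tensorflow/c/experimental/gradients:nn_grad_test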

View File

@@ -0,0 +1,75 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/gradients/grad_test_helper.h"
#include "tensorflow/c/eager/gradient_checker.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
namespace gradients {
namespace internal {
void CompareNumericalAndAutodiffGradients(
Model model, Model grad_model, AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs, bool use_function,
const GradientRegistry& registry, double abs_error) {
auto num_inputs = inputs.size();
std::vector<AbstractTensorHandle*> outputs(num_inputs);
auto s = RunModel(grad_model, ctx, inputs, absl::MakeSpan(outputs),
/*use_function=*/use_function, registry);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
for (int i = 0; i < num_inputs; ++i) {
if (!outputs[i]) continue;
AbstractTensorHandle* g; // Will contain numerical approximation data.
s = CalcNumericalGrad(ctx, model, inputs,
/*input_index=*/i,
/*use_function=*/use_function, &g);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
TF_Tensor* numerical_tensor;
s = GetValue(g, &numerical_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
auto num_elem_numerical = TF_TensorElementCount(numerical_tensor);
TF_Tensor* analytical_tensor;
s = GetValue(outputs[i], &analytical_tensor);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
auto num_elem_analytical = TF_TensorElementCount(analytical_tensor);
ASSERT_EQ(num_elem_numerical, num_elem_analytical);
float* dnumerical = new float[num_elem_numerical]{0};
memcpy(&dnumerical[0], TF_TensorData(numerical_tensor),
TF_TensorByteSize(numerical_tensor));
float* danalytical = new float[num_elem_analytical]{0};
memcpy(&danalytical[0], TF_TensorData(analytical_tensor),
TF_TensorByteSize(analytical_tensor));
for (int j = 0; j < num_elem_numerical; j++) {
ASSERT_NEAR(dnumerical[j], danalytical[j], abs_error);
}
TF_DeleteTensor(analytical_tensor);
TF_DeleteTensor(numerical_tensor);
delete[] danalytical;
delete[] dnumerical;
outputs[i]->Unref();
}
}
} // namespace internal
} // namespace gradients
} // namespace tensorflow
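CompareNumericalAndAutodiffGradients above runs the hand-written grad model once, then, for every input that received a gradient, asks CalcNumericalGrad for a finite-difference estimate and compares the two tensors element by element with ASSERT_NEAR. The comparison is absolute: with the default abs_error of 1e-2 (declared in the header below), an analytical value of 2.0 matches a numerical estimate of 1.994, while 2.02 would fail. The deliberately loose default reflects that central-difference estimates computed in float32 carry truncation and rounding error, so exact agreement with the autodiff value is not expected.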

View File

@@ -0,0 +1,33 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_GRAD_TEST_HELPER_H_
#define TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_GRAD_TEST_HELPER_H_
#include "tensorflow/c/eager/gradients_util.h"
namespace tensorflow {
namespace gradients {
namespace internal {
void CompareNumericalAndAutodiffGradients(
Model model, Model grad_model, AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs, bool use_function,
const GradientRegistry& registry, double abs_error = 1e-2);
} // namespace internal
} // namespace gradients
} // namespace tensorflow
#endif // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_GRAD_TEST_HELPER_H_

View File

@@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/platform/errors.h"
 using std::vector;
+using tensorflow::ops::BiasAddGrad;
 using tensorflow::ops::Mul;
 using tensorflow::ops::ReluGrad;
@@ -110,6 +111,48 @@ class SparseSoftmaxCrossEntropyWithLogitsGradientFunction
   vector<AbstractTensorHandle*> forward_outputs;
 };
+// TODO(vnvo2409): Add python test
+class BiasAddGradientFunction : public GradientFunction {
+ public:
+  explicit BiasAddGradientFunction(AttrBuilder f_attrs)
+      : forward_attrs(f_attrs) {}
+  Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
+                 vector<AbstractTensorHandle*>* grad_outputs) override {
+    /* Given upstream grad U and a BiasAdd: A + bias, the gradients are:
+     *
+     *    dA = U
+     *    dbias = reduceSum(U, dims = channel_dim)
+     */
+    AbstractTensorHandle* upstream_grad = grad_inputs[0];
+    DCHECK(upstream_grad);
+    grad_outputs->resize(2);
+    // Recover data format from forward pass for gradient.
+    std::string data_format;
+    TF_RETURN_IF_ERROR(forward_attrs.Get("data_format", &data_format));
+    // Grad for A
+    (*grad_outputs)[0] = upstream_grad;
+    (*grad_outputs)[0]->Ref();
+    // Grad for bias
+    vector<AbstractTensorHandle*> bias_add_grad_outputs(1);
+    std::string name = "bias_add_grad";
+    TF_RETURN_IF_ERROR(BiasAddGrad(ctx->ctx, {upstream_grad},
+                                   absl::MakeSpan(bias_add_grad_outputs),
+                                   data_format.c_str(), name.c_str()));
+    (*grad_outputs)[1] = bias_add_grad_outputs[0];
+    return Status::OK();
+  }
+  ~BiasAddGradientFunction() override {}
+ private:
+  AttrBuilder forward_attrs;
+};
 }  // namespace
 BackwardFunction* ReluRegisterer(const ForwardOperation& op) {
@@ -129,5 +172,14 @@ BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer(
   return new BackwardFunction(gradient_function, default_gradients);
 }
+BackwardFunction* BiasAddRegisterer(const ForwardOperation& op) {
+  // For ops with a single output, the gradient function is not called if there
+  // is no incoming gradient. So we do not need to worry about creating zeros
+  // grads in this case.
+  auto gradient_function = new BiasAddGradientFunction(op.attrs);
+  auto default_gradients = new PassThroughDefaultGradients(op);
+  return new BackwardFunction(gradient_function, default_gradients);
+}
 }  // namespace gradients
 }  // namespace tensorflow
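For readers unfamiliar with the BiasAddGrad kernel used above: BiasAdd computes value + bias with bias broadcast along the channel dimension (the last dimension for "NHWC", the second for "NCHW"), so the gradient with respect to value is the upstream gradient U unchanged, and the gradient with respect to bias is U summed over every dimension except the channel one. As a small worked example with NHWC and an upstream gradient U = [[1, 2], [3, 4]] of shape [2, 2] (channel size 2): dA = [[1, 2], [3, 4]] and dbias = [1 + 3, 2 + 4] = [4, 6].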

View File

@@ -22,6 +22,7 @@ namespace gradients {
 BackwardFunction* ReluRegisterer(const ForwardOperation& op);
 BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer(
     const ForwardOperation& op);
+BackwardFunction* BiasAddRegisterer(const ForwardOperation& op);
 }  // namespace gradients
 }  // namespace tensorflow

View File

@@ -0,0 +1,141 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/gradients/nn_grad.h"
#include "tensorflow/c/eager/c_api_test_util.h"
#include "tensorflow/c/experimental/gradients/grad_test_helper.h"
#include "tensorflow/c/experimental/gradients/tape/tape_context.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
namespace gradients {
namespace internal {
namespace {
using tensorflow::TF_StatusPtr;
using tracing::TracingOperation;
Status BiasAddModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry) {
return ops::BiasAdd(ctx, inputs, outputs, "BiasAdd");
}
Status BiasAddGradModel(AbstractContext* ctx,
absl::Span<AbstractTensorHandle* const> inputs,
absl::Span<AbstractTensorHandle*> outputs,
const GradientRegistry& registry) {
TapeVSpace vspace(ctx);
auto tape = new Tape(/*persistent=*/false);
tape->Watch(ToId(inputs[0])); // Watch A.
tape->Watch(ToId(inputs[1])); // Watch Bias.
std::vector<AbstractTensorHandle*> temp_outputs(1);
AbstractContextPtr tape_ctx(new TapeContext(ctx, tape, registry));
TF_RETURN_IF_ERROR(ops::BiasAdd(tape_ctx.get(), inputs,
absl::MakeSpan(temp_outputs), "BiasAddGrad"));
std::unordered_map<tensorflow::int64, TapeTensor>
source_tensors_that_are_targets;
std::vector<AbstractTensorHandle*> out_grads;
TF_RETURN_IF_ERROR(tape->ComputeGradient(
vspace, /*target_tensor_ids=*/{ToId(temp_outputs[0])},
/*source_tensor_ids=*/{ToId(inputs[0]), ToId(inputs[1])},
source_tensors_that_are_targets,
/*output_gradients=*/{}, &out_grads,
/*build_default_zeros_grads=*/false));
for (auto temp_output : temp_outputs) {
temp_output->Unref();
}
outputs[0] = out_grads[0];
outputs[1] = out_grads[1];
delete tape;
return Status::OK();
}
Status RegisterGradients(GradientRegistry* registry) {
TF_RETURN_IF_ERROR(registry->Register("BiasAdd", BiasAddRegisterer));
return Status::OK();
}
class CppGradients
: public ::testing::TestWithParam<std::tuple<const char*, bool, bool>> {
protected:
void SetUp() override {
TF_StatusPtr status(TF_NewStatus());
TF_SetTracingImplementation(std::get<0>(GetParam()), status.get());
Status s = StatusFromTF_Status(status.get());
CHECK_EQ(errors::OK, s.code()) << s.error_message();
{
AbstractContext* ctx_raw = nullptr;
Status s =
BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
ctx_.reset(ctx_raw);
}
s = RegisterGradients(&registry_);
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
}
GradientRegistry registry_;
AbstractContextPtr ctx_;
public:
bool UseMlir() const { return strcmp(std::get<0>(GetParam()), "mlir") == 0; }
bool UseFunction() const { return std::get<2>(GetParam()); }
};
TEST_P(CppGradients, TestBiasAddGrad) {
if (UseFunction() && UseMlir()) {
GTEST_SKIP() << "SetAttrString has not been implemented yet.\n";
}
// A
float A_vals[] = {1.0f, 2.0f, 3.0f, 4.0f};
int64_t A_dims[] = {2, 2};
AbstractTensorHandlePtr A =
GetTensorHandleUtilFloat(ctx_.get(), A_vals, A_dims, 2);
// Bias
float Bias_vals[] = {2.0f, 3.0f};
int64_t Bias_dims[] = {2};
AbstractTensorHandlePtr Bias =
GetTensorHandleUtilFloat(ctx_.get(), Bias_vals, Bias_dims, 1);
std::vector<AbstractTensorHandle*> inputs{A.get(), Bias.get()};
ASSERT_NO_FATAL_FAILURE(CompareNumericalAndAutodiffGradients(
BiasAddModel, BiasAddGradModel, ctx_.get(), {A.get(), Bias.get()},
/*use_function=*/UseFunction(), registry_));
}
#ifdef PLATFORM_GOOGLE
INSTANTIATE_TEST_SUITE_P(
UnifiedCAPI, CppGradients,
::testing::Combine(::testing::Values("graphdef", "mlir"),
/*tfrt*/ ::testing::Values(false),
/*use_function*/ ::testing::Values(true, false)));
#else
INSTANTIATE_TEST_SUITE_P(
UnifiedCAPI, CppGradients,
::testing::Combine(::testing::Values("graphdef", "mlir"),
/*tfrt*/ ::testing::Values(false),
/*use_function*/ ::testing::Values(true, false)));
#endif
} // namespace
} // namespace internal
} // namespace gradients
} // namespace tensorflow
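TestBiasAddGrad feeds A = [[1, 2], [3, 4]] and Bias = [2, 3] through both paths. Because CalcNumericalGrad sums the model output before differencing, and the tape path receives a default upstream gradient of ones, both sides effectively differentiate L = sum(BiasAdd(A, Bias)). The expected values are therefore dL/dA = [[1, 1], [1, 1]] and dL/dBias = [2, 2] (each bias element feeds one column of the 2x2 output); the helper only requires the two estimates to agree within its absolute tolerance, not to match these values exactly.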

View File

@@ -69,5 +69,38 @@ Status Relu(AbstractContext* ctx,
   return Status::OK();
 }
+Status BiasAdd(AbstractContext* ctx,
+               absl::Span<AbstractTensorHandle* const> inputs,
+               absl::Span<AbstractTensorHandle*> outputs, const char* name) {
+  AbstractOperationPtr bias_add_op(ctx->CreateOperation());
+  TF_RETURN_IF_ERROR(
+      bias_add_op->Reset("BiasAdd", /*raw_device_name=*/nullptr));
+  TF_RETURN_IF_ERROR(MaybeSetOpName(bias_add_op.get(), name));
+  TF_RETURN_IF_ERROR(bias_add_op->AddInput(inputs[0]));  // tensor input
+  TF_RETURN_IF_ERROR(bias_add_op->AddInput(inputs[1]));  // bias
+  int num_retvals = 1;
+  TF_RETURN_IF_ERROR(bias_add_op->Execute(outputs, &num_retvals));
+  return Status::OK();
+}
+// Computes Bias Add gradient given upstream grads
+Status BiasAddGrad(AbstractContext* ctx,
+                   absl::Span<AbstractTensorHandle* const> inputs,
+                   absl::Span<AbstractTensorHandle*> outputs,
+                   const char* data_format, const char* name) {
+  AbstractOperationPtr bias_add_grad_op(ctx->CreateOperation());
+  TF_RETURN_IF_ERROR(
+      bias_add_grad_op->Reset("BiasAddGrad", /*raw_device_name=*/nullptr));
+  TF_RETURN_IF_ERROR(MaybeSetOpName(bias_add_grad_op.get(), name));
+  TF_RETURN_IF_ERROR(bias_add_grad_op->SetAttrString("data_format", data_format,
+                                                     strlen(data_format)));
+  TF_RETURN_IF_ERROR(bias_add_grad_op->AddInput(inputs[0]));
+  int num_retvals = 1;
+  TF_RETURN_IF_ERROR(bias_add_grad_op->Execute(outputs, &num_retvals));
+  return Status::OK();
+}
 }  // namespace ops
 }  // namespace tensorflow

View File

@@ -34,6 +34,15 @@ Status Relu(AbstractContext* ctx,
             absl::Span<AbstractTensorHandle* const> inputs,
             absl::Span<AbstractTensorHandle*> outputs, const char* name);
+Status BiasAdd(AbstractContext* ctx,
+               absl::Span<AbstractTensorHandle* const> inputs,
+               absl::Span<AbstractTensorHandle*> outputs, const char* name);
+Status BiasAddGrad(AbstractContext* ctx,
+                   absl::Span<AbstractTensorHandle* const> inputs,
+                   absl::Span<AbstractTensorHandle*> outputs,
+                   const char* data_format, const char* name);
 }  // namespace ops
 }  // namespace tensorflow