add BiasAddGradient

2020-11-20 21:48:55 +07:00 · 2020-11-20 21:48:55 +07:00 · 7b9e2ff862
commit 7b9e2ff862
parent 74ecc3ec25
8 changed files with 448 additions and 0 deletions
--- a/tensorflow/c/experimental/gradients/BUILD
+++ b/tensorflow/c/experimental/gradients/BUILD
@ -1,5 +1,21 @@
 load("//tensorflow:tensorflow.bzl", "filegroup")
 load("//tensorflow/core/platform:rules_cc.bzl", "cc_library")
 load(
    "//tensorflow:tensorflow.bzl",
    "if_libtpu",
    "tf_cc_test",
    "tf_copts",
    "tf_cuda_cc_test",
    "tf_cuda_library",
 )
 load(
    "//tensorflow/core/platform:build_config.bzl",
    "tf_kernel_tests_linkstatic",
 )
 load(
    "//tensorflow/core/platform:build_config_root.bzl",
    "tf_cuda_tests_tags",
 )
 # Library of gradient functions.
 package(
@ -95,3 +111,69 @@ filegroup(
        "//tensorflow/python:__pkg__",
    ],
 )
 cc_library(
    name = "nn_grad_testutil",
    srcs = [
        "nn_grad_testutil.cc",
    ],
    hdrs = [
        "nn_grad_testutil.h",
    ],
    visibility = [
        "//tensorflow:internal",
    ],
    deps = [
        "//tensorflow/c/eager:abstract_tensor_handle",
        "//tensorflow/c/eager:c_api_experimental",
        "//tensorflow/c/eager:c_api_unified_internal",
        "//tensorflow/c/eager:gradients_internal",
        "//tensorflow/c/eager:gradients_util",
        "//tensorflow/c/eager:tape",
        "//tensorflow/c/experimental/gradients/tape:tape_context",
        "//tensorflow/c/experimental/ops:array_ops",
        "//tensorflow/c/experimental/ops:math_ops",
        "//tensorflow/c/experimental/ops:nn_ops",
        "//tensorflow/core/lib/llvm_rtti",
        "//tensorflow/core/platform:status",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/types:span",
    ],
 )
 tf_cuda_cc_test(
    name = "nn_grad_test",
    size = "small",
    srcs = [
        "nn_grad_test.cc",
    ],
    args = ["--heap_check=local"],
    linkstatic = tf_kernel_tests_linkstatic(),
    tags = tf_cuda_tests_tags() + ["nomac"],
    deps = [
        "//tensorflow/c:c_api",
        "//tensorflow/c:c_test_util",
        "//tensorflow/c:tf_status_helper",
        "//tensorflow/c/eager:abstract_context",
        "//tensorflow/c/eager:abstract_tensor_handle",
        "//tensorflow/c/eager:c_api_experimental",
        "//tensorflow/c/eager:c_api_test_util",
        "//tensorflow/c/eager:c_api_unified_internal",
        "//tensorflow/c/eager:gradients_internal",
        "//tensorflow/c/eager:unified_api_testutil",
        "//tensorflow/c/experimental/gradients:nn_grad",
        "//tensorflow/c/experimental/gradients:nn_grad_testutil",
        "//tensorflow/c/experimental/gradients/tape:tape_context",
        "//tensorflow/c/experimental/ops",
        "//tensorflow/cc/profiler",
        "//tensorflow/compiler/mlir/tensorflow/c:mlir_c_api_registration",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core/lib/llvm_rtti",
        "@com_google_absl//absl/container:flat_hash_set",
        "@com_google_absl//absl/strings",
        "@com_google_absl//absl/types:span",
    ],
 )
--- a/tensorflow/c/experimental/gradients/nn_grad.cc
+++ b/tensorflow/c/experimental/gradients/nn_grad.cc
@ -25,6 +25,7 @@ limitations under the License.
 #include "tensorflow/core/platform/errors.h"
 using std::vector;
 using tensorflow::ops::BiasAddGrad;
 using tensorflow::ops::Mul;
 using tensorflow::ops::ReluGrad;
@ -110,6 +111,48 @@ class SparseSoftmaxCrossEntropyWithLogitsGradientFunction
  vector<AbstractTensorHandle*> forward_outputs;
 };
 // TODO(vnvo2409): Add python test
 class BiasAddGradientFunction : public GradientFunction {
 public:
  explicit BiasAddGradientFunction(AttrBuilder f_attrs)
      : forward_attrs(f_attrs) {}
  Status Compute(Context* ctx, const IncomingGradients& grad_inputs,
                 vector<AbstractTensorHandle*>* grad_outputs) override {
    /* Given upstream grad U and a BiasAdd: A + bias, the gradients are:
     *
     *    dA = U
     *    dbias = reduceSum(U, dims = channel_dim)
     */
    AbstractTensorHandle* upstream_grad = grad_inputs[0];
    DCHECK(upstream_grad);
    grad_outputs->resize(2);
    // Recover data format from forward pass for gradient.
    std::string data_format;
    forward_attrs.Get("data_format", &data_format);
    // Grad for A
    (*grad_outputs)[0] = upstream_grad;
    (*grad_outputs)[0]->Ref();
    // Grad for bias
    vector<AbstractTensorHandle*> bias_add_grad_outputs(1);
    std::string name = "bias_add_grad";
    TF_RETURN_IF_ERROR(BiasAddGrad(ctx->ctx, {upstream_grad},
                                   absl::MakeSpan(bias_add_grad_outputs),
                                   data_format.c_str(), name.c_str()));
    (*grad_outputs)[1] = bias_add_grad_outputs[0];
    return Status::OK();
  }
  ~BiasAddGradientFunction() override {}
 private:
  AttrBuilder forward_attrs;
 };
 }  // namespace
 BackwardFunction* ReluRegisterer(const ForwardOperation& op) {
@ -129,5 +172,14 @@ BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer(
  return new BackwardFunction(gradient_function, default_gradients);
 }
 BackwardFunction* BiasAddRegisterer(const ForwardOperation& op) {
  // For ops with a single output, the gradient function is not called if there
  // is no incoming gradient. So we do not need to worry about creating zeros
  // grads in this case.
  auto gradient_function = new BiasAddGradientFunction(op.attrs);
  auto default_gradients = new PassThroughDefaultGradients(op);
  return new BackwardFunction(gradient_function, default_gradients);
 }
 }  // namespace gradients
 }  // namespace tensorflow
--- a/tensorflow/c/experimental/gradients/nn_grad.h
+++ b/tensorflow/c/experimental/gradients/nn_grad.h
@ -22,6 +22,7 @@ namespace gradients {
 BackwardFunction* ReluRegisterer(const ForwardOperation& op);
 BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer(
    const ForwardOperation& op);
 BackwardFunction* BiasAddRegisterer(const ForwardOperation& op);
 }  // namespace gradients
 }  // namespace tensorflow
--- a/tensorflow/c/experimental/gradients/nn_grad_test.cc
+++ b/tensorflow/c/experimental/gradients/nn_grad_test.cc
@ -0,0 +1,154 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/experimental/gradients/nn_grad.h"
 #include <memory>
 #include "absl/container/flat_hash_set.h"
 #include "absl/types/span.h"
 #include "tensorflow/c/eager/abstract_context.h"
 #include "tensorflow/c/eager/abstract_tensor_handle.h"
 #include "tensorflow/c/eager/c_api_experimental.h"
 #include "tensorflow/c/eager/c_api_test_util.h"
 #include "tensorflow/c/eager/c_api_unified_experimental.h"
 #include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
 #include "tensorflow/c/eager/gradients.h"
 #include "tensorflow/c/eager/gradients_internal.h"
 #include "tensorflow/c/eager/gradients_util.h"
 #include "tensorflow/c/experimental/gradients/nn_grad_testutil.h"
 #include "tensorflow/c/experimental/gradients/tape/tape_context.h"
 #include "tensorflow/c/experimental/ops/nn_ops.h"
 #include "tensorflow/c/tf_status_helper.h"
 #include "tensorflow/c/tf_tensor.h"
 #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
 #include "tensorflow/core/platform/errors.h"
 #include "tensorflow/core/platform/test.h"
 namespace tensorflow {
 namespace gradients {
 namespace internal {
 namespace {
 using std::vector;
 using tensorflow::TF_StatusPtr;
 using tracing::TracingOperation;
 class CppGradients
    : public ::testing::TestWithParam<std::tuple<const char*, bool, bool>> {
 protected:
  void SetUp() override {
    TF_StatusPtr status(TF_NewStatus());
    TF_SetTracingImplementation(std::get<0>(GetParam()), status.get());
    Status s = StatusFromTF_Status(status.get());
    CHECK_EQ(errors::OK, s.code()) << s.error_message();
  }
 };
 Status RegisterGradients(GradientRegistry* registry) {
  TF_RETURN_IF_ERROR(registry->Register("BiasAdd", BiasAddRegisterer));
  return Status::OK();
 }
 TEST_P(CppGradients, TestBiasAddGrad) {
  if (std::get<0>(GetParam()) == "mlir" && std::get<2>(GetParam()) == 0) {
    GTEST_SKIP() << "SetAttrString has not been implemented yet.\n";
  }
  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
      TF_NewStatus(), TF_DeleteStatus);
  AbstractContextPtr ctx;
  {
    AbstractContext* ctx_raw = nullptr;
    Status s =
        BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw);
    ASSERT_EQ(errors::OK, s.code()) << s.error_message();
    ctx.reset(ctx_raw);
  }
  float A_vals[] = {1.0f, 2.0f, 3.0f, 4.0f};
  int64_t A_dims[] = {2, 2};
  float Bias_vals[] = {2.0f, 3.0f};
  int64_t Bias_dims[] = {2};
  AbstractTensorHandlePtr A =
      GetTensorHandleUtilFloat(ctx.get(), A_vals, A_dims, 2);
  AbstractTensorHandlePtr B =
      GetTensorHandleUtilFloat(ctx.get(), Bias_vals, Bias_dims, 1);
  GradientRegistry registry;
  Status s = RegisterGradients(&registry);
  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
  /* Pseudo-code:
   *
   * tape.watch(A)
   * tape.watch(B)
   * Y = BiasAdd(A, Bias)
   * outputs = tape.gradient(Y, [A, Bias])
   */
  std::vector<AbstractTensorHandle*> outputs(2);
  s = RunModel(BiasAddGradModel, ctx.get(), {A.get(), B.get()},
               absl::MakeSpan(outputs),
               /*use_function=*/!std::get<2>(GetParam()), registry);
  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
  TF_Tensor* dA_tensor;
  s = GetValue(outputs[0], &dA_tensor);
  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
  float result_data_dA[4] = {0};
  memcpy(&result_data_dA[0], TF_TensorData(dA_tensor),
         TF_TensorByteSize(dA_tensor));
  float expected_dA[4] = {1.0f, 1.0f, 1.0f, 1.0f};
  float tolerance = 1e-3;
  for (int j = 0; j < 4; j++) {
    ASSERT_NEAR(result_data_dA[j], expected_dA[j], tolerance);
  }
  TF_Tensor* dBias_tensor;
  s = GetValue(outputs[1], &dBias_tensor);
  ASSERT_EQ(errors::OK, s.code()) << s.error_message();
  float result_data_dBias[2] = {0};
  memcpy(&result_data_dBias[0], TF_TensorData(dBias_tensor),
         TF_TensorByteSize(dBias_tensor));
  float expected_dBias[2] = {2.0f, 2.0f};
  for (int j = 0; j < 2; j++) {
    ASSERT_NEAR(result_data_dBias[j], expected_dBias[j], tolerance);
  }
  outputs[0]->Unref();
  outputs[1]->Unref();
  TF_DeleteTensor(dA_tensor);
  TF_DeleteTensor(dBias_tensor);
 }
 #ifdef PLATFORM_GOOGLE
 INSTANTIATE_TEST_SUITE_P(
    UnifiedCAPI, CppGradients,
    ::testing::Combine(::testing::Values("graphdef", "mlir"),
                       /*tfrt*/ ::testing::Values(false),
                       /*executing_eagerly*/ ::testing::Values(true, false)));
 #else
 INSTANTIATE_TEST_SUITE_P(
    UnifiedCAPI, CppGradients,
    ::testing::Combine(::testing::Values("graphdef", "mlir"),
                       /*tfrt*/ ::testing::Values(false),
                       /*executing_eagerly*/ ::testing::Values(true, false)));
 #endif
 }  // namespace
 }  // namespace internal
 }  // namespace gradients
 }  // namespace tensorflow
--- a/tensorflow/c/experimental/gradients/nn_grad_testutil.cc
+++ b/tensorflow/c/experimental/gradients/nn_grad_testutil.cc
@ -0,0 +1,71 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include "tensorflow/c/experimental/gradients/nn_grad_testutil.h"
 #include <memory>
 #include "absl/types/span.h"
 #include "tensorflow/c/eager/abstract_tensor_handle.h"
 #include "tensorflow/c/eager/c_api_experimental.h"
 #include "tensorflow/c/eager/c_api_unified_experimental.h"
 #include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
 #include "tensorflow/c/eager/gradients.h"
 #include "tensorflow/c/eager/gradients_internal.h"
 #include "tensorflow/c/experimental/gradients/tape/tape_context.h"
 #include "tensorflow/c/experimental/ops/nn_ops.h"
 #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
 #include "tensorflow/core/platform/status.h"
 namespace tensorflow {
 namespace gradients {
 namespace internal {
 // Computes
 // y = BiasAdd(inputs[0], inputs[1])
 // return grad(y, {inputs[0], inputs[1]})
 Status BiasAddGradModel(AbstractContext* ctx,
                        absl::Span<AbstractTensorHandle* const> inputs,
                        absl::Span<AbstractTensorHandle*> outputs,
                        const GradientRegistry& registry) {
  TapeVSpace vspace(ctx);
  auto tape = new Tape(/*persistent=*/false);
  tape->Watch(ToId(inputs[0]));  // Watch x.
  tape->Watch(ToId(inputs[1]));  // Watch y.
  std::vector<AbstractTensorHandle*> bias_add_outputs(1);
  AbstractContextPtr tape_ctx(new TapeContext(ctx, tape, registry));
  TF_RETURN_IF_ERROR(ops::BiasAdd(tape_ctx.get(), inputs,
                                  absl::MakeSpan(bias_add_outputs), "BiasAdd"));
  std::unordered_map<tensorflow::int64, TapeTensor>
      source_tensors_that_are_targets;
  std::vector<AbstractTensorHandle*> out_grads;
  TF_RETURN_IF_ERROR(tape->ComputeGradient(
      vspace, /*target_tensor_ids=*/{ToId(bias_add_outputs[0])},
      /*source_tensor_ids=*/{ToId(inputs[0]), ToId(inputs[1])},
      source_tensors_that_are_targets,
      /*output_gradients=*/{}, &out_grads,
      /*build_default_zeros_grads=*/false));
  for (auto bias_add_output : bias_add_outputs) {
    bias_add_output->Unref();
  }
  outputs[0] = out_grads[0];
  outputs[1] = out_grads[1];
  delete tape;
  return Status::OK();
 }
 }  // namespace internal
 }  // namespace gradients
 }  // namespace tensorflow
--- a/tensorflow/c/experimental/gradients/nn_grad_testutil.h
+++ b/tensorflow/c/experimental/gradients/nn_grad_testutil.h
@ -0,0 +1,46 @@
 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #ifndef TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_TESTUTIL_H_
 #define TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_TESTUTIL_H_
 #include <memory>
 #include "absl/types/span.h"
 #include "tensorflow/c/eager/abstract_tensor_handle.h"
 #include "tensorflow/c/eager/c_api_experimental.h"
 #include "tensorflow/c/eager/c_api_unified_experimental.h"
 #include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
 #include "tensorflow/c/eager/gradients.h"
 #include "tensorflow/c/eager/gradients_internal.h"
 #include "tensorflow/c/experimental/ops/nn_ops.h"
 #include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
 #include "tensorflow/core/platform/status.h"
 namespace tensorflow {
 namespace gradients {
 namespace internal {
 // Computes
 // y = BiasAdd(inputs[0], inputs[1])
 // return grad(y, {inputs[0], inputs[1]})
 Status BiasAddGradModel(AbstractContext* ctx,
                        absl::Span<AbstractTensorHandle* const> inputs,
                        absl::Span<AbstractTensorHandle*> outputs,
                        const GradientRegistry& registry);
 }  // namespace internal
 }  // namespace gradients
 }  // namespace tensorflow
 #endif  // TENSORFLOW_C_EXPERIMENTAL_GRADIENTS_NN_GRAD_TESTUTIL_H_
--- a/tensorflow/c/experimental/ops/nn_ops.cc
+++ b/tensorflow/c/experimental/ops/nn_ops.cc
@ -69,5 +69,38 @@ Status Relu(AbstractContext* ctx,
  return Status::OK();
 }
 Status BiasAdd(AbstractContext* ctx,
               absl::Span<AbstractTensorHandle* const> inputs,
               absl::Span<AbstractTensorHandle*> outputs, const char* name) {
  AbstractOperationPtr bias_add_op(ctx->CreateOperation());
  TF_RETURN_IF_ERROR(
      bias_add_op->Reset("BiasAdd", /*raw_device_name=*/nullptr));
  TF_RETURN_IF_ERROR(MaybeSetOpName(bias_add_op.get(), name));
  TF_RETURN_IF_ERROR(bias_add_op->AddInput(inputs[0]));  // tensor input
  TF_RETURN_IF_ERROR(bias_add_op->AddInput(inputs[1]));  // bias
  int num_retvals = 1;
  TF_RETURN_IF_ERROR(bias_add_op->Execute(outputs, &num_retvals));
  return Status::OK();
 }
 // Computes Bias Add gradient given upstream grads
 Status BiasAddGrad(AbstractContext* ctx,
                   absl::Span<AbstractTensorHandle* const> inputs,
                   absl::Span<AbstractTensorHandle*> outputs,
                   const char* data_format, const char* name) {
  AbstractOperationPtr bias_add_grad_op(ctx->CreateOperation());
  TF_RETURN_IF_ERROR(
      bias_add_grad_op->Reset("BiasAddGrad", /*raw_device_name=*/nullptr));
  TF_RETURN_IF_ERROR(MaybeSetOpName(bias_add_grad_op.get(), name));
  TF_RETURN_IF_ERROR(bias_add_grad_op->SetAttrString("data_format", data_format,
                                                     strlen(data_format)));
  TF_RETURN_IF_ERROR(bias_add_grad_op->AddInput(inputs[0]));
  int num_retvals = 1;
  TF_RETURN_IF_ERROR(bias_add_grad_op->Execute(outputs, &num_retvals));
  return Status::OK();
 }
 }  // namespace ops
 }  // namespace tensorflow
--- a/tensorflow/c/experimental/ops/nn_ops.h
+++ b/tensorflow/c/experimental/ops/nn_ops.h
@ -34,6 +34,15 @@ Status Relu(AbstractContext* ctx,
            absl::Span<AbstractTensorHandle* const> inputs,
            absl::Span<AbstractTensorHandle*> outputs, const char* name);
 Status BiasAdd(AbstractContext* ctx,
               absl::Span<AbstractTensorHandle* const> inputs,
               absl::Span<AbstractTensorHandle*> outputs, const char* name);
 Status BiasAddGrad(AbstractContext* ctx,
                   absl::Span<AbstractTensorHandle* const> inputs,
                   absl::Span<AbstractTensorHandle*> outputs,
                   const char* data_format, const char* name);
 }  // namespace ops
 }  // namespace tensorflow