Add deterministic mode for resize_bilinear back-prop

parent 038edfbddd · commit 116db3235a
tensorflow/core/kernels/image/resize_bilinear_op_gpu.cu.cc

@@ -23,6 +23,7 @@ limitations under the License.
 #include "tensorflow/core/framework/tensor_types.h"
 #include "tensorflow/core/kernels/image/resize_bilinear_op.h"
 #include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/env_var.h"
 #include "tensorflow/core/util/gpu_kernel_helper.h"
 
 namespace tensorflow {
@@ -228,6 +229,56 @@ __global__ void ResizeBilinearGradKernel(const int32 nthreads,
   }
 }
 
+template <typename T>
+__global__ void ResizeBilinearDeterministicGradKernel(
+    const int32 nthreads, const float* __restrict__ input_grad,
+    float height_scale, float inverse_height_scale, float width_scale,
+    float inverse_width_scale, int batch, int original_height,
+    int original_width, int channels, int resized_height, int resized_width,
+    T* __restrict__ output_grad) {
+  GPU_1D_KERNEL_LOOP(out_idx, nthreads) {
+    // out_idx = c + channels * (x + original_width * (y + original_height * b))
+    int idx = out_idx;
+    const int c = idx % channels;
+    idx /= channels;
+    const int out_x_center = idx % original_width;
+    idx /= original_width;
+    const int out_y_center = idx % original_height;
+    const int b = idx / original_height;
+
+    int in_y_start = max(0, __float2int_ru(
+        (out_y_center - 1 + 0.5) * inverse_height_scale - 0.5));
+    const float out_y_start = (in_y_start + 0.5) * height_scale - 0.5;
+    int in_x_start = max(0, __float2int_ru(
+        (out_x_center - 1 + 0.5) * inverse_width_scale - 0.5));
+    const float out_x_start = (in_x_start + 0.5) * width_scale - 0.5;
+    T acc = 0;
+    // For clarity, prior to C++17, while loops are preferable to for loops here
+    float out_y = out_y_start; int in_y = in_y_start;
+    while (out_y < out_y_center + 1 && in_y < resized_height) {
+      float out_x = out_x_start; int in_x = in_x_start;
+      while (out_x < out_x_center + 1 && in_x < resized_width) {
+        int in_idx = ((b * resized_height + in_y) * resized_width + in_x) *
+            channels + c;
+        // Clamping to zero is necessary because out_x and out_y can be negative
+        // due to half-pixel adjustments to out_y_start and out_x_start.
+        // Clamping to height/width is necessary when upscaling.
+        float out_y_clamped = fmaxf(0, fminf(out_y, original_height - 1));
+        float out_x_clamped = fmaxf(0, fminf(out_x, original_width - 1));
+        float y_lerp = (1 - fabsf(out_y_clamped - out_y_center));
+        float x_lerp = (1 - fabsf(out_x_clamped - out_x_center));
+        acc += static_cast<T>(input_grad[in_idx] * y_lerp * x_lerp);
+        out_x += width_scale;
+        in_x++;
+      }
+      out_y += height_scale;
+      in_y++;
+    }
+    output_grad[out_idx] = acc;
+  }
+}
+
 template <typename T>
 __global__ void LegacyResizeBilinearKernel(
     const int32 nthreads, const T* __restrict__ images, float height_scale,
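For intuition, here is a minimal NumPy sketch (an illustration, not TensorFlow's code) of the gather-style reduction the kernel above performs: each output (original-image) pixel sums, in a fixed order, the weighted gradient contributions of the resized-image pixels whose bilinear footprint covers it. The naive version below scans every resized pixel; the kernel prunes that scan to the in_y_start/in_x_start window, and the legacy variant that follows differs only in the coordinate mapping (no half-pixel offsets).

import numpy as np

def bilinear_grad_gather(upstream, orig_h, orig_w, half_pixel_centers=True):
  # upstream: (res_h, res_w) incoming gradient for one channel.
  res_h, res_w = upstream.shape
  h_scale, w_scale = orig_h / res_h, orig_w / res_w  # non-align_corners scales
  grad = np.zeros((orig_h, orig_w))
  for oy in range(orig_h):        # one independent reduction per output pixel
    for ox in range(orig_w):
      acc = 0.0
      for iy in range(res_h):     # the CUDA kernel prunes this scan to the
        for ix in range(res_w):   # window that can actually contribute
          if half_pixel_centers:
            sy = (iy + 0.5) * h_scale - 0.5
            sx = (ix + 0.5) * w_scale - 0.5
          else:                   # legacy coordinate mapping
            sy, sx = iy * h_scale, ix * w_scale
          sy = min(max(sy, 0.0), orig_h - 1)  # clamp, as in the kernel
          sx = min(max(sx, 0.0), orig_w - 1)
          wy = max(0.0, 1.0 - abs(sy - oy))   # bilinear lerp weights
          wx = max(0.0, 1.0 - abs(sx - ox))
          acc += upstream[iy, ix] * wy * wx
      grad[oy, ox] = acc
  return grad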
@@ -338,6 +389,55 @@ __global__ void LegacyResizeBilinearGradKernel(
   }
 }
 
+template <typename T>
+__global__ void LegacyResizeBilinearDeterministicGradKernel(
+    const int32 nthreads, const float* __restrict__ input_grad,
+    float height_scale, float inverse_height_scale, float width_scale,
+    float inverse_width_scale, int batch, int original_height,
+    int original_width, int channels, int resized_height, int resized_width,
+    T* __restrict__ output_grad) {
+  GPU_1D_KERNEL_LOOP(out_idx, nthreads) {
+    // out_idx = c + channels * (x + original_width * (y + original_height * b))
+    int idx = out_idx;
+    const int c = idx % channels;
+    idx /= channels;
+    const int out_x_center = idx % original_width;
+    idx /= original_width;
+    const int out_y_center = idx % original_height;
+    const int b = idx / original_height;
+
+    int in_y_start = max(0, __float2int_ru(
+        (out_y_center - 1) * inverse_height_scale));
+    const float out_y_start = in_y_start * height_scale;
+    int in_x_start = max(0, __float2int_ru(
+        (out_x_center - 1) * inverse_width_scale));
+    const float out_x_start = in_x_start * width_scale;
+    T acc = 0;
+    // For clarity, prior to C++17, while loops are preferable to for loops here
+    float out_y = out_y_start; int in_y = in_y_start;
+    while (out_y < out_y_center + 1 && in_y < resized_height) {
+      float out_x = out_x_start; int in_x = in_x_start;
+      while (out_x < out_x_center + 1 && in_x < resized_width) {
+        int in_idx = ((b * resized_height + in_y) * resized_width + in_x) *
+            channels + c;
+        // Clamping to zero is unnecessary because out_x and out_y will never
+        // be less than zero in legacy mode.
+        // Clamping to height/width is necessary when upscaling.
+        float out_y_clamped = fminf(out_y, original_height - 1);
+        float out_x_clamped = fminf(out_x, original_width - 1);
+        float y_lerp = (1 - fabsf(out_y_clamped - out_y_center));
+        float x_lerp = (1 - fabsf(out_x_clamped - out_x_center));
+        acc += static_cast<T>(input_grad[in_idx] * y_lerp * x_lerp);
+        out_x += width_scale;
+        in_x++;
+      }
+      out_y += height_scale;
+      in_y++;
+    }
+    output_grad[out_idx] = acc;
+  }
+}
+
 }  // namespace
 
 namespace functor {
@@ -394,6 +494,17 @@ struct ResizeBilinear<GPUDevice, T> {
   }
 };
 
+bool RequireDeterminism() {
+  static bool require_determinism = [] {
+    bool deterministic_ops = false;
+    TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar("TF_DETERMINISTIC_OPS",
+                                               /*default_val=*/false,
+                                               &deterministic_ops));
+    return deterministic_ops;
+  }();
+  return require_determinism;
+}
+
 // Partial specialization of ResizeBilinearGrad functor for a GPUDevice.
 template <typename T>
 struct ResizeBilinearGrad<GPUDevice, T> {
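Note that RequireDeterminism() reads TF_DETERMINISTIC_OPS once and caches the result in a static, so the variable must be set before the first gradient launch in the process. A minimal usage sketch from Python on a GPU build (shapes are arbitrary, chosen only for illustration):

import os
os.environ['TF_DETERMINISTIC_OPS'] = '1'  # must precede the first GPU launch

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.random_sample((1, 25, 12, 3)), dtype=tf.float32)

def grads():
  with tf.GradientTape() as tape:
    tape.watch(x)
    y = tf.compat.v1.image.resize_bilinear(x, (200, 250),
                                           half_pixel_centers=True)
  return tape.gradient(y, x)

# With determinism enabled, repeated back-prop runs are bit-exactly equal.
assert np.array_equal(grads().numpy(), grads().numpy())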
@@ -413,31 +524,53 @@ struct ResizeBilinearGrad<GPUDevice, T> {
     int total_count;
     GpuLaunchConfig config;
 
-    // Initialize output_grad with all zeros.
     total_count = batch * original_height * original_width * channels;
     if (total_count == 0) return;
     config = GetGpuLaunchConfig(total_count, d);
-    TF_CHECK_OK(GpuLaunchKernel(
-        SetZero<T>, config.block_count, config.thread_per_block, 0, d.stream(),
-        config.virtual_thread_count, output_grad.data()));
-
-    // Accumulate.
-    total_count = batch * resized_height * resized_width * channels;
-    config = GetGpuLaunchConfig(total_count, d);
-    if (half_pixel_centers) {
-      TF_CHECK_OK(GpuLaunchKernel(
-          ResizeBilinearGradKernel<T>, config.block_count,
-          config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
-          input_grad.data(), height_scale, width_scale, batch, original_height,
-          original_width, channels, resized_height, resized_width,
-          output_grad.data()));
+    if (RequireDeterminism()) {
+      // The scale values below should never be zero, which is enforced by
+      // ImageResizerGradientState.
+      float inverse_height_scale = 1 / height_scale;
+      float inverse_width_scale = 1 / width_scale;
+      if (half_pixel_centers) {
+        TF_CHECK_OK(GpuLaunchKernel(
+            ResizeBilinearDeterministicGradKernel<T>, config.block_count,
+            config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
+            input_grad.data(), height_scale, inverse_height_scale, width_scale,
+            inverse_width_scale, batch, original_height, original_width,
+            channels, resized_height, resized_width, output_grad.data()));
+      } else {
+        TF_CHECK_OK(GpuLaunchKernel(
+            LegacyResizeBilinearDeterministicGradKernel<T>, config.block_count,
+            config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
+            input_grad.data(), height_scale, inverse_height_scale, width_scale,
+            inverse_width_scale, batch, original_height, original_width,
+            channels, resized_height, resized_width, output_grad.data()));
+      }
     } else {
+      // Initialize output_grad with all zeros.
       TF_CHECK_OK(GpuLaunchKernel(
-          LegacyResizeBilinearGradKernel<T>, config.block_count,
-          config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
-          input_grad.data(), height_scale, width_scale, batch, original_height,
-          original_width, channels, resized_height, resized_width,
-          output_grad.data()));
+          SetZero<T>, config.block_count, config.thread_per_block, 0,
+          d.stream(), config.virtual_thread_count, output_grad.data()));
+      // Accumulate.
+      total_count = batch * resized_height * resized_width * channels;
+      config = GetGpuLaunchConfig(total_count, d);
+      if (half_pixel_centers) {
+        TF_CHECK_OK(GpuLaunchKernel(
+            ResizeBilinearGradKernel<T>, config.block_count,
+            config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
+            input_grad.data(), height_scale, width_scale, batch,
+            original_height, original_width, channels, resized_height,
+            resized_width, output_grad.data()));
+      } else {
+        TF_CHECK_OK(GpuLaunchKernel(
+            LegacyResizeBilinearGradKernel<T>, config.block_count,
+            config.thread_per_block, 0, d.stream(), config.virtual_thread_count,
+            input_grad.data(), height_scale, width_scale, batch,
+            original_height, original_width, channels, resized_height,
+            resized_width, output_grad.data()));
+      }
     }
   }
 };
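The else branch above keeps the original scheme: zero the output, then scatter-add each resized pixel's contribution (with atomics inside the non-deterministic kernels), so the accumulation order varies between runs. Because floating-point addition is not associative, differently ordered sums can differ in the low bits, which is the nondeterminism this commit removes. A small NumPy demonstration of the underlying effect (an illustration only):

import numpy as np

np.random.seed(0)
vals = np.random.random_sample(100000).astype(np.float32)

def ordered_sum(seq):
  acc = np.float32(0)
  for v in seq:
    acc += v  # sequential float32 accumulation, order-sensitive
  return acc

# Summing the same values in two different orders frequently produces
# results that differ in the last bits.
print(ordered_sum(vals), ordered_sum(vals[::-1]))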
tensorflow/core/util/image_resizer_state.h

@@ -192,6 +192,19 @@ struct ImageResizerGradientState {
     original_height = original_image.dim_size(1);
     original_width = original_image.dim_size(2);
 
+    // The following check is also carried out for the forward op. It is added
+    // here to prevent a divide-by-zero exception when either height_scale or
+    // width_scale is being calculated.
+    OP_REQUIRES(context, resized_height > 0 && resized_width > 0,
+                errors::InvalidArgument("resized dimensions must be positive"));
+
+    // The following check is also carried out for the forward op. It is added
+    // here to prevent either height_scale or width_scale from being set to
+    // zero, which would cause a divide-by-zero exception in the deterministic
+    // back-prop path.
+    OP_REQUIRES(context, original_height > 0 && original_width > 0,
+                errors::InvalidArgument("original dimensions must be positive"));
+
     OP_REQUIRES(
         context,
         FastBoundsCheck(original_height, std::numeric_limits<int32>::max()) &&
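For context, the scales these checks protect are computed along the lines of TF's CalculateResizeScale helper; a simplified Python rendering (not the actual C++) shows where each zero-size dimension would bite:

def calculate_resize_scale(in_size, out_size, align_corners):
  # Simplified rendering of the C++ helper. For the gradient state,
  # in_size is the original dimension and out_size the resized one.
  if align_corners and out_size > 1:
    return (in_size - 1) / float(out_size - 1)
  return in_size / float(out_size)  # out_size == 0 -> divide by zero

# The deterministic kernels additionally use 1 / height_scale and
# 1 / width_scale, so a zero original dimension (scale == 0) must also be
# rejected up front -- hence the second OP_REQUIRES above.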
tensorflow/python/BUILD

@@ -5174,12 +5174,30 @@ cuda_py_test(
     ],
 )
 
+cuda_py_test(
+    name = "image_grad_deterministic_test",
+    size = "medium",
+    srcs = ["ops/image_grad_deterministic_test.py"],
+    python_version = "PY3",
+    deps = [
+        ":image_grad_test_base",
+    ],
+)
+
 cuda_py_test(
     name = "image_grad_test",
     size = "medium",
     srcs = ["ops/image_grad_test.py"],
     python_version = "PY3",
     tfrt_enabled = True,
+    deps = [
+        ":image_grad_test_base",
+    ],
+)
+
+py_library(
+    name = "image_grad_test_base",
+    srcs = ["ops/image_grad_test_base.py"],
     deps = [
         ":client_testlib",
         ":framework_for_generated_wrappers",
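With the shared test classes factored into the image_grad_test_base library, both the regular and the deterministic targets can reuse them; assuming a CUDA-enabled build, the new target should be runnable in the usual way, e.g. bazel test --config=cuda //tensorflow/python:image_grad_deterministic_test.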
tensorflow/python/ops/image_grad_deterministic_test.py (new file, 123 lines)

@@ -0,0 +1,123 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for deterministic image op gradient functions."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+
+from absl.testing import parameterized
+
+from tensorflow.python.eager import backprop
+from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import image_grad_test_base as test_base
+from tensorflow.python.ops import image_ops
+from tensorflow.python.platform import test
+
+
+class ResizeBilinearOpDeterministicTest(test_base.ResizeBilinearOpTestBase):
+
+  def _randomNDArray(self, shape):
+    return 2 * np.random.random_sample(shape) - 1
+
+  def _randomDataOp(self, shape, data_type):
+    return constant_op.constant(self._randomNDArray(shape), dtype=data_type)
+
+  @parameterized.parameters(
+      # Note that there is no 16-bit floating point format registered for GPU
+      {'align_corners': False, 'half_pixel_centers': False,
+       'data_type': dtypes.float32},
+      {'align_corners': False, 'half_pixel_centers': False,
+       'data_type': dtypes.float64},
+      {'align_corners': True, 'half_pixel_centers': False,
+       'data_type': dtypes.float32},
+      {'align_corners': False, 'half_pixel_centers': True,
+       'data_type': dtypes.float32})
+  @test_util.run_in_graph_and_eager_modes
+  @test_util.run_cuda_only
+  def testDeterministicGradients(self, align_corners, half_pixel_centers,
+                                 data_type):
+    if not align_corners and test_util.is_xla_enabled():
+      # Align corners is deprecated in TF2.0, but align_corners==False is not
+      # supported by XLA.
+      self.skipTest("align_corners==False not currently supported by XLA")
+    with self.session(force_gpu=True):
+      seed = (hash(align_corners) % 256 + hash(half_pixel_centers) % 256 +
+              hash(data_type) % 256)
+      np.random.seed(seed)
+      input_shape = (1, 25, 12, 3)  # NHWC
+      output_shape = (1, 200, 250, 3)
+      input_image = self._randomDataOp(input_shape, data_type)
+      repeat_count = 3
+      if context.executing_eagerly():
+
+        def resize_bilinear_gradients(local_seed):
+          np.random.seed(local_seed)
+          upstream_gradients = self._randomDataOp(output_shape, dtypes.float32)
+          with backprop.GradientTape(persistent=True) as tape:
+            tape.watch(input_image)
+            output_image = image_ops.resize_bilinear(
+                input_image, output_shape[1:3], align_corners=align_corners,
+                half_pixel_centers=half_pixel_centers)
+            gradient_injector_output = output_image * upstream_gradients
+          return tape.gradient(gradient_injector_output, input_image)
+
+        for i in range(repeat_count):
+          local_seed = seed + i  # select different upstream gradients
+          result_a = resize_bilinear_gradients(local_seed)
+          result_b = resize_bilinear_gradients(local_seed)
+          self.assertAllEqual(result_a, result_b)
+      else:  # graph mode
+        upstream_gradients = array_ops.placeholder(
+            dtypes.float32, shape=output_shape, name='upstream_gradients')
+        output_image = image_ops.resize_bilinear(
+            input_image, output_shape[1:3], align_corners=align_corners,
+            half_pixel_centers=half_pixel_centers)
+        gradient_injector_output = output_image * upstream_gradients
+        # The gradient function behaves as if grad_ys is multiplied by the op
+        # gradient result, not passing the upstream gradients through the op's
+        # gradient generation graph. This is the reason for using the
+        # gradient injector.
+        resize_bilinear_gradients = gradients_impl.gradients(
+            gradient_injector_output, input_image, grad_ys=None,
+            colocate_gradients_with_ops=True)[0]
+        for i in range(repeat_count):
+          feed_dict = {upstream_gradients: self._randomNDArray(output_shape)}
+          result_a = resize_bilinear_gradients.eval(feed_dict=feed_dict)
+          result_b = resize_bilinear_gradients.eval(feed_dict=feed_dict)
+          self.assertAllEqual(result_a, result_b)
+
+
+if __name__ == '__main__':
+  # Note that the effect of setting the following environment variable to
+  # 'true' is not tested. Unless we can find a simpler pattern for testing
+  # these environment variables, it would require this file to be made into
+  # a base and then two more test files to be created.
+  #
+  # When deterministic op functionality can be enabled and disabled between
+  # test cases in the same process, then the tests for deterministic op
+  # functionality, for this op and for other ops, will be able to be included
+  # in the same file with the regular tests, simplifying the organization of
+  # tests and test files.
+  os.environ['TF_DETERMINISTIC_OPS'] = '1'
+  test.main()
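The gradient-injector pattern used above deserves a note: multiplying the op output by the upstream tensor inside the graph forces those values through the op's gradient kernel, whereas passing them as grad_ys would merely scale the gradient result afterwards. A stand-alone sketch with a stand-in op (hypothetical values, eager mode):

import tensorflow as tf

x = tf.constant([1.0, 2.0])
upstream = tf.constant([3.0, 4.0])
with tf.GradientTape() as tape:
  tape.watch(x)
  y = x * x                      # stand-in for the op under test
  injected = y * upstream        # injector: upstream flows through backward pass
g = tape.gradient(injected, x)   # == 2 * x * upstream -> [6.0, 16.0]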
tensorflow/python/ops/image_grad_test.py (new file, 32 lines)

@@ -0,0 +1,32 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functional tests for Image Op Gradients."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.python.ops import image_grad_test_base as test_base
+from tensorflow.python.platform import test
+
+ResizeNearestNeighborOpTest = test_base.ResizeNearestNeighborOpTestBase
+ResizeBilinearOpTest = test_base.ResizeBilinearOpTestBase
+ResizeBicubicOpTest = test_base.ResizeBicubicOpTestBase
+ScaleAndTranslateOpTest = test_base.ScaleAndTranslateOpTestBase
+CropAndResizeOpTest = test_base.CropAndResizeOpTestBase
+RGBToHSVOpTest = test_base.RGBToHSVOpTestBase
+
+if __name__ == "__main__":
+  test.main()
tensorflow/python/ops/image_grad_test_base.py (renamed from ops/image_grad_test.py, with changes)

@@ -20,6 +20,8 @@ from __future__ import print_function
 
 import numpy as np
 
+from absl.testing import parameterized
+
 from tensorflow.python.eager import backprop
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import test_util
@@ -35,7 +37,7 @@ from tensorflow.python.ops import array_ops
 
 @test_util.for_all_test_methods(test_util.disable_xla,
                                 'align_corners=False not supported by XLA')
-class ResizeNearestNeighborOpTest(test.TestCase):
+class ResizeNearestNeighborOpTestBase(test.TestCase):
 
   TYPES = [np.float32, np.float64]
 
@@ -111,97 +113,140 @@ class ResizeNearestNeighborOpTest(test.TestCase):
         self.assertAllClose(grad_cpu, grad_gpu, rtol=1e-5, atol=1e-5)
 
 
-class ResizeBilinearOpTest(test.TestCase):
+class ResizeBilinearOpTestBase(test.TestCase, parameterized.TestCase):
 
-  def testShapeIsCorrectAfterOp(self):
-    in_shape = [1, 2, 2, 1]
-    out_shape = [1, 4, 6, 1]
-
-    x = np.arange(0, 4).reshape(in_shape).astype(np.float32)
-
-    input_tensor = constant_op.constant(x, shape=in_shape)
-    resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
-    with self.cached_session():
-      self.assertEqual(out_shape, list(resize_out.get_shape()))
-      resize_out = self.evaluate(resize_out)
-      self.assertEqual(out_shape, list(resize_out.shape))
-
-  @test_util.run_deprecated_v1
-  def testGradFromResizeToLargerInBothDims(self):
-    in_shape = [1, 2, 3, 1]
-    out_shape = [1, 4, 6, 1]
-
-    x = np.arange(0, 6).reshape(in_shape).astype(np.float32)
-
-    with self.cached_session():
-      input_tensor = constant_op.constant(x, shape=in_shape)
-      resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
-      err = gradient_checker.compute_gradient_error(
-          input_tensor, in_shape, resize_out, out_shape, x_init_value=x)
-    self.assertLess(err, 1e-3)
-
-  @test_util.run_deprecated_v1
-  def testGradFromResizeToSmallerInBothDims(self):
-    in_shape = [1, 4, 6, 1]
-    out_shape = [1, 2, 3, 1]
-
-    x = np.arange(0, 24).reshape(in_shape).astype(np.float32)
-
-    with self.cached_session():
-      input_tensor = constant_op.constant(x, shape=in_shape)
-      resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
-      err = gradient_checker.compute_gradient_error(
-          input_tensor, in_shape, resize_out, out_shape, x_init_value=x)
-    self.assertLess(err, 1e-3)
-
-  @test_util.run_deprecated_v1
-  def testCompareGpuVsCpu(self):
-    in_shape = [2, 4, 6, 3]
-    out_shape = [2, 8, 16, 3]
-
-    size = np.prod(in_shape)
-    x = 1.0 / size * np.arange(0, size).reshape(in_shape).astype(np.float32)
-
-    # Align corners will be deprecated for tf2.0 and the false version is not
+  def _itGen(self, smaller_shape, larger_shape):
+    up_sample = (smaller_shape, larger_shape)
+    down_sample = (larger_shape, smaller_shape)
+    pass_through = (larger_shape, larger_shape)
+    shape_pairs = (up_sample, down_sample, pass_through)
+    # Align corners is deprecated in TF2.0, but align_corners==False is not
     # supported by XLA.
-    align_corner_options = [True
-                           ] if test_util.is_xla_enabled() else [True, False]
-    for align_corners in align_corner_options:
-      grad = {}
-      for use_gpu in [False, True]:
-        with self.cached_session(use_gpu=use_gpu):
-          input_tensor = constant_op.constant(x, shape=in_shape)
-          resized_tensor = image_ops.resize_bilinear(
-              input_tensor, out_shape[1:3], align_corners=align_corners)
-          grad[use_gpu] = gradient_checker.compute_gradient(
-              input_tensor, in_shape, resized_tensor, out_shape, x_init_value=x)
-
-      self.assertAllClose(grad[False], grad[True], rtol=1e-4, atol=1e-4)
+    options = [(True, False)]
+    if not test_util.is_xla_enabled():
+      options += [(False, True), (False, False)]
+    for align_corners, half_pixel_centers in options:
+      for in_shape, out_shape in shape_pairs:
+        yield in_shape, out_shape, align_corners, half_pixel_centers
+
+  def _getJacobians(self, in_shape, out_shape, align_corners=False,
+                    half_pixel_centers=False, dtype=np.float32, use_gpu=False,
+                    force_gpu=False):
+    with self.cached_session(use_gpu=use_gpu, force_gpu=force_gpu) as sess:
+      # Input values should not influence gradients
+      x = np.arange(np.prod(in_shape)).reshape(in_shape).astype(dtype)
+      input_tensor = constant_op.constant(x, shape=in_shape)
+      resized_tensor = image_ops.resize_bilinear(
+          input_tensor, out_shape[1:3], align_corners=align_corners,
+          half_pixel_centers=half_pixel_centers)
+      # compute_gradient will use a random tensor as the init value
+      return gradient_checker.compute_gradient(
+          input_tensor, in_shape, resized_tensor, out_shape)
+
+  @parameterized.parameters(
+      {'batch_size': 1, 'channel_count': 1},
+      {'batch_size': 2, 'channel_count': 3},
+      {'batch_size': 5, 'channel_count': 4})
+  @test_util.run_deprecated_v1
+  def testShapes(self, batch_size, channel_count):
+    smaller_shape = [batch_size, 2, 3, channel_count]
+    larger_shape = [batch_size, 4, 6, channel_count]
+    for in_shape, out_shape, align_corners, half_pixel_centers in \
+        self._itGen(smaller_shape, larger_shape):
+      # Input values should not influence shapes
+      x = np.arange(np.prod(in_shape)).reshape(in_shape).astype(np.float32)
+      input_tensor = constant_op.constant(x, shape=in_shape)
+      resized_tensor = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
+      self.assertEqual(out_shape, list(resized_tensor.get_shape()))
+      grad_tensor = gradients_impl.gradients(resized_tensor, input_tensor)[0]
+      self.assertEqual(in_shape, list(grad_tensor.get_shape()))
+      with self.cached_session():
+        resized_values = self.evaluate(resized_tensor)
+        self.assertEqual(out_shape, list(resized_values.shape))
+        grad_values = self.evaluate(grad_tensor)
+        self.assertEqual(in_shape, list(grad_values.shape))
+
+  @parameterized.parameters(
+      {'batch_size': 1, 'channel_count': 1},
+      {'batch_size': 4, 'channel_count': 3},
+      {'batch_size': 3, 'channel_count': 2})
+  @test_util.run_deprecated_v1
+  def testGradients(self, batch_size, channel_count):
+    smaller_shape = [batch_size, 2, 3, channel_count]
+    larger_shape = [batch_size, 5, 6, channel_count]
+    for in_shape, out_shape, align_corners, half_pixel_centers in \
+        self._itGen(smaller_shape, larger_shape):
+      jacob_a, jacob_n = self._getJacobians(
+          in_shape, out_shape, align_corners, half_pixel_centers)
+      threshold = 1e-4
+      self.assertAllClose(jacob_a, jacob_n, threshold, threshold)
 
   @test_util.run_deprecated_v1
   def testTypes(self):
     in_shape = [1, 4, 6, 1]
     out_shape = [1, 2, 3, 1]
-    x = np.arange(0, 24).reshape(in_shape)
 
     for use_gpu in [False, True]:
-      with self.cached_session(use_gpu=use_gpu) as sess:
-        for dtype in [np.float16, np.float32, np.float64]:
-          input_tensor = constant_op.constant(x.astype(dtype), shape=in_shape)
-          resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
-          grad = sess.run(gradients_impl.gradients(resize_out, input_tensor))[0]
-          self.assertAllEqual(in_shape, grad.shape)
-          # Not using gradient_checker.compute_gradient as I didn't work out
-          # the changes required to compensate for the lower precision of
-          # float16 when computing the numeric jacobian.
-          # Instead, we just test the theoretical jacobian.
-          self.assertAllEqual([[[[1.], [0.], [1.], [0.], [1.], [0.]],
-                                [[0.], [0.], [0.], [0.], [0.], [0.]],
-                                [[1.], [0.], [1.], [0.], [1.], [0.]],
-                                [[0.], [0.], [0.], [0.], [0.], [0.]]]], grad)
+      for dtype in [np.float16, np.float32, np.float64]:
+        jacob_a, jacob_n = self._getJacobians(
+            in_shape, out_shape, dtype=dtype, use_gpu=use_gpu)
+        if dtype == np.float16:
+          # Compare fp16 analytical gradients to fp32 numerical gradients,
+          # since fp16 numerical gradients are too imprecise unless great
+          # care is taken with choosing the inputs and the delta. This is
+          # a weaker, but pragmatic, check (in particular, it does not test
+          # the op itself, only its gradient).
+          _, jacob_n = self._getJacobians(
+              in_shape, out_shape, dtype=np.float32, use_gpu=use_gpu)
+        threshold = 1e-3
+        if dtype == np.float64:
+          threshold = 1e-5
+        self.assertAllClose(jacob_a, jacob_n, threshold, threshold)
+
+  @test_util.run_deprecated_v1
+  def testGradOnUnsupportedType(self):
+    in_shape = [1, 4, 6, 1]
+    out_shape = [1, 2, 3, 1]
+
+    x = np.arange(0, 24).reshape(in_shape).astype(np.uint8)
+
+    input_tensor = constant_op.constant(x, shape=in_shape)
+    resize_out = image_ops.resize_bilinear(input_tensor, out_shape[1:3])
+    with self.cached_session():
+      grad = gradients_impl.gradients(resize_out, [input_tensor])
+      self.assertEqual([None], grad)
+
+  def _gpuVsCpuCase(self, in_shape, out_shape, align_corners,
+                    half_pixel_centers, dtype):
+    grad = {}
+    for use_gpu in [False, True]:
+      grad[use_gpu] = self._getJacobians(
+          in_shape, out_shape, align_corners, half_pixel_centers, dtype=dtype,
+          use_gpu=use_gpu)
+    threshold = 1e-4
+    # Note that this is comparing both analytical and numerical Jacobians
+    self.assertAllClose(grad[False], grad[True], rtol=threshold,
+                        atol=threshold)
+
+  @parameterized.parameters(
+      {'batch_size': 1, 'channel_count': 1},
+      {'batch_size': 2, 'channel_count': 3},
+      {'batch_size': 5, 'channel_count': 4})
+  @test_util.run_deprecated_v1
+  def testCompareGpuVsCpu(self, batch_size, channel_count):
+    smaller_shape = [batch_size, 4, 6, channel_count]
+    larger_shape = [batch_size, 8, 16, channel_count]
+    for params in self._itGen(smaller_shape, larger_shape):
+      self._gpuVsCpuCase(*params, dtype=np.float32)
+
+  @test_util.run_deprecated_v1
+  def testCompareGpuVsCpuFloat64(self):
+    in_shape = [1, 5, 7, 1]
+    out_shape = [1, 9, 11, 1]
+    # Note that there is no 16-bit floating-point format registered for GPU
+    self._gpuVsCpuCase(in_shape, out_shape, align_corners=True,
+                       half_pixel_centers=False, dtype=np.float64)
 
 
-class ResizeBicubicOpTest(test.TestCase):
+class ResizeBicubicOpTestBase(test.TestCase):
 
   def testShapeIsCorrectAfterOp(self):
     in_shape = [1, 2, 2, 1]
@@ -264,7 +309,7 @@ class ResizeBicubicOpTest(test.TestCase):
       self.assertEqual([None], grad)
 
 
-class ScaleAndTranslateOpTest(test.TestCase):
+class ScaleAndTranslateOpTestBase(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testGrads(self):
@@ -328,7 +373,7 @@ class ScaleAndTranslateOpTest(test.TestCase):
     self.assertAllClose(np.ones_like(grad_v), grad_v)
 
 
-class CropAndResizeOpTest(test.TestCase):
+class CropAndResizeOpTestBase(test.TestCase):
 
   def testShapeIsCorrectAfterOp(self):
     batch = 2
@@ -457,7 +502,7 @@ class CropAndResizeOpTest(test.TestCase):
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class RGBToHSVOpTest(test.TestCase):
+class RGBToHSVOpTestBase(test.TestCase):
 
   TYPES = [np.float32, np.float64]
 
tensorflow/tools/pip_package/BUILD

@@ -142,6 +142,7 @@ COMMON_PIP_DEPS = [
     "//tensorflow/python/tools:tools_pip",
     "//tensorflow/python/tools/api/generator:create_python_api",
     "//tensorflow/python/tpu",
+    "//tensorflow/python:image_grad_test_base",
    "//tensorflow/python:test_ops",
     "//tensorflow/python:while_v2",
     "//tensorflow/tools/common:public_api",