Image Translation Keras layer for preprocessing.

PiperOrigin-RevId: 292262892
Change-Id: Iff5f81b22ed5d39cc4f5b7eca9fa55ca5936b9d8
Zhenyu Tan 2020-01-29 18:55:24 -08:00 committed by TensorFlower Gardener
parent 7b265f2af1
commit 26992815bf
11 changed files with 856 additions and 2 deletions

View File

@@ -0,0 +1,51 @@
op {
graph_op_name: "ImageProjectiveTransformV2"
visibility: HIDDEN
in_arg {
name: "images"
description: <<END
4-D with shape `[batch, height, width, channels]`.
END
}
in_arg {
name: "transforms"
description: <<END
2-D Tensor of shape `[batch, 8]` or `[1, 8]`, where each row holds the first 8
entries of a 3 x 3 projective transformation matrix, with the last entry assumed
to be 1. If there is one row, the same transformation is applied to all images.
END
}
in_arg {
name: "output_shape"
description: <<END
1-D Tensor [new_height, new_width].
END
}
out_arg {
name: "transformed_images"
description: <<END
4-D with shape
`[batch, new_height, new_width, channels]`.
END
}
attr {
name: "dtype"
description: <<END
Input dtype.
END
}
attr {
name: "interpolation"
description: <<END
Interpolation method, "NEAREST" or "BILINEAR".
END
}
summary: "Applies the given transform to each of the images."
description: <<END
If one row of `transforms` is `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps
the *output* point `(x, y)` to a transformed *input* point
`(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
`k = c0 x + c1 y + 1`. If the transformed point lies outside of the input
image, the output pixel is set to 0.
END
}
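For reference, a minimal pure-Python sketch (illustration only, not part of this change) of the output-to-input mapping described above; map_output_to_input is a hypothetical helper:

def map_output_to_input(transform, x, y):
    # transform = [a0, a1, a2, b0, b1, b2, c0, c1]; the ninth matrix
    # entry is implicitly 1.
    a0, a1, a2, b0, b1, b2, c0, c1 = transform
    k = c0 * x + c1 * y + 1.0
    return (a0 * x + a1 * y + a2) / k, (b0 * x + b1 * y + b2) / k

# A pure translation stores negated offsets, because the mapping runs from
# output points back to input points: output (5, 5) samples input (3, 2)
# when shifting by (dx, dy) = (2, 3).
print(map_output_to_input([1, 0, -2, 0, 1, -3, 0, 0], 5.0, 5.0))  # (3.0, 2.0)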

View File

@@ -2924,6 +2924,7 @@ cc_library(
":encode_png_op",
":extract_jpeg_shape_op",
":generate_box_proposals_op",
":image_ops",
":non_max_suppression_op",
":random_crop_op",
":resize_area_op",
@@ -3084,6 +3085,12 @@ tf_kernel_library(
deps = IMAGE_DEPS,
)
tf_kernel_library(
name = "image_ops",
prefix = "image_ops",
deps = IMAGE_DEPS,
)
tf_kernel_library(
name = "encode_wav_op",
prefix = "encode_wav_op",

View File

@@ -0,0 +1,180 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#define EIGEN_USE_THREADS
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif // GOOGLE_CUDA
#include "tensorflow/core/kernels/image_ops.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace functor {
// Explicit instantiation of the CPU functor.
typedef Eigen::ThreadPoolDevice CPUDevice;
template struct FillProjectiveTransform<CPUDevice, uint8>;
template struct FillProjectiveTransform<CPUDevice, int32>;
template struct FillProjectiveTransform<CPUDevice, int64>;
template struct FillProjectiveTransform<CPUDevice, Eigen::half>;
template struct FillProjectiveTransform<CPUDevice, float>;
template struct FillProjectiveTransform<CPUDevice, double>;
} // end namespace functor
typedef Eigen::ThreadPoolDevice CPUDevice;
using functor::FillProjectiveTransform;
using generator::Interpolation;
using generator::INTERPOLATION_BILINEAR;
using generator::INTERPOLATION_NEAREST;
using generator::ProjectiveGenerator;
template <typename Device, typename T>
class ImageProjectiveTransform : public OpKernel {
private:
Interpolation interpolation_;
public:
explicit ImageProjectiveTransform(OpKernelConstruction* ctx) : OpKernel(ctx) {
string interpolation_str;
OP_REQUIRES_OK(ctx, ctx->GetAttr("interpolation", &interpolation_str));
if (interpolation_str == "NEAREST") {
interpolation_ = INTERPOLATION_NEAREST;
} else if (interpolation_str == "BILINEAR") {
interpolation_ = INTERPOLATION_BILINEAR;
} else {
LOG(ERROR) << "Invalid interpolation " << interpolation_str
<< ". Supported types: NEAREST, BILINEAR";
}
}
void Compute(OpKernelContext* ctx) override {
const Tensor& images_t = ctx->input(0);
const Tensor& transform_t = ctx->input(1);
OP_REQUIRES(ctx, images_t.shape().dims() == 4,
errors::InvalidArgument("Input images must have rank 4"));
OP_REQUIRES(ctx,
(TensorShapeUtils::IsMatrix(transform_t.shape()) &&
(transform_t.dim_size(0) == images_t.dim_size(0) ||
transform_t.dim_size(0) == 1) &&
transform_t.dim_size(1) ==
ProjectiveGenerator<Device, T>::kNumParameters),
errors::InvalidArgument(
"Input transform should be num_images x 8 or 1 x 8"));
int32 out_height, out_width;
// The kernel is shared with the legacy "ImageProjectiveTransform" op, which has only 2 inputs.
if (ctx->num_inputs() >= 3) {
const Tensor& shape_t = ctx->input(2);
OP_REQUIRES(ctx, shape_t.dims() == 1,
errors::InvalidArgument("output shape must be 1-dimensional",
shape_t.shape().DebugString()));
OP_REQUIRES(ctx, shape_t.NumElements() == 2,
errors::InvalidArgument("output shape must have two elements",
shape_t.shape().DebugString()));
auto shape_vec = shape_t.vec<int32>();
out_height = shape_vec(0);
out_width = shape_vec(1);
OP_REQUIRES(
ctx, out_height > 0 && out_width > 0,
errors::InvalidArgument("output dimensions must be positive"));
} else {
// Shape is N (batch size), H (height), W (width), C (channels).
out_height = images_t.shape().dim_size(1);
out_width = images_t.shape().dim_size(2);
}
Tensor* output_t;
OP_REQUIRES_OK(ctx, ctx->allocate_output(
0,
TensorShape({images_t.dim_size(0), out_height,
out_width, images_t.dim_size(3)}),
&output_t));
auto output = output_t->tensor<T, 4>();
auto images = images_t.tensor<T, 4>();
auto transform = transform_t.matrix<float>();
(FillProjectiveTransform<Device, T>(interpolation_))(
ctx->eigen_device<Device>(), &output, images, transform);
}
};
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER(Name("ImageProjectiveTransformV2") \
.Device(DEVICE_CPU) \
.TypeConstraint<TYPE>("dtype"), \
ImageProjectiveTransform<CPUDevice, TYPE>)
TF_CALL_uint8(REGISTER);
TF_CALL_int32(REGISTER);
TF_CALL_int64(REGISTER);
TF_CALL_half(REGISTER);
TF_CALL_float(REGISTER);
TF_CALL_double(REGISTER);
#undef REGISTER
#if GOOGLE_CUDA
typedef Eigen::GpuDevice GPUDevice;
namespace functor {
// NOTE(ringwalt): We get an undefined symbol error if we don't explicitly
// instantiate the operator() in GCC'd code.
#define DECLARE_FUNCTOR(TYPE) \
template <> \
void FillProjectiveTransform<GPUDevice, TYPE>::operator()( \
const GPUDevice& device, OutputType* output, const InputType& images, \
const TransformsType& transform) const; \
extern template struct FillProjectiveTransform<GPUDevice, TYPE>
TF_CALL_uint8(DECLARE_FUNCTOR);
TF_CALL_int32(DECLARE_FUNCTOR);
TF_CALL_int64(DECLARE_FUNCTOR);
TF_CALL_half(DECLARE_FUNCTOR);
TF_CALL_float(DECLARE_FUNCTOR);
TF_CALL_double(DECLARE_FUNCTOR);
} // end namespace functor
#define REGISTER(TYPE) \
REGISTER_KERNEL_BUILDER(Name("ImageProjectiveTransformV2") \
.Device(DEVICE_GPU) \
.TypeConstraint<TYPE>("dtype") \
.HostMemory("output_shape"), \
ImageProjectiveTransform<GPUDevice, TYPE>)
TF_CALL_uint8(REGISTER);
TF_CALL_int32(REGISTER);
TF_CALL_int64(REGISTER);
TF_CALL_half(REGISTER);
TF_CALL_float(REGISTER);
TF_CALL_double(REGISTER);
#undef REGISTER
#endif // GOOGLE_CUDA
} // end namespace tensorflow

View File

@@ -0,0 +1,172 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_
#define TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_
// See docs in ../ops/image_ops.cc.
#define EIGEN_USE_THREADS
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace generator {
enum Interpolation { INTERPOLATION_NEAREST, INTERPOLATION_BILINEAR };
using Eigen::array;
using Eigen::DenseIndex;
template <typename Device, typename T>
class ProjectiveGenerator {
private:
typename TTypes<T, 4>::ConstTensor input_;
typename TTypes<float>::ConstMatrix transforms_;
const Interpolation interpolation_;
public:
static const int kNumParameters = 8;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
ProjectiveGenerator(typename TTypes<T, 4>::ConstTensor input,
typename TTypes<float>::ConstMatrix transforms,
const Interpolation interpolation)
: input_(input), transforms_(transforms), interpolation_(interpolation) {}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
operator()(const array<DenseIndex, 4>& coords) const {
const int64 output_y = coords[1];
const int64 output_x = coords[2];
const float* transform =
transforms_.dimension(0) == 1
? transforms_.data()
: &transforms_.data()[transforms_.dimension(1) * coords[0]];
float projection = transform[6] * output_x + transform[7] * output_y + 1.f;
if (projection == 0) {
// Return the fill value (0) for infinite coordinates,
// which are outside the input image
return T(0);
}
const float input_x =
(transform[0] * output_x + transform[1] * output_y + transform[2]) /
projection;
const float input_y =
(transform[3] * output_x + transform[4] * output_y + transform[5]) /
projection;
const T fill_value = T(0);
switch (interpolation_) {
case INTERPOLATION_NEAREST:
// Switch the order of x and y again for indexing into the image.
return nearest_interpolation(coords[0], input_y, input_x, coords[3],
fill_value);
case INTERPOLATION_BILINEAR:
return bilinear_interpolation(coords[0], input_y, input_x, coords[3],
fill_value);
}
// Unreachable; ImageProjectiveTransform only uses INTERPOLATION_NEAREST
// or INTERPOLATION_BILINEAR.
return T(0);
}
private:
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
nearest_interpolation(const DenseIndex batch, const float y, const float x,
const DenseIndex channel, const T fill_value) const {
return read_with_fill_value(batch, DenseIndex(std::round(y)),
DenseIndex(std::round(x)), channel, fill_value);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
bilinear_interpolation(const DenseIndex batch, const float y, const float x,
const DenseIndex channel, const T fill_value) const {
const float y_floor = std::floor(y);
const float x_floor = std::floor(x);
const float y_ceil = y_floor + 1;
const float x_ceil = x_floor + 1;
// f(x, y_floor) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_floor)
// + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_floor)
const float value_yfloor =
(x_ceil - x) * static_cast<float>(read_with_fill_value(
batch, DenseIndex(y_floor), DenseIndex(x_floor),
channel, fill_value)) +
(x - x_floor) * static_cast<float>(read_with_fill_value(
batch, DenseIndex(y_floor), DenseIndex(x_ceil),
channel, fill_value));
// f(x, y_ceil) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_ceil)
// + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_ceil)
const float value_yceil =
(x_ceil - x) * static_cast<float>(read_with_fill_value(
batch, DenseIndex(y_ceil), DenseIndex(x_floor),
channel, fill_value)) +
(x - x_floor) * static_cast<float>(read_with_fill_value(
batch, DenseIndex(y_ceil), DenseIndex(x_ceil),
channel, fill_value));
// f(x, y) = (y_ceil - y) / (y_ceil - y_floor) * f(x, y_floor)
// + (y - y_floor) / (y_ceil - y_floor) * f(x, y_ceil)
return T((y_ceil - y) * value_yfloor + (y - y_floor) * value_yceil);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T read_with_fill_value(
const DenseIndex batch, const DenseIndex y, const DenseIndex x,
const DenseIndex channel, const T fill_value) const {
// batch and channel must be correct, because they are passed unchanged from
// the input.
return (0 <= y && y < input_.dimension(1) && 0 <= x &&
x < input_.dimension(2))
? input_(array<DenseIndex, 4>{batch, y, x, channel})
: fill_value;
}
};
} // end namespace generator
// NOTE(ringwalt): We MUST wrap the generate() call in a functor and explicitly
// instantiate the functor in image_ops_gpu.cu.cc. Otherwise, we will be missing
// some Eigen device code.
namespace functor {
using generator::Interpolation;
using generator::ProjectiveGenerator;
template <typename Device, typename T>
struct FillProjectiveTransform {
typedef typename TTypes<T, 4>::Tensor OutputType;
typedef typename TTypes<T, 4>::ConstTensor InputType;
typedef typename TTypes<float, 2>::ConstTensor TransformsType;
const Interpolation interpolation_;
FillProjectiveTransform(Interpolation interpolation)
: interpolation_(interpolation) {}
EIGEN_ALWAYS_INLINE
void operator()(const Device& device, OutputType* output,
const InputType& images,
const TransformsType& transform) const {
output->device(device) = output->generate(
ProjectiveGenerator<Device, T>(images, transform, interpolation_));
}
};
} // end namespace functor
} // end namespace tensorflow
#endif // TENSORFLOW_CORE_KERNELS_IMAGE_OPS_H_
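The bilinear path above performs two interpolations along x, at the floor and ceil rows, then one along y, with out-of-range reads replaced by the fill value. A small NumPy sketch of the same arithmetic (illustration only, single-channel image; bilinear is a hypothetical helper mirroring the functor):

import numpy as np

def bilinear(image, y, x, fill_value=0.0):
    h, w = image.shape

    def read(yi, xi):
        # Mirrors read_with_fill_value for a 2-D image.
        return image[yi, xi] if 0 <= yi < h and 0 <= xi < w else fill_value

    y_floor, x_floor = np.floor(y), np.floor(x)
    y_ceil, x_ceil = y_floor + 1, x_floor + 1
    value_yfloor = ((x_ceil - x) * read(int(y_floor), int(x_floor)) +
                    (x - x_floor) * read(int(y_floor), int(x_ceil)))
    value_yceil = ((x_ceil - x) * read(int(y_ceil), int(x_floor)) +
                   (x - x_floor) * read(int(y_ceil), int(x_ceil)))
    return (y_ceil - y) * value_yfloor + (y - y_floor) * value_yceil

img = np.array([[0.0, 1.0], [2.0, 3.0]])
print(bilinear(img, 0.5, 0.5))  # 1.5, the average of the four corner pixels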

View File

@@ -0,0 +1,43 @@
/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/kernels/image_ops.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
namespace functor {
// Explicit instantiation of the GPU functor.
typedef Eigen::GpuDevice GPUDevice;
template class FillProjectiveTransform<GPUDevice, uint8>;
template class FillProjectiveTransform<GPUDevice, int32>;
template class FillProjectiveTransform<GPUDevice, int64>;
template class FillProjectiveTransform<GPUDevice, Eigen::half>;
template class FillProjectiveTransform<GPUDevice, float>;
template class FillProjectiveTransform<GPUDevice, double>;
} // end namespace functor
} // end namespace tensorflow
#endif // GOOGLE_CUDA

View File

@@ -1021,4 +1021,22 @@ REGISTER_OP("GenerateBoundingBoxProposals")
c->set_output(1, prob_shape);
return Status::OK();
});
// TODO(ringwalt): Add a "fill_mode" attr with "constant", "mirror", etc.
// TODO(ringwalt): Add a "fill_constant" argument for constant mode (default 0).
// V2 op supports output_shape. V1 op is in contrib.
REGISTER_OP("ImageProjectiveTransformV2")
.Input("images: dtype")
.Input("transforms: float32")
.Input("output_shape: int32")
.Attr("dtype: {uint8, int32, int64, float16, float32, float64}")
.Attr("interpolation: string")
.Output("transformed_images: dtype")
.SetShapeFn([](InferenceContext* c) {
ShapeHandle input;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));
return SetOutputToSizedImage(c, c->Dim(input, 0), 2 /* size_input_idx */,
c->Dim(input, 3));
});
} // namespace tensorflow
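A hedged usage sketch of the new op, assuming the generated Python wrapper is exposed as tf.raw_ops.ImageProjectiveTransformV2; the 8-parameter identity transform leaves the images unchanged:

import tensorflow as tf

images = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
identity = tf.constant([[1., 0., 0., 0., 1., 0., 0., 0.]])  # flat 8 parameters
out = tf.raw_ops.ImageProjectiveTransformV2(
    images=images,
    transforms=identity,
    output_shape=tf.constant([4, 4], dtype=tf.int32),
    interpolation="NEAREST")
print(out.shape)  # (1, 4, 4, 1); values equal the input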

View File

@@ -18,9 +18,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.eager import context
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
@@ -28,7 +30,7 @@ from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import image_ops_impl as image_ops
from tensorflow.python.ops import image_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import stateful_random_ops
from tensorflow.python.ops import stateless_random_ops
@@ -382,6 +384,218 @@ class RandomFlip(Layer):
return dict(list(base_config.items()) + list(config.items()))
class RandomTranslation(Layer):
"""Randomly translate each image during training.
Arguments:
height_factor: a float represented as a fraction of the image height, or a
tuple of size 2 representing lower and upper bounds for shifting vertically.
When represented as a single float, this value is used for both the upper
and lower bound. For instance, `height_factor=(0.2, 0.3)` shifts each image
vertically by a random amount in the range `[-20%, +30%]` of its height,
while `height_factor=0.2` shifts in the range `[-20%, +20%]`.
width_factor: a float represented as a fraction of the image width, or a
tuple of size 2 representing lower and upper bounds for shifting
horizontally. When represented as a single float, this value is used for
both the upper and lower bound.
fill_mode: Points outside the boundaries of the input are filled according
to the given mode (one of `{'nearest', 'bilinear'}`).
fill_value: Value used for points outside the boundaries of the input if
`mode='constant'`.
seed: Integer. Used to create a random seed.
Input shape:
4D tensor with shape: `(samples, height, width, channels)`,
data_format='channels_last'.
Output shape:
4D tensor with shape: `(samples, height, width, channels)`,
data_format='channels_last'.
Raises:
ValueError: if the lower bound is not in `[0, 1]`, or the upper bound is
negative.
"""
def __init__(self,
height_factor,
width_factor,
fill_mode='nearest',
fill_value=0.,
seed=None,
**kwargs):
self.height_factor = height_factor
if isinstance(height_factor, (tuple, list)):
self.height_lower = abs(height_factor[0])
self.height_upper = height_factor[1]
else:
self.height_lower = self.height_upper = height_factor
if self.height_upper < 0.:
raise ValueError('`height_factor` cannot have negative values as upper '
'bound, got {}'.format(height_factor))
if abs(self.height_lower) > 1. or abs(self.height_upper) > 1.:
raise ValueError('`height_factor` must have values between [-1, 1], '
'got {}'.format(height_factor))
self.width_factor = width_factor
if isinstance(width_factor, (tuple, list)):
self.width_lower = abs(width_factor[0])
self.width_upper = width_factor[1]
else:
self.width_lower = self.width_upper = width_factor
if self.width_upper < 0.:
raise ValueError('`width_factor` cannot have negative values as upper '
'bound, got {}'.format(width_factor))
if abs(self.width_lower) > 1. or abs(self.width_upper) > 1.:
raise ValueError('`width_factor` must have values between [-1, 1], '
'got {}'.format(width_factor))
if fill_mode not in {'nearest', 'bilinear'}:
raise NotImplementedError(
'`fill_mode` {} is not supported yet.'.format(fill_mode))
self.fill_mode = fill_mode
self.fill_value = fill_value
self.seed = seed
self._rng = make_generator(self.seed)
self.input_spec = InputSpec(ndim=4)
super(RandomTranslation, self).__init__(**kwargs)
def call(self, inputs, training=None):
if training is None:
training = K.learning_phase()
def random_translated_inputs():
"""Translated inputs with random ops."""
inputs_shape = array_ops.shape(inputs)
batch_size = inputs_shape[0]
h_axis, w_axis = 1, 2
img_hd = math_ops.cast(inputs_shape[h_axis], dtypes.float32)
img_wd = math_ops.cast(inputs_shape[w_axis], dtypes.float32)
height_translate = self._rng.uniform(
shape=[batch_size, 1],
minval=-self.height_lower,
maxval=self.height_upper)
height_translate = height_translate * img_hd
width_translate = self._rng.uniform(
shape=[batch_size, 1],
minval=-self.width_lower,
maxval=self.width_upper)
width_translate = width_translate * img_wd
translations = math_ops.cast(
array_ops.concat([width_translate, height_translate], axis=1),
dtype=dtypes.float32)
return transform(
inputs,
get_translation_matrix(translations),
interpolation=self.fill_mode)
output = tf_utils.smart_cond(training, random_translated_inputs,
lambda: inputs)
output.set_shape(inputs.shape)
return output
def compute_output_shape(self, input_shape):
return input_shape
def get_config(self):
config = {
'height_factor': self.height_factor,
'width_factor': self.width_factor,
'fill_mode': self.fill_mode,
'fill_value': self.fill_value,
'seed': self.seed,
}
base_config = super(RandomTranslation, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
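A short usage sketch for the layer (module path as in this change; the eventual public export may differ):

import numpy as np
from tensorflow.python.keras.layers.preprocessing import image_preprocessing

layer = image_preprocessing.RandomTranslation(
    height_factor=0.2, width_factor=0.1, seed=1)
images = np.random.random((2, 5, 8, 3)).astype(np.float32)
shifted = layer(images, training=True)     # random per-image shifts
unchanged = layer(images, training=False)  # inference passes inputs through
print(shifted.shape)  # (2, 5, 8, 3)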
def get_translation_matrix(translations, name=None):
"""Returns projective transform(s) for the given translation(s).
Args:
translations: A matrix of 2-element lists representing [dx, dy] to translate
for each image (for a batch of images).
name: The name of the op.
Returns:
A tensor of shape `(num_images, 8)` of projective transforms that can be
given to `transform`.
"""
with ops.name_scope(name, 'translation_matrix'):
num_translations = array_ops.shape(translations)[0]
# The translation matrix looks like:
# [[1 0 -dx]
# [0 1 -dy]
# [0 0 1]]
# where the last entry is implicit.
# Translation matrices are always float32.
return array_ops.concat(
values=[
array_ops.ones((num_translations, 1), dtypes.float32),
array_ops.zeros((num_translations, 1), dtypes.float32),
-translations[:, 0, None],
array_ops.zeros((num_translations, 1), dtypes.float32),
array_ops.ones((num_translations, 1), dtypes.float32),
-translations[:, 1, None],
array_ops.zeros((num_translations, 2), dtypes.float32),
],
axis=1)
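Worked example (illustration only, eager mode): a translation of [dx, dy] = [2, 3] yields the flat transform below; the offsets appear negated because the matrix maps output coordinates back to input coordinates:

import tensorflow as tf

m = get_translation_matrix(tf.constant([[2.0, 3.0]]))
print(m.numpy())  # [[ 1.  0. -2.  0.  1. -3.  0.  0.]]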
def transform(images,
transforms,
interpolation='nearest',
output_shape=None,
name=None):
"""Applies the given transform(s) to the image(s).
Args:
images: A tensor of shape (num_images, num_rows, num_columns, num_channels)
(NHWC). The rank must be statically known (the shape is not
`TensorShape(None)`).
transforms: Projective transform matrix/matrices. A vector of length 8 or
tensor of size N x 8. If one row of transforms is [a0, a1, a2, b0, b1, b2,
c0, c1], then it maps the *output* point `(x, y)` to a transformed *input*
point `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, where
`k = c0 x + c1 y + 1`. The transforms are *inverted* compared to the
transform mapping input points to output points. Note that gradients are
not backpropagated into transformation parameters.
interpolation: Interpolation mode. Supported values: "NEAREST", "BILINEAR".
output_shape: Output dimension after the transform, `[height, width]`. If
None, the output is the same size as the input image.
name: The name of the op.
Returns:
Image(s) with the same type and shape as `images`, with the given
transform(s) applied. Transformed coordinates outside of the input image
will be filled with zeros.
Raises:
TypeError: If `images` is an invalid type.
ValueError: If `output_shape` is not a 1-D int32 Tensor of 2 elements.
"""
with ops.name_scope(name, 'transform'):
if output_shape is None:
output_shape = array_ops.shape(images)[1:3]
if not context.executing_eagerly():
output_shape_value = tensor_util.constant_value(output_shape)
if output_shape_value is not None:
output_shape = output_shape_value
output_shape = ops.convert_to_tensor(
output_shape, dtypes.int32, name='output_shape')
if not output_shape.get_shape().is_compatible_with([2]):
raise ValueError('output_shape must be a 1-D Tensor of 2 elements: '
'new_height, new_width, instead got '
'{}'.format(output_shape))
return image_ops.image_projective_transform_v2(
images,
output_shape=output_shape,
transforms=transforms,
interpolation=interpolation.upper())
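Combining the two helpers (illustration only, eager mode): build a one-pixel-right, two-pixels-down translation and apply it:

import tensorflow as tf

images = tf.reshape(tf.range(16, dtype=tf.float32), [1, 4, 4, 1])
matrix = get_translation_matrix(tf.constant([[1.0, 2.0]]))  # [dx, dy]
shifted = transform(images, matrix, interpolation='nearest')
print(shifted.shape)  # (1, 4, 4, 1); vacated pixels are filled with 0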
class RandomContrast(Layer):
"""Adjust the contrast of an image or images by a random factor.

View File

@@ -27,6 +27,7 @@ from tensorflow.python.keras import keras_parameterized
from tensorflow.python.keras import testing_utils
from tensorflow.python.keras.layers.preprocessing import image_preprocessing
from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
from tensorflow.python.ops import gen_stateful_random_ops
from tensorflow.python.ops import image_ops_impl as image_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import stateless_random_ops
@@ -459,5 +460,58 @@ class RandomContrastTest(keras_parameterized.TestCase):
self.assertEqual(layer_1.name, layer.name)
@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
class RandomTranslationTest(keras_parameterized.TestCase):
def _run_test(self, height_factor, width_factor):
np.random.seed(1337)
num_samples = 2
orig_height = 5
orig_width = 8
channels = 3
kwargs = {'height_factor': height_factor, 'width_factor': width_factor}
with tf_test_util.use_gpu():
testing_utils.layer_test(
image_preprocessing.RandomTranslation,
kwargs=kwargs,
input_shape=(num_samples, orig_height, orig_width, channels),
expected_output_shape=(None, orig_height, orig_width, channels))
@parameterized.named_parameters(
('random_translate_4_by_6', .4, .6), ('random_translate_3_by_2', .3, .2),
('random_translate_tuple_factor', (.5, .4), (.2, .3)))
def test_random_translation(self, height_factor, width_factor):
self._run_test(height_factor, width_factor)
def test_random_translation_negative_lower(self):
mock_offset = np.random.random((12, 1))
with test.mock.patch.object(
gen_stateful_random_ops, 'stateful_uniform', return_value=mock_offset):
with self.cached_session(use_gpu=True):
layer = image_preprocessing.RandomTranslation((-0.2, .3), .4)
layer_2 = image_preprocessing.RandomTranslation((0.2, .3), .4)
inp = np.random.random((12, 5, 8, 3)).astype(np.float32)
actual_output = layer(inp, training=1)
actual_output_2 = layer_2(inp, training=1)
self.assertAllClose(actual_output, actual_output_2)
def test_random_translation_inference(self):
with CustomObjectScope(
{'RandomTranslation': image_preprocessing.RandomTranslation}):
input_images = np.random.random((2, 5, 8, 3)).astype(np.float32)
expected_output = input_images
with tf_test_util.use_gpu():
layer = image_preprocessing.RandomTranslation(.5, .5)
actual_output = layer(input_images, training=0)
self.assertAllClose(expected_output, actual_output)
@tf_test_util.run_v2_only
def test_config_with_custom_name(self):
layer = image_preprocessing.RandomTranslation(.5, .6, name='image_preproc')
config = layer.get_config()
layer_1 = image_preprocessing.RandomTranslation.from_config(config)
self.assertEqual(layer_1.name, layer.name)
if __name__ == '__main__':
test.main()

View File

@@ -22,7 +22,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import linalg_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.ops.gen_image_ops import *
@@ -34,3 +39,105 @@ from tensorflow.python.ops.image_ops_impl import *
from tensorflow.python.ops.image_ops_impl import _Check3DImage
from tensorflow.python.ops.image_ops_impl import _ImageDimensions
# pylint: enable=unused-import
_IMAGE_DTYPES = frozenset([
dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float16, dtypes.float32,
dtypes.float64
])
def flat_transforms_to_matrices(transforms):
"""Converts `tf.contrib.image` projective transforms to affine matrices.
Note that the output matrices map output coordinates to input coordinates; to
obtain the forward transformation matrices, call `tf.linalg.inv` on the result.
Args:
transforms: Vector of length 8, or batches of transforms with shape `(N,
8)`.
Returns:
3D tensor of matrices with shape `(N, 3, 3)`. The output matrices map the
*output coordinates* (in homogeneous coordinates) of each transform to the
corresponding *input coordinates*.
Raises:
ValueError: If `transforms` have an invalid shape.
"""
with ops.name_scope("flat_transforms_to_matrices"):
transforms = ops.convert_to_tensor(transforms, name="transforms")
if transforms.shape.ndims not in (1, 2):
raise ValueError("Transforms should be 1D or 2D, got: %s" % transforms)
# Make the transform(s) 2D in case the input is a single transform.
transforms = array_ops.reshape(transforms, constant_op.constant([-1, 8]))
num_transforms = array_ops.shape(transforms)[0]
# Add a column of ones for the implicit last entry in the matrix.
return array_ops.reshape(
array_ops.concat(
[transforms, array_ops.ones([num_transforms, 1])], axis=1),
constant_op.constant([-1, 3, 3]))
def matrices_to_flat_transforms(transform_matrices):
"""Converts affine matrices to `tf.contrib.image` projective transforms.
Note that we expect matrices that map output coordinates to input coordinates.
To convert forward transformation matrices, call `tf.linalg.inv` on the
matrices and use the result here.
Args:
transform_matrices: One or more affine transformation matrices, for the
reverse transformation in homogeneous coordinates. Shape `(3, 3)` or `(N,
3, 3)`.
Returns:
2D tensor of flat transforms with shape `(N, 8)`, which may be passed into
`tf.contrib.image.transform`.
Raises:
ValueError: If `transform_matrices` have an invalid shape.
"""
with ops.name_scope("matrices_to_flat_transforms"):
transform_matrices = ops.convert_to_tensor(
transform_matrices, name="transform_matrices")
if transform_matrices.shape.ndims not in (2, 3):
raise ValueError("Matrices should be 2D or 3D, got: %s" %
transform_matrices)
# Flatten each matrix.
transforms = array_ops.reshape(transform_matrices,
constant_op.constant([-1, 9]))
# Divide each matrix by the last entry (normally 1).
transforms /= transforms[:, 8:9]
return transforms[:, :8]
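Round-trip sketch (illustration only, eager mode): a flat transform reshapes into a 3 x 3 matrix with an appended 1, and converting back recovers the original 8 parameters:

import tensorflow as tf

flat = tf.constant([[1., 0., -2., 0., 1., -3., 0., 0.]])
matrices = flat_transforms_to_matrices(flat)       # shape (1, 3, 3)
recovered = matrices_to_flat_transforms(matrices)  # shape (1, 8)
print(tf.reduce_all(tf.equal(flat, recovered)).numpy())  # True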
@ops.RegisterGradient("ImageProjectiveTransformV2")
def _image_projective_transform_grad(op, grad):
"""Computes the gradient for ImageProjectiveTransform."""
images = op.inputs[0]
transforms = op.inputs[1]
interpolation = op.get_attr("interpolation")
image_or_images = ops.convert_to_tensor(images, name="images")
transform_or_transforms = ops.convert_to_tensor(
transforms, name="transforms", dtype=dtypes.float32)
if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
raise TypeError("Invalid dtype %s." % image_or_images.dtype)
if len(transform_or_transforms.get_shape()) == 1:
transforms = transform_or_transforms[None]
elif len(transform_or_transforms.get_shape()) == 2:
transforms = transform_or_transforms
else:
raise TypeError("Transforms should have rank 1 or 2.")
# Invert transformations
transforms = flat_transforms_to_matrices(transforms=transforms)
inverse = linalg_ops.matrix_inverse(transforms)
transforms = matrices_to_flat_transforms(inverse)
output = gen_image_ops.image_projective_transform_v2(
images=grad,
transforms=transforms,
output_shape=array_ops.shape(image_or_images)[1:3],
interpolation=interpolation)
return [output, None, None]
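The gradient flows back only through images; the incoming gradient is warped by the inverted transform, while transforms and output_shape receive None. A hedged check with tf.GradientTape, again assuming the tf.raw_ops wrapper:

import tensorflow as tf

images = tf.Variable(tf.ones([1, 4, 4, 1]))
transforms = tf.constant([[1., 0., -1., 0., 1., 0., 0., 0.]])  # shift right
with tf.GradientTape() as tape:
    out = tf.raw_ops.ImageProjectiveTransformV2(
        images=images, transforms=transforms,
        output_shape=tf.constant([4, 4], tf.int32),
        interpolation="NEAREST")
    loss = tf.reduce_sum(out)
grad = tape.gradient(loss, images)
print(grad.shape)  # (1, 4, 4, 1), same shape as the input images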

View File

@@ -1764,6 +1764,10 @@ tf_module {
name: "Imag"
argspec: "args=[\'input\', \'Tout\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
}
member_method {
name: "ImageProjectiveTransformV2"
argspec: "args=[\'images\', \'transforms\', \'output_shape\', \'interpolation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "ImageSummary"
argspec: "args=[\'tag\', \'tensor\', \'max_images\', \'bad_color\', \'name\'], varargs=None, keywords=None, defaults=[\'3\', \'dtype: DT_UINT8\\ntensor_shape {\\n dim {\\n size: 4\\n }\\n}\\nint_val: 255\\nint_val: 0\\nint_val: 0\\nint_val: 255\\n\', \'None\'], "

View File

@@ -1764,6 +1764,10 @@ tf_module {
name: "Imag"
argspec: "args=[\'input\', \'Tout\', \'name\'], varargs=None, keywords=None, defaults=[\"<dtype: \'float32\'>\", \'None\'], "
}
member_method {
name: "ImageProjectiveTransformV2"
argspec: "args=[\'images\', \'transforms\', \'output_shape\', \'interpolation\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], "
}
member_method {
name: "ImageSummary"
argspec: "args=[\'tag\', \'tensor\', \'max_images\', \'bad_color\', \'name\'], varargs=None, keywords=None, defaults=[\'3\', \'dtype: DT_UINT8\\ntensor_shape {\\n dim {\\n size: 4\\n }\\n}\\nint_val: 255\\nint_val: 0\\nint_val: 0\\nint_val: 255\\n\', \'None\'], "