Fix DecodeImage issue where decoding animated GIF fails if first frame does not fill the canvas.

PiperOrigin-RevId: 337830066 Change-Id: Iecdf4cb7f6815f65e7abceb85b61e8d85d80fa9e
2020-10-19 04:40:07 -07:00 · 2020-10-19 04:40:07 -07:00 · de10689433
commit de10689433
parent ab4fc59e91
7 changed files with 67 additions and 16 deletions
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@ -2128,6 +2128,8 @@ filegroup(
        # GIF data
        "lib/gif/testdata/lena.gif",
        "lib/gif/testdata/scan.gif",
+        "lib/gif/testdata/red_black.gif",
+        "lib/gif/testdata/squares.gif",
        # GIF data with optimization
        "lib/gif/testdata/optimized.gif",
        # BMP data
--- a/tensorflow/core/api_def/base_api/api_def_DecodeImage.pbtxt
+++ b/tensorflow/core/api_def/base_api/api_def_DecodeImage.pbtxt
@ -47,5 +47,11 @@ constructing your graph if you are intermixing GIF files with BMP, JPEG, and/or
 PNG files. Alternately, set the expand_animations argument of this function to
 False, in which case the op will return 3-dimensional tensors and will truncate
 animated GIF files to the first frame.
+
+*NOTE*: If the first frame of an animated GIF does not occupy the entire
+canvas (maximum frame width x maximum frame height), then the op fills the
+unoccupied areas (in the first frame) with zeros (black). For any later frame
+that does not occupy the entire canvas, the op uses the previous frame to
+fill the unoccupied areas.
 END
 }
--- a/tensorflow/core/lib/gif/gif_io.cc
+++ b/tensorflow/core/lib/gif/gif_io.cc
@ -85,7 +85,6 @@ uint8* Decode(const void* srcdata, int datasize,
  }

  int target_num_frames = gif_file->ImageCount;
-  if (!expand_animations) target_num_frames = 1;

  // Don't request more memory than needed for each frame, preventing OOM
  int max_frame_width = 0;
@ -101,6 +100,7 @@ uint8* Decode(const void* srcdata, int datasize,
  const int width = max_frame_width;
  const int height = max_frame_height;
  const int channel = 3;
+  if (!expand_animations) target_num_frames = 1;

  uint8* const dstdata =
      allocate_output(target_num_frames, width, height, channel);
@ -118,27 +118,36 @@ uint8* Decode(const void* srcdata, int datasize,

    if (img_desc->Left != 0 || img_desc->Top != 0 || img_desc->Width != width ||
        img_desc->Height != height) {
-      // If the first frame does not fill the entire canvas then return error.
+      // If the first frame does not fill the entire canvas then fill the
+      // unoccupied canvas with zeros (black).
      if (k == 0) {
-        *error_string = "the first frame does not fill the canvas";
-        return nullptr;
+        for (int i = 0; i < height; ++i) {
+          uint8* p_dst = this_dst + i * width * channel;
+          for (int j = 0; j < width; ++j) {
+            p_dst[j * channel + 0] = 0;
+            p_dst[j * channel + 1] = 0;
+            p_dst[j * channel + 2] = 0;
+          }
+        }
+      } else {
+        // Otherwise previous frame will be reused to fill the unoccupied
+        // canvas.
+        uint8* last_dst = dstdata + (k - 1) * width * channel * height;
+        for (int i = 0; i < height; ++i) {
+          uint8* p_dst = this_dst + i * width * channel;
+          uint8* l_dst = last_dst + i * width * channel;
+          for (int j = 0; j < width; ++j) {
+            p_dst[j * channel + 0] = l_dst[j * channel + 0];
+            p_dst[j * channel + 1] = l_dst[j * channel + 1];
+            p_dst[j * channel + 2] = l_dst[j * channel + 2];
+          }
+        }
      }
-      // Otherwise previous frame will be reused to fill the unoccupied canvas.
+
      imgLeft = std::max(imgLeft, 0);
      imgTop = std::max(imgTop, 0);
      imgRight = std::min(imgRight, width);
      imgBottom = std::min(imgBottom, height);
-
-      uint8* last_dst = dstdata + (k - 1) * width * channel * height;
-      for (int i = 0; i < height; ++i) {
-        uint8* p_dst = this_dst + i * width * channel;
-        uint8* l_dst = last_dst + i * width * channel;
-        for (int j = 0; j < width; ++j) {
-          p_dst[j * channel + 0] = l_dst[j * channel + 0];
-          p_dst[j * channel + 1] = l_dst[j * channel + 1];
-          p_dst[j * channel + 2] = l_dst[j * channel + 2];
-        }
-      }
    }

    ColorMapObject* color_map = this_image->ImageDesc.ColorMap
--- a/tensorflow/core/lib/gif/testdata/red_black.gif
+++ b/tensorflow/core/lib/gif/testdata/red_black.gif
--- a/tensorflow/core/lib/gif/testdata/squares.gif
+++ b/tensorflow/core/lib/gif/testdata/squares.gif
--- a/tensorflow/python/ops/image_ops_impl.py
+++ b/tensorflow/python/ops/image_ops_impl.py
@ -2964,6 +2964,12 @@ def decode_image(contents,
  function to `False`, in which case the op will return 3-dimensional tensors
  and will truncate animated GIF files to the first frame.

+  NOTE: If the first frame of an animated GIF does not occupy the entire
+  canvas (maximum frame width x maximum frame height), then the op fills the
+  unoccupied areas (in the first frame) with zeros (black). For any later frame
+  that does not occupy the entire canvas, the op uses the previous frame to
+  fill the unoccupied areas.
+
  Args:
    contents: 0-D `string`. The encoded image bytes.
    channels: An optional `int`. Defaults to `0`. Number of color channels for
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@ -5756,6 +5756,34 @@ class DecodeImageTest(test_util.TensorFlowTestCase, parameterized.TestCase):
      img = image_ops.decode_and_crop_jpeg(img_bytes, [1, 1, 2, 2])
      self.evaluate(img)

+  def testGifFramesWithDiffSize(self):
+    """Test decoding an animated GIF.
+
+    This test verifies that `decode_image` op can decode animated GIFs whose
+    first frame does not fill the canvas. The unoccupied areas should be filled
+    with zeros (black).
+
+    `squares.gif` is animated with two images of different sizes. It
+    alternates between a smaller image of size 10 x 10 and a larger image of
+    size 16 x 16. Because it starts animating with the smaller image, the first
+    frame does not fill the canvas. (Canvas size is equal to max frame width x
+    max frame height.)
+
+    `red_black.gif` has just a single image in a GIF format. It is the same
+    image as the smaller image (size 10 x 10) of the two images in
+    `squares.gif`. The only difference is that its background (canvas - smaller
+    image) is pre-filled with zeros (black); it is the groundtruth.
+    """
+    base = "tensorflow/core/lib/gif/testdata"
+    gif_bytes0 = io_ops.read_file(os.path.join(base, "squares.gif"))
+    image0 = image_ops.decode_image(gif_bytes0, dtype=dtypes.float32,
+                                    expand_animations=False)
+    gif_bytes1 = io_ops.read_file(os.path.join(base, "red_black.gif"))
+    image1 = image_ops.decode_image(gif_bytes1, dtype=dtypes.float32)
+    image1_0 = array_ops.gather(image1, 0)
+    image0, image1_0 = self.evaluate([image0, image1_0])
+    self.assertAllEqual(image0, image1_0)
+

 if __name__ == "__main__":
  googletest.main()