From 2aeedc38c604f3e9f028d696247512ea1c8522ee Mon Sep 17 00:00:00 2001 From: Marek Kolodziej Date: Mon, 31 Oct 2016 11:39:24 -0700 Subject: [PATCH] Use fast IDCT for JPEG decoding by default (#5072) --- tensorflow/core/lib/jpeg/jpeg_mem.cc | 7 +++++-- tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc | 2 +- tensorflow/python/ops/image_ops_test.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index 426120b2100..56c247560b0 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -139,8 +139,11 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { cinfo.do_fancy_upsampling = boolean(flags.fancy_upscaling); cinfo.scale_num = 1; cinfo.scale_denom = ratio; - // Activating this has a quality/speed trade-off implication: - // cinfo.dct_method = JDCT_IFAST; + // Using the fast integer IDCT (JDCT_IFAST) trades some decoding accuracy for speed. + // However, JPEG data is already lossily compressed, so the small extra error from the + // faster IDCT should have a negligible effect on training. The fast setting speeds up training on the + // GPU, e.g. by about 30% for smaller networks such as AlexNet. + cinfo.dct_method = JDCT_IFAST; jpeg_start_decompress(&cinfo); diff --git a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc index d0b4bef5d47..fc3961ad6ae 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc @@ -165,7 +165,7 @@ TEST(JpegMemTest, Jpeg2) { // Compare the two images const int totalerr = ComputeSumAbsoluteDifference( imgdata1.get(), refdata1.get(), in_w, in_h, stride1, stride1); - CHECK_LE(totalerr, 85000); + CHECK_LE(totalerr, 120000); } // check the second image too. Should be bitwise identical to the first. 
diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index 4d379fb3886..475019f3b41 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -1803,10 +1803,10 @@ class JpegTest(test_util.TensorFlowTestCase): jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2]) # The decoded-encoded image should be similar to the input - self.assertLess(self.averageError(image0, image1), 0.6) + self.assertLess(self.averageError(image0, image1), 0.7) # We should be very close to a fixpoint - self.assertLess(self.averageError(image1, image2), 0.02) + self.assertLess(self.averageError(image1, image2), 0.6) # Smooth ramps compress well (input size is 153600) self.assertGreaterEqual(len(jpeg0), 5000)