Merge pull request #5388 from vrv/master

Revert IDCT jpeg change and nvcc flag change
2016-11-03 15:48:41 -07:00 · 2016-11-03 15:48:41 -07:00 · a507438361
commit a507438361
parent 501b7137a9 14bc025ffb
4 changed files with 6 additions and 9 deletions
--- a/tensorflow/core/lib/jpeg/jpeg_mem.cc
+++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc
@@ -139,11 +139,8 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
  cinfo.do_fancy_upsampling = boolean(flags.fancy_upscaling);
  cinfo.scale_num = 1;
  cinfo.scale_denom = ratio;
-  // Activating this has a quality/speed trade-off implication.
+  // Activating this has a quality/speed trade-off implication:
-  // However, most JPEGs are already compressed, and so the faster IDCT
+  // cinfo.dct_method = JDCT_IFAST;
-  // should have no effect on training. The fast setting speeds up training on the
-  // GPU, e.g. by about 30% for smaller networks such as AlexNet.
-  cinfo.dct_method = JDCT_IFAST;
  jpeg_start_decompress(&cinfo);
--- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc
+++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc
@@ -165,7 +165,7 @@ TEST(JpegMemTest, Jpeg2) {
    // Compare the two images
    const int totalerr = ComputeSumAbsoluteDifference(
        imgdata1.get(), refdata1.get(), in_w, in_h, stride1, stride1);
-    CHECK_LE(totalerr, 120000);
+    CHECK_LE(totalerr, 85000);
  }
  // check the second image too. Should be bitwise identical to the first.
--- a/tensorflow/python/ops/image_ops_test.py
+++ b/tensorflow/python/ops/image_ops_test.py
@@ -1803,10 +1803,10 @@ class JpegTest(test_util.TensorFlowTestCase):
      jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
      # The decoded-encoded image should be similar to the input
-      self.assertLess(self.averageError(image0, image1), 0.7)
+      self.assertLess(self.averageError(image0, image1), 0.6)
      # We should be very close to a fixpoint
-      self.assertLess(self.averageError(image1, image2), 0.6)
+      self.assertLess(self.averageError(image1, image2), 0.02)
      # Smooth ramps compress well (input size is 153600)
      self.assertGreaterEqual(len(jpeg0), 5000)
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -416,7 +416,7 @@ def _cuda_copts():
        "@local_config_cuda//cuda:using_nvcc": (
            common_cuda_opts +
            [
-                "-nvcc_options=expt-relaxed-constexpr",
+                "-nvcc_options=relaxed-constexpr",
                "-nvcc_options=ftz=true",
            ]
        ),