Merge pull request #5388 from vrv/master
Revert IDCT jpeg change and nvcc flag change
This commit is contained in:
commit
a507438361
@@ -139,11 +139,8 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) {
|
|||||||
cinfo.do_fancy_upsampling = boolean(flags.fancy_upscaling);
|
cinfo.do_fancy_upsampling = boolean(flags.fancy_upscaling);
|
||||||
cinfo.scale_num = 1;
|
cinfo.scale_num = 1;
|
||||||
cinfo.scale_denom = ratio;
|
cinfo.scale_denom = ratio;
|
||||||
// Activating this has a quality/speed trade-off implication.
|
// Activating this has a quality/speed trade-off implication:
|
||||||
// However, most JPEGs are already compressed, and so the faster IDCT
|
// cinfo.dct_method = JDCT_IFAST;
|
||||||
// should have no effect on training. The fast setting speeds up training on the
|
|
||||||
// GPU, e.g. by about 30% for smaller networks such as AlexNet.
|
|
||||||
cinfo.dct_method = JDCT_IFAST;
|
|
||||||
|
|
||||||
jpeg_start_decompress(&cinfo);
|
jpeg_start_decompress(&cinfo);
|
||||||
|
|
||||||
|
@@ -165,7 +165,7 @@ TEST(JpegMemTest, Jpeg2) {
|
|||||||
// Compare the two images
|
// Compare the two images
|
||||||
const int totalerr = ComputeSumAbsoluteDifference(
|
const int totalerr = ComputeSumAbsoluteDifference(
|
||||||
imgdata1.get(), refdata1.get(), in_w, in_h, stride1, stride1);
|
imgdata1.get(), refdata1.get(), in_w, in_h, stride1, stride1);
|
||||||
CHECK_LE(totalerr, 120000);
|
CHECK_LE(totalerr, 85000);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check the second image too. Should be bitwise identical to the first.
|
// check the second image too. Should be bitwise identical to the first.
|
||||||
|
@@ -1803,10 +1803,10 @@ class JpegTest(test_util.TensorFlowTestCase):
|
|||||||
jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
|
jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
|
||||||
|
|
||||||
# The decoded-encoded image should be similar to the input
|
# The decoded-encoded image should be similar to the input
|
||||||
self.assertLess(self.averageError(image0, image1), 0.7)
|
self.assertLess(self.averageError(image0, image1), 0.6)
|
||||||
|
|
||||||
# We should be very close to a fixpoint
|
# We should be very close to a fixpoint
|
||||||
self.assertLess(self.averageError(image1, image2), 0.6)
|
self.assertLess(self.averageError(image1, image2), 0.02)
|
||||||
|
|
||||||
# Smooth ramps compress well (input size is 153600)
|
# Smooth ramps compress well (input size is 153600)
|
||||||
self.assertGreaterEqual(len(jpeg0), 5000)
|
self.assertGreaterEqual(len(jpeg0), 5000)
|
||||||
|
@@ -416,7 +416,7 @@ def _cuda_copts():
|
|||||||
"@local_config_cuda//cuda:using_nvcc": (
|
"@local_config_cuda//cuda:using_nvcc": (
|
||||||
common_cuda_opts +
|
common_cuda_opts +
|
||||||
[
|
[
|
||||||
"-nvcc_options=expt-relaxed-constexpr",
|
"-nvcc_options=relaxed-constexpr",
|
||||||
"-nvcc_options=ftz=true",
|
"-nvcc_options=ftz=true",
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
|
Loading…
Reference in New Issue
Block a user