Turn on CUDNN autotune by default.

* For Soumith's Convnet benchmarks,
  * GoogleNet V1 forward only becomes 2.82% faster.
  * VGG forward+backward becomes 3.48% faster.
  * GoogleNet V1 forward+backward becomes 1.66% faster.
* For Inception model at batch size 32, on Titan-X,
  * The step time is reduced from 1.11 sec to 0.97 sec, a 16.32% improvement.
* For microbenchmarks, here are the improvements:
Benchmark                          Base (ns)  New (ns) Improvement
------------------------------------------------------------------
BM_ConvFloatFwdGPU_conv0              235083    230767     +1.8%
BM_ConvFloatFwdGPU_conv1             1075211   1107800     -3.0%
BM_ConvFloatFwdGPU_conv2             1015770   1044204     -2.8%
BM_ConvFloatFwdGPU_conv3             1338677   1333840     +0.4%
BM_ConvFloatFwdGPU_conv4             1724488   1727875     -0.2%
BM_ConvFloatFwdGPU_conv5             1504656   1531122     -1.8%
BM_ConvFloatFwdGPU_conv6             1912314   1921835     -0.5%
BM_ConvFloatFwdGPU_conv7              833252    812320     +2.5%
BM_ConvFloatFwdGPU_conv8              704842    742914     -5.4%
BM_ConvFloatFwdGPU_conv9             1181595   1153533     +2.4%
BM_ConvFloatFwdGPU_conv10            1626990   1602748     +1.5%
BM_ConvFloatFwdGPU_conv11            1266993   1334205     -5.3%
BM_ConvFloatFwdGPU_conv12             778462    767860     +1.4%
BM_ConvFloatFwdGPU_conv13            3850331   2107377    +45.3%
BM_ConvFloatFwdGPU_conv14            4126061   4180073     -1.3%
BM_ConvFloatFwdGPU_conv15             678327    675829     +0.4%
BM_ConvFloatFwdGPU_conv16            1337845   1324671     +1.0%
BM_ConvFloatFwdGPU_conv17            1605443   1609892     -0.3%
BM_ConvFloatFwdGPU_conv18            1501101   1504725     -0.2%
BM_ConvFloatFwdGPU_conv19            1591419   1465860     +7.9%
BM_ConvFloatFwdGPU_conv20            3978635   4008382     -0.7%
BM_ConvFloatFwdGPU_conv21            1512956   1491781     +1.4%
BM_ConvFloatFwdGPU_conv22            1512534   1492847     +1.3%
BM_ConvFloatFwdGPU_conv23            4250634   2449213    +42.4%
BM_ConvFloatFwdGPU_conv24            1252755   1250047     +0.2%
BM_ConvFloatFwdGPU_conv25            3771888   3727033     +1.2%
BM_ConvFloatFwdGPU_conv26            1176322   1188693     -1.1%
BM_ConvFloatFwdGPU_conv27            1190219   1166078     +2.0%
BM_ConvFloatFwdGPU_conv28            1736335   1738561     -0.1%
BM_ConvFloatFwdGPU_conv29            2470491   2526576     -2.3%
BM_ConvFloatFwdGPU_conv30             880584    845829     +3.9%
BM_ConvFloatFwdGPU_conv31             950092    971105     -2.2%
BM_ConvFloatFwdGPU_conv32            1968954   1987465     -0.9%
BM_ConvFloatFwdGPU_conv33             918658    900723     +2.0%
BM_ConvFloatFwdGPU_conv34            1458851   1462193     -0.2%
BM_ConvFloatFwdGPU_conv35             687912    667973     +2.9%
BM_ConvFloatFwdGPU_conv36            2475984   2447614     +1.1%
BM_ConvFloatFwdGPU_conv37             691891    693028     -0.2%
BM_ConvFloatFwdGPU_conv38            1024818   1028796     -0.4%
BM_ConvFloatFwdGPU_conv39             792712    804433     -1.5%
BM_ConvFloatFwdGPU_conv40            2866529   2831128     +1.2%
BM_ConvFloatFwdGPU_conv41             825719    821005     +0.6%
BM_ConvFloatFwdGPU_conv42            6178692   6055745     +2.0%
BM_ConvFloatFwdGPU_conv43            1770585   1758200     +0.7%
BM_ConvFloatFwdGPU_conv44            1101019   1121091     -1.8%
BM_ConvFloatFwdGPU_conv45             953025    974867     -2.3%
BM_ConvFloatFwdGPU_conv46            1976171   1907614     +3.5%
BM_ConvFloatFwdGPU_conv47             926263    930521     -0.5%
BM_ConvFloatFwdGPU_conv48            2486172   2451860     +1.4%
BM_ConvFloatFwdGPU_conv49             612463    619752     -1.2%
BM_ConvFloatFwdGPU_conv50             669415    688190     -2.8%
BM_ConvFloatFwdGPU_conv51             669922    642478     +4.1%
BM_ConvFloatFwdGPU_conv52           13375846  13288659     +0.7%
BM_ConvFloatFwdGPU_conv53            1165725   1180657     -1.3%
BM_ConvFloatFwdGPU_conv54            8067519   7854240     +2.6%
BM_ConvFloatBkInGPU_conv0             211400    182926    +13.5%
BM_ConvFloatBkFilterGPU_conv0         202238    167241    +17.3%
BM_ConvFloatBkInGPU_conv1            1498547   1037392    +30.8%
BM_ConvFloatBkFilterGPU_conv1         781429    754140     +3.5%
BM_ConvFloatBkInGPU_conv2            1542272    970764    +37.1%
BM_ConvFloatBkFilterGPU_conv2         827241    812901     +1.7%
BM_ConvFloatBkInGPU_conv3             554840    575366     -3.7%
BM_ConvFloatBkFilterGPU_conv3        1390099   1297130     +6.7%
BM_ConvFloatBkInGPU_conv4            2679557   2629239     +1.9%
BM_ConvFloatBkFilterGPU_conv4        2391921   2415243     -1.0%
BM_ConvFloatBkInGPU_conv5             775991    819557     -5.6%
BM_ConvFloatBkFilterGPU_conv5        1518350   1555425     -2.4%
BM_ConvFloatBkInGPU_conv6            1164620   1124455     +3.4%
BM_ConvFloatBkFilterGPU_conv6        1886563   1878187     +0.4%
BM_ConvFloatBkInGPU_conv7            1014010    997501     +1.6%
BM_ConvFloatBkFilterGPU_conv7         836499    818683     +2.1%
BM_ConvFloatBkInGPU_conv8            1096261    976079    +11.0%
BM_ConvFloatBkFilterGPU_conv8         819271    809679     +1.2%
BM_ConvFloatBkInGPU_conv9             638050    599533     +6.0%
BM_ConvFloatBkFilterGPU_conv9        1204812   1178843     +2.2%
BM_ConvFloatBkInGPU_conv10           1158430   1223196     -5.6%
BM_ConvFloatBkFilterGPU_conv10       1732046   1718558     +0.8%
BM_ConvFloatBkInGPU_conv11            940582    890771     +5.3%
BM_ConvFloatBkFilterGPU_conv11       1538670   1436865     +6.6%
BM_ConvFloatBkInGPU_conv12           6819839    960485    +85.9%
BM_ConvFloatBkFilterGPU_conv12        686978    730785     -6.4%
BM_ConvFloatBkInGPU_conv13           2193316   2206764     -0.6%
BM_ConvFloatBkFilterGPU_conv13       3938868   2091134    +46.9%
BM_ConvFloatBkInGPU_conv14           2035871   2138318     -5.0%
BM_ConvFloatBkFilterGPU_conv14       4029626   4033444     -0.1%
BM_ConvFloatBkInGPU_conv15           6997156    890109    +87.3%
BM_ConvFloatBkFilterGPU_conv15        740402    701366     +5.3%
BM_ConvFloatBkInGPU_conv16           1424744   1406938     +1.2%
BM_ConvFloatBkFilterGPU_conv16       1671854   1462868    +12.5%
BM_ConvFloatBkInGPU_conv17           2700862   1992674    +26.2%
BM_ConvFloatBkFilterGPU_conv17       1305656   1322830     -1.3%
BM_ConvFloatBkInGPU_conv18           2957025   1864698    +36.9%
BM_ConvFloatBkFilterGPU_conv18       1225843   1221011     +0.4%
BM_ConvFloatBkInGPU_conv19           2983442   1838917    +38.4%
BM_ConvFloatBkFilterGPU_conv19       1143908   1181473     -3.3%
BM_ConvFloatBkInGPU_conv20           1746891   1792048     -2.6%
BM_ConvFloatBkFilterGPU_conv20       3858859   3947101     -2.3%
BM_ConvFloatBkInGPU_conv21           1049381   1057465     -0.8%
BM_ConvFloatBkFilterGPU_conv21       1960184   1963597     -0.2%
BM_ConvFloatBkInGPU_conv22           2709485   1962671    +27.6%
BM_ConvFloatBkFilterGPU_conv22       1347473   1337113     +0.8%
BM_ConvFloatBkInGPU_conv23           2488277   2444806     +1.7%
BM_ConvFloatBkFilterGPU_conv23       2393383   2361463     +1.3%
BM_ConvFloatBkInGPU_conv24           2317770   1555267    +32.9%
BM_ConvFloatBkFilterGPU_conv24       1005172    987688     +1.7%
BM_ConvFloatBkInGPU_conv25           1282727   1313422     -2.4%
BM_ConvFloatBkFilterGPU_conv25       3467895   3520604     -1.5%
BM_ConvFloatBkInGPU_conv26            931302    887955     +4.7%
BM_ConvFloatBkFilterGPU_conv26       1413088   1348387     +4.6%
BM_ConvFloatBkInGPU_conv27           2285721   1501425    +34.3%
BM_ConvFloatBkFilterGPU_conv27       1209520   1168316     +3.4%
BM_ConvFloatBkInGPU_conv28           2157998   2157376     +0.0%
BM_ConvFloatBkFilterGPU_conv28       3074795   1853044    +39.7%
BM_ConvFloatBkInGPU_conv29           1144831   1075297     +6.1%
BM_ConvFloatBkFilterGPU_conv29       2340646   2340184     +0.0%
BM_ConvFloatBkInGPU_conv30            858060    837645     +2.4%
BM_ConvFloatBkFilterGPU_conv30       1315830   1353214     -2.8%
BM_ConvFloatBkInGPU_conv31           1224674   1128456     +7.9%
BM_ConvFloatBkFilterGPU_conv31        707870    726953     -2.7%
BM_ConvFloatBkInGPU_conv32            996074   1014381     -1.8%
BM_ConvFloatBkFilterGPU_conv32       2107132   2063072     +2.1%
BM_ConvFloatBkInGPU_conv33           1223802   1110516     +9.3%
BM_ConvFloatBkFilterGPU_conv33        912262    862748     +5.4%
BM_ConvFloatBkInGPU_conv34           1466738   1551351     -5.8%
BM_ConvFloatBkFilterGPU_conv34       1974404   1923035     +2.6%
BM_ConvFloatBkInGPU_conv35            922659    939845     -1.9%
BM_ConvFloatBkFilterGPU_conv35        612561    568035     +7.3%
BM_ConvFloatBkInGPU_conv36            908895    895344     +1.5%
BM_ConvFloatBkFilterGPU_conv36       2953348   2899676     +1.8%
BM_ConvFloatBkInGPU_conv37            938952    892167     +5.0%
BM_ConvFloatBkFilterGPU_conv37        577438    569059     +1.5%
BM_ConvFloatBkInGPU_conv38           1138055   1096089     +3.7%
BM_ConvFloatBkFilterGPU_conv38       1011368   1008415     +0.3%
BM_ConvFloatBkInGPU_conv39            668144    673298     -0.8%
BM_ConvFloatBkFilterGPU_conv39       1358847   1298098     +4.5%
BM_ConvFloatBkInGPU_conv40           1380139   1331826     +3.5%
BM_ConvFloatBkFilterGPU_conv40       3541527   3069069    +13.3%
BM_ConvFloatBkInGPU_conv41           1638383   1595251     +2.6%
BM_ConvFloatBkFilterGPU_conv41       1005443    987946     +1.7%
BM_ConvFloatBkInGPU_conv42          17024559  10725787    +37.0%
BM_ConvFloatBkFilterGPU_conv42       6567765   6515355     +0.8%
BM_ConvFloatBkInGPU_conv43           1780598   1708543     +4.0%
BM_ConvFloatBkFilterGPU_conv43       2356016   2281999     +3.1%
BM_ConvFloatBkInGPU_conv44            931335    971200     -4.3%
BM_ConvFloatBkFilterGPU_conv44       1346236   1339928     +0.5%
BM_ConvFloatBkInGPU_conv45            610336    608156     +0.4%
BM_ConvFloatBkFilterGPU_conv45       1247724   1208773     +3.1%
BM_ConvFloatBkInGPU_conv46           3368269   2161475    +35.8%
BM_ConvFloatBkFilterGPU_conv46       2161988   2140970     +1.0%
BM_ConvFloatBkInGPU_conv47            500600    549664     -9.8%
BM_ConvFloatBkFilterGPU_conv47       1239103   1201332     +3.0%
BM_ConvFloatBkInGPU_conv48           2505748   2487250     +0.7%
BM_ConvFloatBkFilterGPU_conv48       3181887   3196408     -0.5%
BM_ConvFloatBkInGPU_conv49            654636    752578    -15.0%
BM_ConvFloatBkFilterGPU_conv49        614054    640264     -4.3%
BM_ConvFloatBkInGPU_conv50           1046576   1022585     +2.3%
BM_ConvFloatBkFilterGPU_conv50        928998    884173     +4.8%
BM_ConvFloatBkInGPU_conv51            831912    805962     +3.1%
BM_ConvFloatBkFilterGPU_conv51        833301    890314     -6.8%
BM_ConvFloatBkInGPU_conv52          13575989  13244294     +2.4%
BM_ConvFloatBkFilterGPU_conv52      26960865  14528291    +46.1%
BM_ConvFloatBkInGPU_conv53           1212746   1193415     +1.6%
BM_ConvFloatBkFilterGPU_conv53       1617787   1599532     +1.1%
BM_ConvFloatBkInGPU_conv54           7143853   9979079    -39.7%
BM_ConvFloatBkFilterGPU_conv54       7261642   9516172    -31.0%
BM_ConvFloatBkFGPU_128_128_128_3_96_11_11 65676662  67077190     -2.1%
BM_ConvFloatBkFGPU_128_64_64_64_128_9_9 30481444  30479591     +0.0%
BM_ConvFloatBkFGPU_128_32_32_128_128_9_9  9184052   9309441     -1.4%
BM_ConvFloatBkFGPU_128_16_16_128_128_7_7  1783974   1728034     +3.1%
BM_ConvFloatBkFGPU_128_13_13_384_384_3_3  9793620   9728267     +0.7%
BM_ConvFloatDepthwiseFwdGPU_conv0    2485053   2423786     +2.5%
BM_ConvFloatDepthwiseFwdGPU_conv1    9232311   9385025     -1.7%
BM_ConvFloatDepthwiseFwdGPU_conv2    3951763   4081355     -3.3%
BM_ConvFloatDepthwiseFwdGPU_conv3    1072853   1075711     -0.3%
BM_ConvFloatDepthwiseFwdGPU_conv4     857038    834950     +2.6%
BM_ConvFloatDepthwiseFwdGPU_conv5     849175    851622     -0.3%
BM_ConvFloatDepthwiseFwdGPU_conv6     492490    463820     +5.8%
BM_ConvFloatDepthwiseFwdGPU_conv7     699378    715631     -2.3%
BM_ConvFloatDepthwiseFwdGPU_conv8     655144    622416     +5.0%
BM_ConvFloatDepthwiseBkInGPU_conv0   2521530   2564153     -1.7%
BM_ConvFloatDepthwiseBkFilterGPU_conv0 65102549  65026603     +0.1%
BM_ConvFloatDepthwiseBkInGPU_conv1   9572755  11527412    -20.4%
BM_ConvFloatDepthwiseBkFilterGPU_conv1 67341583  66854785     +0.7%
BM_ConvFloatDepthwiseBkInGPU_conv2   5038497   5191688     -3.0%
BM_ConvFloatDepthwiseBkFilterGPU_conv2 17959694  18075388     -0.6%
BM_ConvFloatDepthwiseBkInGPU_conv3   2733609   2583003     +5.5%
BM_ConvFloatDepthwiseBkFilterGPU_conv3  5430360   5479402     -0.9%
BM_ConvFloatDepthwiseBkInGPU_conv4    895653    933757     -4.3%
BM_ConvFloatDepthwiseBkFilterGPU_conv4  8054025   8008655     +0.6%
BM_ConvFloatDepthwiseBkInGPU_conv5    859509    818970     +4.7%
BM_ConvFloatDepthwiseBkFilterGPU_conv5  3151537   3052507     +3.1%
BM_ConvFloatDepthwiseBkInGPU_conv6    482079    474622     +1.5%
BM_ConvFloatDepthwiseBkFilterGPU_conv6  1040890   1048557     -0.7%
BM_ConvFloatDepthwiseBkInGPU_conv7    920856    905214     +1.7%
BM_ConvFloatDepthwiseBkFilterGPU_conv7 16564049  16556264     +0.0%
BM_ConvFloatDepthwiseBkInGPU_conv8    862814    864988     -0.3%
BM_ConvFloatDepthwiseBkFilterGPU_conv8 16168442  16169527     -0.0%
Change: 123168911
This commit is contained in:
Xiaoqiang Zheng 2016-05-24 17:20:32 -08:00 committed by TensorFlower Gardener
parent f577ada969
commit 19d33a1a3e

View File

@ -37,7 +37,7 @@ static bool ReadBoolFromEnvVar(const char* env_var_name, bool default_val) {
bool CanUseCudnn() { return ReadBoolFromEnvVar("TF_USE_CUDNN", true); }
bool CudnnUseAutotune() {
return ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", false);
return ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true);
}
} // namespace tensorflow