Use BlasGemv() when autotune is not set.

PiperOrigin-RevId: 163754092
This commit is contained in:
Yangzihao Wang 2017-07-31 15:05:49 -07:00 committed by TensorFlower Gardener
parent 724884f1ca
commit d03ba54f72

View File

@ -212,6 +212,11 @@ bool LaunchBlasGemv<Eigen::half>::IsSupported() {
return false;
}
template <typename T>
bool ShouldUseGemv(uint64 n) {
return (LaunchBlasGemv<T>::IsSupported() && n == 1);
}
} // namespace
bool GetCublasAutotuneComputationType(
@ -339,7 +344,7 @@ struct LaunchMatMul<GPUDevice, T, true /* USE_CUBLAS */> {
}
}
// Try BlasGemvWithProfiling
if (LaunchBlasGemv<T>::IsSupported() && n == 1) {
if (ShouldUseGemv<T>(n)) {
LaunchBlasGemv<T>::Compute(ctx, stream, !transpose_a,
transpose_a ? m : k, transpose_a ? k : m,
a_ptr, b_ptr, &c_ptr, &profile_result);
@ -385,11 +390,17 @@ struct LaunchMatMul<GPUDevice, T, true /* USE_CUBLAS */> {
// 2) compute type does not support autotune;
// 3) no algorithm is found;
// 4) all internal kernels in autotune return invalid results.
// For the following case, we use normal BlasGemv():
// 1) We didn't set the use_autotune flag but LaunchBlasGemv is supported
// and n == 1.
// 2) We set the use_autotune flag and it picked up BlasGemv() and set the
// algorithm_config.algorithm() to be kDefaultBlasGemv.
if (!use_autotune || !compute_type_supported || algorithms->empty() ||
algorithm_config.algorithm() == kNoAlgorithm ||
algorithm_config.algorithm() == kDefaultBlasGemm ||
algorithm_config.algorithm() == kDefaultBlasGemv) {
if (algorithm_config.algorithm() == kDefaultBlasGemv) {
if (algorithm_config.algorithm() == kDefaultBlasGemv ||
ShouldUseGemv<T>(n)) {
// This is a matrix*vector multiply so use GEMV to compute A * b.
// Here we are multiplying in the natural order, so we have to flip
// the transposition flag to compensate for the tensor being stored