# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark for Matmul operator."""
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import itertools
|
|
import time
|
|
|
|
import numpy as np
|
|
|
|
from tensorflow.python.client import session as session_lib
|
|
from tensorflow.python.framework import ops
|
|
from tensorflow.python.ops import control_flow_ops
|
|
from tensorflow.python.ops import math_ops
|
|
from tensorflow.python.ops import random_ops
|
|
from tensorflow.python.ops import variables
|
|
from tensorflow.python.platform import test
|
|
|
|
|
|
def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
  """Build a graph containing a single matmul operation.

  Args:
    device: String, the device to run on.
    n: tensor A's first dimension size.
    m: tensor A's second dimension size.
    k: tensor B's second dimension size.
    transpose_a: boolean, whether tensor A is stored transposed.
    transpose_b: boolean, whether tensor B is stored transposed.
    dtype: numpy data type of the input tensors.

  Returns:
    A matmul operation to run().
  """
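  # The operands are held in (non-resource) variables: random_uniform runs
  # once, when the initializer executes, so each session.run of the returned
  # op times only the matmul itself.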
  with ops.device('%s' % device):
    if not transpose_a:
      x = variables.VariableV1(random_ops.random_uniform([n, m], dtype=dtype),
                               use_resource=False)
    else:
      x = variables.VariableV1(random_ops.random_uniform([m, n], dtype=dtype),
                               use_resource=False)
    if not transpose_b:
      y = variables.VariableV1(random_ops.random_uniform([m, k], dtype=dtype),
                               use_resource=False)
    else:
      y = variables.VariableV1(random_ops.random_uniform([k, m], dtype=dtype),
                               use_resource=False)

    z = math_ops.matmul(x, y, transpose_a=transpose_a, transpose_b=transpose_b)
    return control_flow_ops.group(z)


class MatmulBenchmark(test.Benchmark):
  """Benchmark matmul!"""

  def run_graph(self, device, n, m, k, transpose_a, transpose_b, num_iters,
                dtype):
    """Run the graph and print its execution time.

    Args:
      device: String, the device to run on.
      n: tensor A's first dimension size.
      m: tensor A's second dimension size.
      k: tensor B's second dimension size.
      transpose_a: boolean, whether tensor A is stored transposed.
      transpose_b: boolean, whether tensor B is stored transposed.
      num_iters: number of iterations to run the benchmark.
      dtype: numpy data type of the input tensors.

    Returns:
      The duration of the run in seconds.
    """
    graph = ops.Graph()
    with graph.as_default():
      output = build_graph(device, n, m, k, transpose_a, transpose_b, dtype)
      with session_lib.Session(graph=graph) as session:
        variables.global_variables_initializer().run()
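        # Warmup: run the op 500 times before timing so that one-time startup
        # costs do not pollute the measurement.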
        for _ in range(500):
          session.run(output)
        start_time = time.time()
        for _ in range(num_iters):
          session.run(output)
        duration = time.time() - start_time
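        # An [n, m] x [m, k] matmul performs n * m * k multiply-add pairs,
        # i.e. 2 * n * m * k scalar operations per iteration.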
        num_items = n * m * k * 2
        throughput = num_items * num_iters / duration / 1e9
        print('%s %s input_info:%s %d %.4fsec, %.4fGitems/s.' %
              (device, str(dtype), str(n) + 'x' + str(m) + 'x' + str(k) +
               ',ta:' + str(transpose_a) + ',tb:' + str(transpose_b),
               num_iters, duration, throughput))

      name_template = 'matmul_{device}_{dtype}_input_info_{inputinfo}'

      self.report_benchmark(
          name=name_template.format(
              device=device,
              dtype=str(dtype).replace(' ', ''),
              inputinfo=str(n) + 'x' + str(m) + 'x' + str(k) + ',ta:' +
              str(transpose_a) + ',tb:' + str(transpose_b)).replace(' ', ''),
          iters=num_iters,
          wall_time=duration)
    return duration

  def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
    self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b,
                   num_iters, dtype)

  def test_round(self, num_iters):
    dtypes = [np.float32, np.float64]
    for dtype in dtypes:
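      # Three shape families are covered for each dtype: square-output
      # matmuls with k = n, skinny shapes with a large k, and two fixed
      # extreme-aspect-ratio cases; each runs under all three transpose modes.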
      for n, m, (transpose_a, transpose_b) in itertools.product(
          [512, 1024], [1, 8, 16, 128],
          [(False, False), (True, False), (False, True)]):
        k = n
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

      for n, m, k, (transpose_a, transpose_b) in itertools.product(
          [200], [1, 8, 20], [10000],
          [(False, False), (True, False), (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

      for (n, m, k), (transpose_a, transpose_b) in itertools.product(
          [(200, 20, 20000), (1, 10000, 200)],
          [(False, False), (True, False), (False, True)]):
        self.run_test_gpu(n, m, k, transpose_a, transpose_b, dtype, num_iters)

  def benchmark_matmul(self):
    self.test_round(num_iters=200)


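# Typical invocation (assuming TensorFlow's --benchmarks regex flag, which is
# how test.Benchmark subclasses are usually selected; exact flag handling
# depends on the TensorFlow build):
#   python matmul_benchmark.py --benchmarks=MatmulBenchmark.benchmark_matmul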
if __name__ == '__main__':
  test.main()