# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark for split and grad of split."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import benchmark
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging as logging


def build_graph(device, input_shape, output_sizes, axis):
  """Build a graph containing a sequence of split operations.

  Args:
    device: string, the device to run on.
    input_shape: shape of the input tensor.
    output_sizes: size of each output along axis.
    axis: axis to split along.

  Returns:
    A grouped op that depends on all of the split outputs, suitable for
    passing to session.run().
  """
  with ops.device("/%s:0" % device):
    inp = array_ops.zeros(input_shape)

    outputs = []
    # Repeat the split 100 times so the measurement is dominated by the op
    # itself rather than by per-run overhead.
    for _ in range(100):
      outputs.extend(array_ops.split(inp, output_sizes, axis))
    return control_flow_ops.group(*outputs)
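

# For reference, array_ops.split with a list of sizes returns one tensor per
# list entry. A minimal sketch (the shapes here are illustrative only, not
# part of the benchmark):
#
#   pieces = array_ops.split(array_ops.zeros([4, 30]), [10, 20], axis=1)
#   # pieces[0] has shape [4, 10]; pieces[1] has shape [4, 20]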


class SplitBenchmark(test.Benchmark):
  """Benchmark split!"""

  def _run_graph(self, device, output_shape, variable, num_outputs, axis):
    """Run the graph and print its execution time.

    Results are reported through run_op_benchmark rather than returned.

    Args:
      device: string, the device to run on.
      output_shape: shape of each output tensor.
      variable: if True, draw ragged output sizes at random instead of
        splitting into equal pieces.
      num_outputs: the number of outputs to split the input into.
      axis: axis to split along.
    """
    graph = ops.Graph()
    with graph.as_default():
      if not variable:
        if axis == 0:
          input_shape = [output_shape[0] * num_outputs, output_shape[1]]
          sizes = [output_shape[0] for _ in range(num_outputs)]
        else:
          input_shape = [output_shape[0], output_shape[1] * num_outputs]
          sizes = [output_shape[1] for _ in range(num_outputs)]
      else:
        sizes = np.random.randint(
            low=max(1, output_shape[axis] - 2),
            high=output_shape[axis] + 2,
            size=num_outputs)
        total_size = np.sum(sizes)
        if axis == 0:
          input_shape = [total_size, output_shape[1]]
        else:
          input_shape = [output_shape[0], total_size]
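
      # Note: with variable=True the split is ragged. For output_shape[axis]
      # == 8, for example, each size is drawn uniformly from {6, 7, 8, 9},
      # since np.random.randint's upper bound is exclusive.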

      outputs = build_graph(device, input_shape, sizes, axis)
    # Opt level L0 disables graph optimizations such as constant folding, so
    # the splits of the all-zeros input are actually executed.
    config = config_pb2.ConfigProto(graph_options=config_pb2.GraphOptions(
        optimizer_options=config_pb2.OptimizerOptions(
            opt_level=config_pb2.OptimizerOptions.L0)))

    with session_lib.Session(graph=graph, config=config) as session:
      logging.set_verbosity("info")
      variables.global_variables_initializer().run()
      bench = benchmark.TensorFlowBenchmark()
      bench.run_op_benchmark(
          session,
          outputs,
          # 4 bytes per float32 element, x2 (apparently one read plus one
          # write), x100 for the iterations in build_graph; /1e6 gives MB.
          mbs=input_shape[0] * input_shape[1] * 4 * 2 * 100 / 1e6,
          extras={
              "input_shape": input_shape,
              "variable": variable,
              "axis": axis
          })
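
      # Because mbs is supplied, run_op_benchmark also derives a throughput
      # figure (roughly mbs / wall_time); the exact reporting format depends
      # on the TensorFlow version.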

  def benchmark_split(self):
    print("Forward vs backward split")
    shapes = [[2000, 8], [8, 2000], [100, 18], [1000, 18], [10000, 18],
              [100, 97], [1000, 97], [10000, 1], [1, 10000]]
    axis_ = [1]  # 0 is very fast because it doesn't actually do any copying
    num_outputs = 100
    variable = [False, True]  # fixed input size or not
    for shape in shapes:
      for axis in axis_:
        for v in variable:
          self._run_graph("gpu", shape, v, num_outputs, axis)
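

# These benchmarks are typically selected with the --benchmarks flag of the
# test binary; the file name and exact invocation below are illustrative and
# may vary by TensorFlow version:
#
#   python split_benchmark.py --benchmarks=SplitBenchmark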


if __name__ == "__main__":
  test.main()