The ROCm platform does not currently support GPU stream-level tracing / profiling. This commit skips the subtests (within the Python unit tests) that exercise this functionality. The skip is guarded by a call to `test.is_built_with_rocm()`, so these unit tests are not affected in any way when running TF that was built without ROCm support (i.e. without `--config=rocm`).
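For reference, here is a minimal standalone sketch of the guard pattern applied in `testTimelineGpu` below. `test.is_built_with_rocm()` is the real TensorFlow helper used by this change; the `devices` list is purely illustrative data standing in for the device names collected from `run_metadata.step_stats`, not output from an actual run:

```python
# Minimal sketch of the ROCm guard pattern (illustrative only).
from tensorflow.python.platform import test

# Hypothetical device list standing in for run_metadata.step_stats contents.
devices = [
    '/job:localhost/replica:0/task:0/device:GPU:0',
    '/device:GPU:0/stream:all',  # emitted on CUDA builds, absent on ROCm
]

if not test.is_built_with_rocm():
  # Stream-level tracing is not yet supported on the ROCm platform, so the
  # per-stream trace entry is only expected on non-ROCm builds.
  assert '/device:GPU:0/stream:all' in devices
```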
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow.python.client.Timeline."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json

from tensorflow.core.protobuf import config_pb2
from tensorflow.python.client import session
from tensorflow.python.client import timeline
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test


class TimelineTest(test.TestCase):

  def _validateTrace(self, chrome_trace_format):
    # Check that the supplied string is valid JSON.
    trace = json.loads(chrome_trace_format)
    # It should have a top-level key containing events.
    self.assertTrue('traceEvents' in trace)
    # Every event in the list should have a 'ph' field.
    for event in trace['traceEvents']:
      self.assertTrue('ph' in event)

  def testSimpleTimeline(self):
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    with ops.device('/cpu:0'):
      with session.Session() as sess:
        sess.run(constant_op.constant(1.0),
                 options=run_options,
                 run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField('step_stats'))
    tl = timeline.Timeline(run_metadata.step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)

  @test_util.deprecated_graph_mode_only
  def testTimelineCpu(self):
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    with self.session(use_gpu=False) as sess:
      const1 = constant_op.constant(1.0, name='const1')
      const2 = constant_op.constant(2.0, name='const2')
      result = math_ops.add(const1, const2) + const1 * const2
      sess.run(result, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField('step_stats'))
    step_stats = run_metadata.step_stats
    devices = [d.device for d in step_stats.dev_stats]
    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in devices)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_dataflow=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(
        show_memory=False, show_dataflow=False)
    self._validateTrace(ctf)

  @test_util.deprecated_graph_mode_only
  def testTimelineGpu(self):
    if not test.is_gpu_available(cuda_only=True):
      return

    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()

    with self.session(force_gpu=True) as sess:
      const1 = constant_op.constant(1.0, name='const1')
      const2 = constant_op.constant(2.0, name='const2')
      result = math_ops.add(const1, const2) + const1 * const2
      sess.run(result, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField('step_stats'))
    step_stats = run_metadata.step_stats
    devices = [d.device for d in step_stats.dev_stats]
    self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
    if not test.is_built_with_rocm():
      # skip this check for the ROCm platform
      # stream level tracing is not yet supported on the ROCm platform
      self.assertTrue('/device:GPU:0/stream:all' in devices)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_dataflow=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(
        show_memory=False, show_dataflow=False)
    self._validateTrace(ctf)

  def testTimelineWithRPCs(self):
    """Tests that Timeline can handle RPC tracing."""
    metadata = config_pb2.RunMetadata()
    step_stats = metadata.step_stats
    dev_stats = step_stats.dev_stats.add()
    dev_stats.device = '/job:worker/replica:0/task:0/cpu:0'
    node_stats = dev_stats.node_stats.add()
    node_stats.node_name = 'RecvTensor'
    node_stats.all_start_micros = 12345
    node_stats.op_end_rel_micros = 42
    node_stats.timeline_label = ('[1024B] edge_160_conv2/biases/read from '
                                 '/job:ps/replica:0/task:3/cpu:0 to '
                                 '/job:worker/replica:0/task:0/cpu:0')
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)

  def testAnalysisAndAllocations(self):
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    config = config_pb2.ConfigProto(device_count={'CPU': 3})

    with session.Session(config=config) as sess:
      with ops.device('/cpu:0'):
        num1 = variables.Variable(1.0, name='num1')
      with ops.device('/cpu:1'):
        num2 = variables.Variable(2.0, name='num2')
      with ops.device('/cpu:2'):
        result = num1 + num2 + num1 * num2
      self.evaluate(variables.global_variables_initializer())
      sess.run(result, options=run_options, run_metadata=run_metadata)

    self.assertTrue(run_metadata.HasField('step_stats'))
    tl = timeline.Timeline(run_metadata.step_stats)
    step_analysis = tl.analyze_step_stats()
    ctf = step_analysis.chrome_trace.format_to_string()
    self._validateTrace(ctf)
    maximums = step_analysis.allocator_maximums
    cpuname = 'mklcpu' if test_util.IsMklEnabled() else 'cpu'
    self.assertTrue(cpuname in maximums)
    cpu_max = maximums[
        'cuda_host_bfc'] if 'cuda_host_bfc' in maximums else maximums[cpuname]
    # At least num1 + num2, both float32s (4 bytes each)
    self.assertGreaterEqual(cpu_max.num_bytes, 8)
    self.assertGreater(cpu_max.timestamp, 0)

  def testManyCPUs(self):
    run_options = config_pb2.RunOptions(
        trace_level=config_pb2.RunOptions.FULL_TRACE)
    run_metadata = config_pb2.RunMetadata()
    config = config_pb2.ConfigProto(device_count={'CPU': 3})
    with session.Session(config=config) as sess:
      with ops.device('/cpu:0'):
        num1 = variables.Variable(1.0, name='num1')
      with ops.device('/cpu:1'):
        num2 = variables.Variable(2.0, name='num2')
      with ops.device('/cpu:2'):
        result = num1 + num2 + num1 * num2
      self.evaluate(variables.global_variables_initializer())
      sess.run(result, options=run_options, run_metadata=run_metadata)
    self.assertTrue(run_metadata.HasField('step_stats'))
    step_stats = run_metadata.step_stats
    devices = [d.device for d in step_stats.dev_stats]
    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:0' in devices)
    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:1' in devices)
    self.assertTrue('/job:localhost/replica:0/task:0/device:CPU:2' in devices)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_dataflow=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=False)
    self._validateTrace(ctf)
    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(
        show_memory=False, show_dataflow=False)
    self._validateTrace(ctf)


if __name__ == '__main__':
  test.main()