[OpenCL] Extends matmul_benchmark.py to cover SYCL (#11697)
* [OpenCL] Extends matmul_benchmark.py to cover SYCL * Fixed typo * /gpu:0 -> /device:GPU:0 * Fixes control_flow_ops_py_test * /gpu: -> /device:GPU: * Fixes //tensorflow/python/profiler/internal:run_metadata_test * gpu: -> GPU: * Fixes tfprof_node * [OpenCL] Fixes device path to name with many colons (#123) The device path is constructed from a device name by replacing all colons with underscores. Some device names contain more than one colon, for example 'device:SYCL:0', which gives a path 'device_SYCL_0'. The previous code would not convert this back to the original device name, but rather to 'device:SYCL_0'. An alternative fix would be to convert all underscores to colons in the device name (i.e. remove the restriction inside `replace("_", ":", 1)`); however, I'm not sure if there are any device names which contain underscores. * If no GPU device is available, fake one * gpu: -> device:GPU * Fixes profiler test * /gpu:x -> /device:GPU:x * Fixes debug_io_utils_test.cc test * Fixes device_name_utils_test.cc
This commit is contained in:
parent
35e7a36658
commit
ab96f41fb4
@ -101,7 +101,7 @@ void ConcurrentSteps(const Options* opts, int session_index) {
|
|||||||
std::unique_ptr<Session> session(NewSession(options));
|
std::unique_ptr<Session> session(NewSession(options));
|
||||||
GraphDef def = CreateGraphDef();
|
GraphDef def = CreateGraphDef();
|
||||||
if (options.target.empty()) {
|
if (options.target.empty()) {
|
||||||
graph::SetDefaultDevice(opts->use_gpu ? "/gpu:0" : "/cpu:0", &def);
|
graph::SetDefaultDevice(opts->use_gpu ? "/device:GPU:0" : "/cpu:0", &def);
|
||||||
}
|
}
|
||||||
|
|
||||||
TF_CHECK_OK(session->Create(def));
|
TF_CHECK_OK(session->Create(def));
|
||||||
|
@ -93,7 +93,7 @@ class CudnnRNNBenchmark(test.Benchmark):
|
|||||||
batch_size = config["batch_size"]
|
batch_size = config["batch_size"]
|
||||||
seq_length = config["seq_length"]
|
seq_length = config["seq_length"]
|
||||||
|
|
||||||
with ops.Graph().as_default(), ops.device("/gpu:0"):
|
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
|
||||||
model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
|
model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
|
||||||
params_size_t = model.params_size()
|
params_size_t = model.params_size()
|
||||||
input_data = variables.Variable(
|
input_data = variables.Variable(
|
||||||
@ -125,7 +125,7 @@ class CudnnRNNBenchmark(test.Benchmark):
|
|||||||
batch_size = config["batch_size"]
|
batch_size = config["batch_size"]
|
||||||
seq_length = config["seq_length"]
|
seq_length = config["seq_length"]
|
||||||
|
|
||||||
with ops.Graph().as_default(), ops.device("/gpu:0"):
|
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
|
||||||
inputs = seq_length * [
|
inputs = seq_length * [
|
||||||
array_ops.zeros([batch_size, num_units], dtypes.float32)
|
array_ops.zeros([batch_size, num_units], dtypes.float32)
|
||||||
]
|
]
|
||||||
@ -153,7 +153,7 @@ class CudnnRNNBenchmark(test.Benchmark):
|
|||||||
batch_size = config["batch_size"]
|
batch_size = config["batch_size"]
|
||||||
seq_length = config["seq_length"]
|
seq_length = config["seq_length"]
|
||||||
|
|
||||||
with ops.Graph().as_default(), ops.device("/gpu:0"):
|
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
|
||||||
inputs = seq_length * [
|
inputs = seq_length * [
|
||||||
array_ops.zeros([batch_size, num_units], dtypes.float32)
|
array_ops.zeros([batch_size, num_units], dtypes.float32)
|
||||||
]
|
]
|
||||||
|
@ -634,7 +634,7 @@ class MixtureBenchmark(test.Benchmark):
|
|||||||
np.random.seed(127)
|
np.random.seed(127)
|
||||||
with session.Session(config=config, graph=ops.Graph()) as sess:
|
with session.Session(config=config, graph=ops.Graph()) as sess:
|
||||||
random_seed.set_random_seed(0)
|
random_seed.set_random_seed(0)
|
||||||
with ops.device("/gpu:0" if use_gpu else "/cpu:0"):
|
with ops.device("/device:GPU:0" if use_gpu else "/cpu:0"):
|
||||||
mixture = create_distribution(
|
mixture = create_distribution(
|
||||||
num_components=num_components,
|
num_components=num_components,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
|
@ -443,19 +443,19 @@ class VariablesTest(test.TestCase):
|
|||||||
e = variables_lib2.variable('e', initializer=e_init)
|
e = variables_lib2.variable('e', initializer=e_init)
|
||||||
# The values below highlight how the VariableDeviceChooser puts initial
|
# The values below highlight how the VariableDeviceChooser puts initial
|
||||||
# values on the same device as the variable job.
|
# values on the same device as the variable job.
|
||||||
self.assertDeviceEqual(a.device, '/gpu:0')
|
self.assertDeviceEqual(a.device, '/device:GPU:0')
|
||||||
self.assertEqual(a.initial_value.op.colocation_groups(),
|
self.assertEqual(a.initial_value.op.colocation_groups(),
|
||||||
a.op.colocation_groups())
|
a.op.colocation_groups())
|
||||||
self.assertDeviceEqual(b.device, '/gpu:0')
|
self.assertDeviceEqual(b.device, '/device:GPU:0')
|
||||||
self.assertEqual(b.initial_value.op.colocation_groups(),
|
self.assertEqual(b.initial_value.op.colocation_groups(),
|
||||||
b.op.colocation_groups())
|
b.op.colocation_groups())
|
||||||
self.assertDeviceEqual(c.device, '/cpu:12')
|
self.assertDeviceEqual(c.device, '/cpu:12')
|
||||||
self.assertEqual(c.initial_value.op.colocation_groups(),
|
self.assertEqual(c.initial_value.op.colocation_groups(),
|
||||||
c.op.colocation_groups())
|
c.op.colocation_groups())
|
||||||
self.assertDeviceEqual(d.device, '/gpu:0')
|
self.assertDeviceEqual(d.device, '/device:GPU:0')
|
||||||
self.assertEqual(d.initial_value.op.colocation_groups(),
|
self.assertEqual(d.initial_value.op.colocation_groups(),
|
||||||
d.op.colocation_groups())
|
d.op.colocation_groups())
|
||||||
self.assertDeviceEqual(e.device, '/gpu:0')
|
self.assertDeviceEqual(e.device, '/device:GPU:0')
|
||||||
self.assertDeviceEqual(e.initial_value.device, '/cpu:99')
|
self.assertDeviceEqual(e.initial_value.device, '/cpu:99')
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,7 +43,7 @@ class AllReduceTest(test.TestCase):
|
|||||||
self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum)
|
self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum)
|
||||||
|
|
||||||
def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn):
|
def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn):
|
||||||
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
|
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
|
||||||
shape = (3, 4)
|
shape = (3, 4)
|
||||||
np_ans = None
|
np_ans = None
|
||||||
tensors = []
|
tensors = []
|
||||||
@ -84,7 +84,7 @@ class BroadcastTest(test.TestCase):
|
|||||||
# Create session inside outer loop to test use of
|
# Create session inside outer loop to test use of
|
||||||
# same communicator across multiple sessions.
|
# same communicator across multiple sessions.
|
||||||
with self.test_session(use_gpu=True) as sess:
|
with self.test_session(use_gpu=True) as sess:
|
||||||
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
|
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
|
||||||
shape = (3, 4)
|
shape = (3, 4)
|
||||||
sender = np.random.randint(0, len(devices) - 1)
|
sender = np.random.randint(0, len(devices) - 1)
|
||||||
with ops.device(devices[sender]):
|
with ops.device(devices[sender]):
|
||||||
@ -115,7 +115,7 @@ class CombinedTest(test.TestCase):
|
|||||||
# Create session inside outer loop to test use of
|
# Create session inside outer loop to test use of
|
||||||
# same communicator across multiple sessions.
|
# same communicator across multiple sessions.
|
||||||
with self.test_session(use_gpu=True) as sess:
|
with self.test_session(use_gpu=True) as sess:
|
||||||
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
|
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
|
||||||
shape = (3, 4)
|
shape = (3, 4)
|
||||||
|
|
||||||
# all-reduce
|
# all-reduce
|
||||||
|
@ -446,12 +446,12 @@ class RNNCellTest(test.TestCase):
|
|||||||
# Can't perform this test w/o a GPU
|
# Can't perform this test w/o a GPU
|
||||||
return
|
return
|
||||||
|
|
||||||
|
gpu_dev = test.gpu_device_name()
|
||||||
with self.test_session(use_gpu=True) as sess:
|
with self.test_session(use_gpu=True) as sess:
|
||||||
with variable_scope.variable_scope(
|
with variable_scope.variable_scope(
|
||||||
"root", initializer=init_ops.constant_initializer(0.5)):
|
"root", initializer=init_ops.constant_initializer(0.5)):
|
||||||
x = array_ops.zeros([1, 1, 3])
|
x = array_ops.zeros([1, 1, 3])
|
||||||
cell = rnn_cell_impl.DeviceWrapper(
|
cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
|
||||||
rnn_cell_impl.GRUCell(3), test_util.gpu_device_name())
|
|
||||||
with ops.device("/cpu:0"):
|
with ops.device("/cpu:0"):
|
||||||
outputs, _ = rnn.dynamic_rnn(
|
outputs, _ = rnn.dynamic_rnn(
|
||||||
cell=cell, inputs=x, dtype=dtypes.float32)
|
cell=cell, inputs=x, dtype=dtypes.float32)
|
||||||
@ -463,8 +463,7 @@ class RNNCellTest(test.TestCase):
|
|||||||
_ = sess.run(outputs, options=opts, run_metadata=run_metadata)
|
_ = sess.run(outputs, options=opts, run_metadata=run_metadata)
|
||||||
|
|
||||||
step_stats = run_metadata.step_stats
|
step_stats = run_metadata.step_stats
|
||||||
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
|
ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1
|
||||||
("sycl" in step_stats.dev_stats[0].device)) else 1
|
|
||||||
gpu_stats = step_stats.dev_stats[ix].node_stats
|
gpu_stats = step_stats.dev_stats[ix].node_stats
|
||||||
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
||||||
self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
|
self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])
|
||||||
|
@ -42,7 +42,6 @@ from tensorflow.python.ops import variables as variables_lib
|
|||||||
from tensorflow.python.platform import test
|
from tensorflow.python.platform import test
|
||||||
from tensorflow.python.platform import tf_logging
|
from tensorflow.python.platform import tf_logging
|
||||||
from tensorflow.python.util import nest
|
from tensorflow.python.util import nest
|
||||||
from tensorflow.python.framework import test_util
|
|
||||||
|
|
||||||
class Plus1RNNCell(rnn_lib.RNNCell):
|
class Plus1RNNCell(rnn_lib.RNNCell):
|
||||||
"""RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
|
"""RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
|
||||||
@ -2208,11 +2207,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
|
|||||||
if not test.is_gpu_available():
|
if not test.is_gpu_available():
|
||||||
return # Test requires access to a GPU
|
return # Test requires access to a GPU
|
||||||
|
|
||||||
|
gpu_dev = test.gpu_device_name()
|
||||||
run_metadata = self._execute_rnn_on(
|
run_metadata = self._execute_rnn_on(
|
||||||
rnn_device="/cpu:0", cell_device=test_util.gpu_device_name())
|
rnn_device="/cpu:0", cell_device=gpu_dev)
|
||||||
step_stats = run_metadata.step_stats
|
step_stats = run_metadata.step_stats
|
||||||
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
|
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
|
||||||
("sycl" in step_stats.dev_stats[0].device)) else 1
|
|
||||||
gpu_stats = step_stats.dev_stats[ix].node_stats
|
gpu_stats = step_stats.dev_stats[ix].node_stats
|
||||||
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
||||||
|
|
||||||
@ -2233,12 +2232,12 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
|
|||||||
if not test.is_gpu_available():
|
if not test.is_gpu_available():
|
||||||
return # Test requires access to a GPU
|
return # Test requires access to a GPU
|
||||||
|
|
||||||
|
gpu_dev = test.gpu_device_name()
|
||||||
run_metadata = self._execute_rnn_on(
|
run_metadata = self._execute_rnn_on(
|
||||||
rnn_device="/cpu:0", cell_device="/cpu:0",
|
rnn_device="/cpu:0", cell_device="/cpu:0",
|
||||||
input_device=test_util.gpu_device_name())
|
input_device=gpu_dev)
|
||||||
step_stats = run_metadata.step_stats
|
step_stats = run_metadata.step_stats
|
||||||
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
|
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
|
||||||
("sycl" in step_stats.dev_stats[0].device)) else 1
|
|
||||||
gpu_stats = step_stats.dev_stats[ix].node_stats
|
gpu_stats = step_stats.dev_stats[ix].node_stats
|
||||||
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
||||||
|
|
||||||
@ -2253,11 +2252,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
|
|||||||
if not test.is_gpu_available():
|
if not test.is_gpu_available():
|
||||||
return # Test requires access to a GPU
|
return # Test requires access to a GPU
|
||||||
|
|
||||||
|
gpu_dev = test.gpu_device_name()
|
||||||
run_metadata = self._execute_rnn_on(
|
run_metadata = self._execute_rnn_on(
|
||||||
input_device=test_util.gpu_device_name())
|
input_device=gpu_dev)
|
||||||
step_stats = run_metadata.step_stats
|
step_stats = run_metadata.step_stats
|
||||||
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
|
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
|
||||||
("sycl" in step_stats.dev_stats[0].device)) else 1
|
|
||||||
gpu_stats = step_stats.dev_stats[ix].node_stats
|
gpu_stats = step_stats.dev_stats[ix].node_stats
|
||||||
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
|
||||||
|
|
||||||
|
@ -357,7 +357,7 @@ def training_gru_block_vs_gru_cell(batch_size,
|
|||||||
ops.reset_default_graph()
|
ops.reset_default_graph()
|
||||||
with session.Session(graph=ops.Graph()) as sess:
|
with session.Session(graph=ops.Graph()) as sess:
|
||||||
# Specify the device which is been used.
|
# Specify the device which is been used.
|
||||||
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
|
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
|
||||||
|
|
||||||
# Random initializers.
|
# Random initializers.
|
||||||
seed = 1994
|
seed = 1994
|
||||||
@ -429,7 +429,7 @@ def inference_gru_block_vs_gru_cell(batch_size,
|
|||||||
"""Benchmark inference speed between GRUBlockCell vs GRUCell."""
|
"""Benchmark inference speed between GRUBlockCell vs GRUCell."""
|
||||||
ops.reset_default_graph()
|
ops.reset_default_graph()
|
||||||
with session.Session(graph=ops.Graph()) as sess:
|
with session.Session(graph=ops.Graph()) as sess:
|
||||||
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
|
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
|
||||||
|
|
||||||
# Random initializers.
|
# Random initializers.
|
||||||
seed = 1994
|
seed = 1994
|
||||||
@ -484,7 +484,7 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size,
|
|||||||
"""Benchmark single bprop step speed between GRUBlockCell vs GRUCell."""
|
"""Benchmark single bprop step speed between GRUBlockCell vs GRUCell."""
|
||||||
ops.reset_default_graph()
|
ops.reset_default_graph()
|
||||||
with session.Session(graph=ops.Graph()) as sess:
|
with session.Session(graph=ops.Graph()) as sess:
|
||||||
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
|
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
|
||||||
initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989)
|
initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989)
|
||||||
# Inputs
|
# Inputs
|
||||||
x = vs.get_variable("x", [batch_size, input_size])
|
x = vs.get_variable("x", [batch_size, input_size])
|
||||||
|
@ -78,7 +78,7 @@ class GatherTreeTest(test.TestCase):
|
|||||||
sequence_length = [[3, 3, 3]]
|
sequence_length = [[3, 3, 3]]
|
||||||
expected_result = _transpose_batch_time(
|
expected_result = _transpose_batch_time(
|
||||||
[[[2, -1, 2], [6, 5, 6], [7, 8, 9], [-1, -1, -1]]])
|
[[[2, -1, 2], [6, 5, 6], [7, 8, 9], [-1, -1, -1]]])
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
beams = beam_search_ops.gather_tree(
|
beams = beam_search_ops.gather_tree(
|
||||||
step_ids=step_ids, parent_ids=parent_ids,
|
step_ids=step_ids, parent_ids=parent_ids,
|
||||||
sequence_length=sequence_length)
|
sequence_length=sequence_length)
|
||||||
|
@ -22,7 +22,7 @@ limitations under the License.
|
|||||||
// Device names
|
// Device names
|
||||||
// * Every Device should have a unique name with the format:
|
// * Every Device should have a unique name with the format:
|
||||||
// /job:___/replica:___/task:___/(gpu|cpu):___
|
// /job:___/replica:___/task:___/(gpu|cpu):___
|
||||||
// An example name would be "/job:train/replica:0/task:3/gpu:2".
|
// An example name would be "/job:train/replica:0/task:3/device:GPU:2".
|
||||||
// * Task numbers are within the specified replica, so there are as
|
// * Task numbers are within the specified replica, so there are as
|
||||||
// many "task zeros" as replicas.
|
// many "task zeros" as replicas.
|
||||||
|
|
||||||
|
@ -476,7 +476,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
|
|||||||
vx.scalar<float>()() = 1.0;
|
vx.scalar<float>()() = 1.0;
|
||||||
Node* x = test::graph::Constant(&g, vx);
|
Node* x = test::graph::Constant(&g, vx);
|
||||||
Node* y = test::graph::Unary(&g, "Darth", x);
|
Node* y = test::graph::Unary(&g, "Darth", x);
|
||||||
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
GraphDef def;
|
GraphDef def;
|
||||||
test::graph::ToGraphDef(&g, &def);
|
test::graph::ToGraphDef(&g, &def);
|
||||||
|
|
||||||
@ -494,7 +494,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
|
|||||||
vx.scalar<float>()() = 1.0;
|
vx.scalar<float>()() = 1.0;
|
||||||
Node* x = test::graph::Constant(&g, vx);
|
Node* x = test::graph::Constant(&g, vx);
|
||||||
Node* y = test::graph::Unary(&g, "Darth", x);
|
Node* y = test::graph::Unary(&g, "Darth", x);
|
||||||
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
GraphDef def;
|
GraphDef def;
|
||||||
test::graph::ToGraphDef(&g, &def);
|
test::graph::ToGraphDef(&g, &def);
|
||||||
|
|
||||||
|
@ -154,14 +154,14 @@ static void TestHWAccelerator(bool enableHWTrace) {
|
|||||||
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
|
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
|
||||||
test::FillValues<float>(&x_tensor, {1, 1});
|
test::FillValues<float>(&x_tensor, {1, 1});
|
||||||
Node* x = test::graph::Constant(&graph, x_tensor);
|
Node* x = test::graph::Constant(&graph, x_tensor);
|
||||||
x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
#ifdef TENSORFLOW_USE_SYCL
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
|
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
|
||||||
#endif // TENSORFLOW_USE_SYCL
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
|
||||||
// y = A * x
|
// y = A * x
|
||||||
Node* y = test::graph::Matmul(&graph, a, x, false, false);
|
Node* y = test::graph::Matmul(&graph, a, x, false, false);
|
||||||
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
#ifdef TENSORFLOW_USE_SYCL
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
|
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
|
||||||
#endif // TENSORFLOW_USE_SYCL
|
#endif // TENSORFLOW_USE_SYCL
|
||||||
|
@ -588,7 +588,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
|
|||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++) {
|
||||||
BaseGPUDevice* gpu_device;
|
BaseGPUDevice* gpu_device;
|
||||||
TF_RETURN_IF_ERROR(CreateGPUDevice(options,
|
TF_RETURN_IF_ERROR(CreateGPUDevice(options,
|
||||||
strings::StrCat(name_prefix, "/gpu:", i),
|
strings::StrCat(name_prefix, "/device:GPU:", i),
|
||||||
valid_gpu_ids[i], &gpu_device));
|
valid_gpu_ids[i], &gpu_device));
|
||||||
TF_RETURN_IF_ERROR(gpu_device->Init(options));
|
TF_RETURN_IF_ERROR(gpu_device->Init(options));
|
||||||
devices->push_back(gpu_device);
|
devices->push_back(gpu_device);
|
||||||
@ -1049,7 +1049,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
|
|||||||
size_t new_id = ids->size();
|
size_t new_id = ids->size();
|
||||||
ids->push_back(visible_gpu_id);
|
ids->push_back(visible_gpu_id);
|
||||||
|
|
||||||
LOG(INFO) << "Creating TensorFlow device (/gpu:" << new_id << ") -> "
|
LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> "
|
||||||
<< "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
|
<< "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,7 +141,7 @@ class BaseGPUDeviceFactory : public DeviceFactory {
|
|||||||
Allocator* cpu_allocator) = 0;
|
Allocator* cpu_allocator) = 0;
|
||||||
|
|
||||||
// Returns into 'ids' the list of valid GPU ids, in the order that
|
// Returns into 'ids' the list of valid GPU ids, in the order that
|
||||||
// they should map to logical gpu ids "/gpu:0", "/gpu:1", etc, based
|
// they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based
|
||||||
// upon 'visible_device_list', a comma-separated list of 'visible
|
// upon 'visible_device_list', a comma-separated list of 'visible
|
||||||
// gpu ids'.
|
// gpu ids'.
|
||||||
Status GetValidDeviceIds(const string& visible_device_list,
|
Status GetValidDeviceIds(const string& visible_device_list,
|
||||||
|
@ -106,9 +106,9 @@ TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) {
|
|||||||
TEST_F(GpuStreamUtilTest, StreamOverrides) {
|
TEST_F(GpuStreamUtilTest, StreamOverrides) {
|
||||||
auto root = Scope::NewRootScope().ExitOnError();
|
auto root = Scope::NewRootScope().ExitOnError();
|
||||||
ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
|
ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
|
||||||
"/gpu:0");
|
"/device:GPU:0");
|
||||||
Output n = ops::MatMul(root, {}, {});
|
Output n = ops::MatMul(root, {}, {});
|
||||||
ops::_Send(root.WithOpName("output"), n, "output", "/gpu:0", 0, "/cpu:0");
|
ops::_Send(root.WithOpName("output"), n, "output", "/device:GPU:0", 0, "/cpu:0");
|
||||||
Graph g(OpRegistry::Global());
|
Graph g(OpRegistry::Global());
|
||||||
TF_ASSERT_OK(root.ToGraph(&g));
|
TF_ASSERT_OK(root.ToGraph(&g));
|
||||||
|
|
||||||
|
@ -53,7 +53,7 @@ TEST(MemoryTypeChecker, Int32NotOk) {
|
|||||||
EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_GPU, g)));
|
EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_GPU, g)));
|
||||||
|
|
||||||
// But we can insert _HostSend/_HostRecv to ensure the invariant.
|
// But we can insert _HostSend/_HostRecv to ensure the invariant.
|
||||||
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/gpu:0", g));
|
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g));
|
||||||
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
|
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
|
||||||
#endif // GOOGLE_CUDA
|
#endif // GOOGLE_CUDA
|
||||||
#ifdef TENSORFLOW_USE_SYCL
|
#ifdef TENSORFLOW_USE_SYCL
|
||||||
|
@ -86,7 +86,7 @@ void DebugGateway::CopyTensor(const string& node_name, const int output_slot,
|
|||||||
// Determine if the tensor is on device (GPU) or host (CPU).
|
// Determine if the tensor is on device (GPU) or host (CPU).
|
||||||
// The second part of the check is necessary because even an OpKernel on
|
// The second part of the check is necessary because even an OpKernel on
|
||||||
// may have output tensors allocated on CPU.
|
// may have output tensors allocated on CPU.
|
||||||
if ((device->name().find("gpu:") != string::npos || device->name().find("SYCL:") != string::npos) &&
|
if ((device->name().find("GPU:") != string::npos || device->name().find("SYCL:") != string::npos) &&
|
||||||
!ctx->output_alloc_attr(output_slot).on_host()) {
|
!ctx->output_alloc_attr(output_slot).on_host()) {
|
||||||
// GPU tensors: Copy it to host (CPU).
|
// GPU tensors: Copy it to host (CPU).
|
||||||
DeviceContext* device_ctxt = ctx->op_device_context();
|
DeviceContext* device_ctxt = ctx->op_device_context();
|
||||||
|
@ -47,7 +47,7 @@ class SessionDebugMinusAXTest : public ::testing::Test {
|
|||||||
Graph graph(OpRegistry::Global());
|
Graph graph(OpRegistry::Global());
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
|
||||||
#elif defined(TENSORFLOW_USE_SYCL)
|
#elif defined(TENSORFLOW_USE_SYCL)
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
||||||
#else
|
#else
|
||||||
@ -505,7 +505,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
|
|||||||
Graph graph(OpRegistry::Global());
|
Graph graph(OpRegistry::Global());
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
|
||||||
#elif defined(TENSORFLOW_USE_SYCL)
|
#elif defined(TENSORFLOW_USE_SYCL)
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
||||||
#else
|
#else
|
||||||
@ -607,7 +607,7 @@ class SessionDebugVariableTest : public ::testing::Test {
|
|||||||
Graph graph(OpRegistry::Global());
|
Graph graph(OpRegistry::Global());
|
||||||
|
|
||||||
#if GOOGLE_CUDA
|
#if GOOGLE_CUDA
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
|
||||||
#elif defined(TENSORFLOW_USE_SYCL)
|
#elif defined(TENSORFLOW_USE_SYCL)
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
||||||
#else
|
#else
|
||||||
@ -879,7 +879,7 @@ class SessionDebugGPUSwitchTest : public ::testing::Test {
|
|||||||
Graph graph(OpRegistry::Global());
|
Graph graph(OpRegistry::Global());
|
||||||
|
|
||||||
#ifdef GOOGLE_CUDA
|
#ifdef GOOGLE_CUDA
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
|
||||||
#elif TENSORFLOW_USE_SYCL
|
#elif TENSORFLOW_USE_SYCL
|
||||||
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
|
||||||
#endif
|
#endif
|
||||||
|
@ -51,14 +51,14 @@ class DebugIOUtilsTest : public ::testing::Test {
|
|||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(DebugIOUtilsTest, ConstructDebugNodeKey) {
|
TEST_F(DebugIOUtilsTest, ConstructDebugNodeKey) {
|
||||||
DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/gpu:2",
|
DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/device:GPU:2",
|
||||||
"hidden_1/MatMul", 0, "DebugIdentity");
|
"hidden_1/MatMul", 0, "DebugIdentity");
|
||||||
EXPECT_EQ("/job:worker/replica:1/task:0/gpu:2", debug_node_key.device_name);
|
EXPECT_EQ("/job:worker/replica:1/task:0/device:GPU:2", debug_node_key.device_name);
|
||||||
EXPECT_EQ("hidden_1/MatMul", debug_node_key.node_name);
|
EXPECT_EQ("hidden_1/MatMul", debug_node_key.node_name);
|
||||||
EXPECT_EQ(0, debug_node_key.output_slot);
|
EXPECT_EQ(0, debug_node_key.output_slot);
|
||||||
EXPECT_EQ("DebugIdentity", debug_node_key.debug_op);
|
EXPECT_EQ("DebugIdentity", debug_node_key.debug_op);
|
||||||
EXPECT_EQ("hidden_1/MatMul:0:DebugIdentity", debug_node_key.debug_node_name);
|
EXPECT_EQ("hidden_1/MatMul:0:DebugIdentity", debug_node_key.debug_node_name);
|
||||||
EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,gpu_2",
|
EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,device_GPU_2",
|
||||||
debug_node_key.device_path);
|
debug_node_key.device_path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,7 +140,7 @@ Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define ALICE "/job:j/replica:0/task:0/cpu:0"
|
#define ALICE "/job:j/replica:0/task:0/cpu:0"
|
||||||
#define BOB "/job:j/replica:0/task:0/gpu:0"
|
#define BOB "/job:j/replica:0/task:0/device:GPU:0"
|
||||||
|
|
||||||
TEST_F(ExecutorTest, SimpleAdd) {
|
TEST_F(ExecutorTest, SimpleAdd) {
|
||||||
// c = a + b
|
// c = a + b
|
||||||
|
@ -31,9 +31,9 @@ TEST(GrpcChannelTest, IsSameAddressSpace) {
|
|||||||
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
|
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
|
||||||
"/job:mnist/replica:10/task:10/cpu:1"));
|
"/job:mnist/replica:10/task:10/cpu:1"));
|
||||||
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
|
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
|
||||||
"/job:mnist/replica:10/task:10/gpu:2"));
|
"/job:mnist/replica:10/task:10/device:GPU:2"));
|
||||||
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10",
|
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10",
|
||||||
"/job:mnist/replica:10/task:10/gpu:2"));
|
"/job:mnist/replica:10/task:10/device:GPU:2"));
|
||||||
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:1",
|
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:1",
|
||||||
"/job:mnist/replica:10/task:10"));
|
"/job:mnist/replica:10/task:10"));
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ message NodeDef {
|
|||||||
// | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") )
|
// | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") )
|
||||||
//
|
//
|
||||||
// Valid values for this string include:
|
// Valid values for this string include:
|
||||||
// * "/job:worker/replica:0/task:1/gpu:3" (full specification)
|
// * "/job:worker/replica:0/task:1/device:GPU:3" (full specification)
|
||||||
// * "/job:worker/gpu:3" (partial specification)
|
// * "/job:worker/device:GPU:3" (partial specification)
|
||||||
// * "" (no specification)
|
// * "" (no specification)
|
||||||
//
|
//
|
||||||
// If the constraints do not resolve to a single device (or if this
|
// If the constraints do not resolve to a single device (or if this
|
||||||
|
@ -39,11 +39,11 @@ namespace {
|
|||||||
TEST(RendezvousTest, Key) {
|
TEST(RendezvousTest, Key) {
|
||||||
const string key = Rendezvous::CreateKey(
|
const string key = Rendezvous::CreateKey(
|
||||||
"/job:mnist/replica:1/task:2/CPU:0", 7890,
|
"/job:mnist/replica:1/task:2/CPU:0", 7890,
|
||||||
"/job:mnist/replica:1/task:2/GPU:0", "var0", FrameAndIter(0, 0));
|
"/job:mnist/replica:1/task:2/device:GPU:0", "var0", FrameAndIter(0, 0));
|
||||||
EXPECT_EQ(key,
|
EXPECT_EQ(key,
|
||||||
"/job:mnist/replica:1/task:2/CPU:0;"
|
"/job:mnist/replica:1/task:2/CPU:0;"
|
||||||
"0000000000001ed2;" // 7890 = 0x1ed2
|
"0000000000001ed2;" // 7890 = 0x1ed2
|
||||||
"/job:mnist/replica:1/task:2/GPU:0;"
|
"/job:mnist/replica:1/task:2/device:GPU:0;"
|
||||||
"var0;"
|
"var0;"
|
||||||
"0:0");
|
"0:0");
|
||||||
Rendezvous::ParsedKey parsed;
|
Rendezvous::ParsedKey parsed;
|
||||||
@ -51,12 +51,12 @@ TEST(RendezvousTest, Key) {
|
|||||||
EXPECT_EQ(parsed.src_device, "/job:mnist/replica:1/task:2/CPU:0");
|
EXPECT_EQ(parsed.src_device, "/job:mnist/replica:1/task:2/CPU:0");
|
||||||
EXPECT_EQ(parsed.src_incarnation, 7890);
|
EXPECT_EQ(parsed.src_incarnation, 7890);
|
||||||
EXPECT_EQ(parsed.src.type, "CPU");
|
EXPECT_EQ(parsed.src.type, "CPU");
|
||||||
EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/GPU:0");
|
EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/device:GPU:0");
|
||||||
EXPECT_EQ(parsed.dst.type, "GPU");
|
EXPECT_EQ(parsed.dst.type, "GPU");
|
||||||
|
|
||||||
EXPECT_FALSE(Rendezvous::ParseKey("foo;bar;baz", &parsed).ok());
|
EXPECT_FALSE(Rendezvous::ParseKey("foo;bar;baz", &parsed).ok());
|
||||||
EXPECT_FALSE(Rendezvous::ParseKey("/job:mnist/replica:1/task:2/CPU:0;"
|
EXPECT_FALSE(Rendezvous::ParseKey("/job:mnist/replica:1/task:2/CPU:0;"
|
||||||
"/job:mnist/replica:1/task:2/GPU:0;",
|
"/job:mnist/replica:1/task:2/device:GPU:0;",
|
||||||
&parsed)
|
&parsed)
|
||||||
.ok());
|
.ok());
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
@ -99,7 +99,7 @@ string V(const Tensor& tensor) {
|
|||||||
|
|
||||||
Rendezvous::ParsedKey MakeKey(const string& name) {
|
Rendezvous::ParsedKey MakeKey(const string& name) {
|
||||||
string s = Rendezvous::CreateKey("/job:mnist/replica:1/task:2/CPU:0", 7890,
|
string s = Rendezvous::CreateKey("/job:mnist/replica:1/task:2/CPU:0", 7890,
|
||||||
"/job:mnist/replica:1/task:2/GPU:0", name,
|
"/job:mnist/replica:1/task:2/device:GPU:0", name,
|
||||||
FrameAndIter(0, 0));
|
FrameAndIter(0, 0));
|
||||||
Rendezvous::ParsedKey k;
|
Rendezvous::ParsedKey k;
|
||||||
TF_EXPECT_OK(Rendezvous::ParseKey(s, &k));
|
TF_EXPECT_OK(Rendezvous::ParseKey(s, &k));
|
||||||
|
@ -50,7 +50,7 @@ extern Status TopologicalSortNodesWithTimePriority(
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const char gpu_device[] = "/job:a/replica:0/task:0/gpu:0";
|
const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0";
|
||||||
|
|
||||||
string SplitByDevice(const Node* node) { return node->assigned_device_name(); }
|
string SplitByDevice(const Node* node) { return node->assigned_device_name(); }
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ namespace tensorflow {
|
|||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
const char kCPUDevice[] = "/job:a/replica:0/task:0/cpu:0";
|
const char kCPUDevice[] = "/job:a/replica:0/task:0/cpu:0";
|
||||||
const char kGPUDevice[] = "/job:a/replica:0/task:0/gpu:0";
|
const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
|
||||||
|
|
||||||
static void InitGraph(const string& s, Graph* graph,
|
static void InitGraph(const string& s, Graph* graph,
|
||||||
const string& device = kCPUDevice) {
|
const string& device = kCPUDevice) {
|
||||||
|
@ -89,7 +89,7 @@ Status SingleMachine::Provision() {
|
|||||||
VLOG(1) << "Number of GPUs: " << num_gpus_;
|
VLOG(1) << "Number of GPUs: " << num_gpus_;
|
||||||
for (int i = 0; i < num_gpus_; ++i) {
|
for (int i = 0; i < num_gpus_; ++i) {
|
||||||
string device_name =
|
string device_name =
|
||||||
strings::StrCat("/job:localhost/replica:0/task:0/gpu:", i);
|
strings::StrCat("/job:localhost/replica:0/task:0/device:GPU:", i);
|
||||||
VLOG(1) << "Adding GPU device " << device_name;
|
VLOG(1) << "Adding GPU device " << device_name;
|
||||||
devices_[device_name] = GetLocalGPUInfo(i);
|
devices_[device_name] = GetLocalGPUInfo(i);
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@ class AnalyticalCostEstimatorTest : public ::testing::Test {
|
|||||||
gpu_device.set_frequency(1100);
|
gpu_device.set_frequency(1100);
|
||||||
gpu_device.set_bandwidth(180 * 1024 * 1024);
|
gpu_device.set_bandwidth(180 * 1024 * 1024);
|
||||||
(*gpu_device.mutable_environment())["architecture"] = "6";
|
(*gpu_device.mutable_environment())["architecture"] = "6";
|
||||||
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
|
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
|
||||||
|
|
||||||
cluster_.reset(new VirtualCluster(devices));
|
cluster_.reset(new VirtualCluster(devices));
|
||||||
}
|
}
|
||||||
|
@ -30,14 +30,14 @@ TEST(VirtualPlacerTest, LocalDevices) {
|
|||||||
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
|
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
|
||||||
DeviceProperties gpu_device;
|
DeviceProperties gpu_device;
|
||||||
gpu_device.set_type("GPU");
|
gpu_device.set_type("GPU");
|
||||||
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
|
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
|
||||||
VirtualCluster cluster(devices);
|
VirtualCluster cluster(devices);
|
||||||
VirtualPlacer placer(&cluster);
|
VirtualPlacer placer(&cluster);
|
||||||
|
|
||||||
NodeDef node;
|
NodeDef node;
|
||||||
node.set_op("Conv2D");
|
node.set_op("Conv2D");
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
node.set_device("CPU");
|
node.set_device("CPU");
|
||||||
@ -47,7 +47,7 @@ TEST(VirtualPlacerTest, LocalDevices) {
|
|||||||
|
|
||||||
node.set_device("GPU:0");
|
node.set_device("GPU:0");
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,7 +60,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
|
|||||||
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
|
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
|
||||||
DeviceProperties gpu_device;
|
DeviceProperties gpu_device;
|
||||||
gpu_device.set_type("GPU");
|
gpu_device.set_type("GPU");
|
||||||
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
|
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
|
||||||
VirtualCluster cluster(devices);
|
VirtualCluster cluster(devices);
|
||||||
VirtualPlacer placer(&cluster);
|
VirtualPlacer placer(&cluster);
|
||||||
|
|
||||||
@ -70,7 +70,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
|
|||||||
EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0",
|
EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
node.set_device("/device:GPU:0");
|
node.set_device("/device:GPU:0");
|
||||||
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,7 +113,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
|
|||||||
devices["/job:my_job/replica:0/task:0/cpu:0"] = cpu_device;
|
devices["/job:my_job/replica:0/task:0/cpu:0"] = cpu_device;
|
||||||
DeviceProperties gpu_device;
|
DeviceProperties gpu_device;
|
||||||
gpu_device.set_type("GPU");
|
gpu_device.set_type("GPU");
|
||||||
devices["/job:my_job/replica:0/task:0/gpu:0"] = gpu_device;
|
devices["/job:my_job/replica:0/task:0/device:GPU:0"] = gpu_device;
|
||||||
VirtualCluster cluster(devices);
|
VirtualCluster cluster(devices);
|
||||||
VirtualPlacer placer(&cluster);
|
VirtualPlacer placer(&cluster);
|
||||||
|
|
||||||
@ -122,7 +122,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
|
|||||||
|
|
||||||
// Device falls back to GPU.
|
// Device falls back to GPU.
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
node.set_device("/job:my_job/replica:0/task:0/cpu:0");
|
node.set_device("/job:my_job/replica:0/task:0/cpu:0");
|
||||||
@ -130,27 +130,27 @@ TEST(VirtualPlacerTest, RemoteDevices) {
|
|||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/cpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/cpu:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
node.set_device("/job:my_job/replica:0/task:0/gpu:0");
|
node.set_device("/job:my_job/replica:0/task:0/device:GPU:0");
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
// There is no local cpu available. Device falls back to GPU.
|
// There is no local cpu available. Device falls back to GPU.
|
||||||
node.set_device("CPU");
|
node.set_device("CPU");
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
node.set_device("GPU:0");
|
node.set_device("GPU:0");
|
||||||
// There is no local GPU available. Fall back to default GPU.
|
// There is no local GPU available. Fall back to default GPU.
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
|
|
||||||
// This isn't a valid name. Fall back to GPU.
|
// This isn't a valid name. Fall back to GPU.
|
||||||
node.set_device("/job:my_job/replica:0/task:0");
|
node.set_device("/job:my_job/replica:0/task:0");
|
||||||
EXPECT_EQ("GPU", placer.get_device(node).type());
|
EXPECT_EQ("GPU", placer.get_device(node).type());
|
||||||
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
|
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
|
||||||
placer.get_canonical_device_name(node));
|
placer.get_canonical_device_name(node));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -320,14 +320,14 @@ TEST_F(ModelPrunerTest, PruningPerservesCrossDeviceIdentity) {
|
|||||||
Output c = ops::Const(s.WithOpName("c").WithDevice("/cpu:0"), 0.0f, {10, 10});
|
Output c = ops::Const(s.WithOpName("c").WithDevice("/cpu:0"), 0.0f, {10, 10});
|
||||||
|
|
||||||
// Node i1 should be preserved.
|
// Node i1 should be preserved.
|
||||||
Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/gpu:0"), c);
|
Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/device:GPU:0"), c);
|
||||||
Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/gpu:0"), {i1});
|
Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/device:GPU:0"), {i1});
|
||||||
Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/gpu:0"), {i1});
|
Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/device:GPU:0"), {i1});
|
||||||
|
|
||||||
// Node i2 should be pruned since it resides on the sender's device.
|
// Node i2 should be pruned since it resides on the sender's device.
|
||||||
Output i2 = ops::Identity(s.WithOpName("i2").WithDevice("/cpu:0"), c);
|
Output i2 = ops::Identity(s.WithOpName("i2").WithDevice("/cpu:0"), c);
|
||||||
Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/gpu:0"), {i2});
|
Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/device:GPU:0"), {i2});
|
||||||
Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/gpu:0"), {i2});
|
Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/device:GPU:0"), {i2});
|
||||||
|
|
||||||
GrapplerItem item;
|
GrapplerItem item;
|
||||||
TF_CHECK_OK(s.ToGraphDef(&item.graph));
|
TF_CHECK_OK(s.ToGraphDef(&item.graph));
|
||||||
|
@ -579,8 +579,8 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) {
|
|||||||
// TODO(pbar) Handle device IDs and prefix properly.
|
// TODO(pbar) Handle device IDs and prefix properly.
|
||||||
const string prefix = "";
|
const string prefix = "";
|
||||||
const int id = 0;
|
const int id = 0;
|
||||||
const string stream_device = strings::StrCat(prefix, "/gpu:", id, "/stream:");
|
const string stream_device = strings::StrCat(prefix, "/device:GPU:", id, "/stream:");
|
||||||
const string memcpy_device = strings::StrCat(prefix, "/gpu:", id, "/memcpy");
|
const string memcpy_device = strings::StrCat(prefix, "/device:GPU:", id, "/memcpy");
|
||||||
|
|
||||||
mutex_lock l2(trace_mu_);
|
mutex_lock l2(trace_mu_);
|
||||||
for (const auto &rec : kernel_records_) {
|
for (const auto &rec : kernel_records_) {
|
||||||
|
@ -63,12 +63,12 @@ class GPUTracerTest : public ::testing::Test {
|
|||||||
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
|
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
|
||||||
test::FillValues<float>(&x_tensor, {1, 1});
|
test::FillValues<float>(&x_tensor, {1, 1});
|
||||||
Node* x = test::graph::Constant(&graph, x_tensor);
|
Node* x = test::graph::Constant(&graph, x_tensor);
|
||||||
x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
x_ = x->name();
|
x_ = x->name();
|
||||||
|
|
||||||
// y = A * x
|
// y = A * x
|
||||||
Node* y = test::graph::Matmul(&graph, a, x, false, false);
|
Node* y = test::graph::Matmul(&graph, a, x, false, false);
|
||||||
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
y_ = y->name();
|
y_ = y->name();
|
||||||
|
|
||||||
// Use an Identity op to force a memcpy to CPU and back to GPU.
|
// Use an Identity op to force a memcpy to CPU and back to GPU.
|
||||||
@ -77,7 +77,7 @@ class GPUTracerTest : public ::testing::Test {
|
|||||||
|
|
||||||
Node* y_neg = test::graph::Unary(&graph, "Neg", i);
|
Node* y_neg = test::graph::Unary(&graph, "Neg", i);
|
||||||
y_neg_ = y_neg->name();
|
y_neg_ = y_neg->name();
|
||||||
y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
|
y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
|
||||||
|
|
||||||
test::graph::ToGraphDef(&graph, &def_);
|
test::graph::ToGraphDef(&graph, &def_);
|
||||||
}
|
}
|
||||||
|
@ -127,10 +127,10 @@ tfprof> advise
|
|||||||
Not running under xxxx. Skip JobChecker.
|
Not running under xxxx. Skip JobChecker.
|
||||||
|
|
||||||
AcceleratorUtilizationChecker:
|
AcceleratorUtilizationChecker:
|
||||||
device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
|
device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
|
||||||
device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
|
device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
|
||||||
device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
|
device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
|
||||||
device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
|
device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
|
||||||
|
|
||||||
OperationChecker:
|
OperationChecker:
|
||||||
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.
|
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.
|
||||||
|
@ -31,10 +31,10 @@ tfprof --graph_path=graph.pbtxt \
|
|||||||
|
|
||||||
tfprof> advise
|
tfprof> advise
|
||||||
AcceleratorUtilizationChecker:
|
AcceleratorUtilizationChecker:
|
||||||
device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
|
device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
|
||||||
device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
|
device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
|
||||||
device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
|
device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
|
||||||
device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
|
device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
|
||||||
|
|
||||||
OperationChecker:
|
OperationChecker:
|
||||||
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.
|
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.
|
||||||
|
@ -134,7 +134,7 @@ AddN 50.10ms (17.33%, 1.34%), 5481
|
|||||||
tfprof> op -select micros,device -order_by micros
|
tfprof> op -select micros,device -order_by micros
|
||||||
node name | execution time | assigned devices
|
node name | execution time | assigned devices
|
||||||
SoftmaxCrossEntropyWithLogits 1.37sec (100.00%, 36.44%), /job:worker/replica:0/task:0/cpu:0
|
SoftmaxCrossEntropyWithLogits 1.37sec (100.00%, 36.44%), /job:worker/replica:0/task:0/cpu:0
|
||||||
MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/gpu:0|/job:worker/replica:0/task:0/gpu:1|/job:worker/replica:0/task:0/gpu:2|/job:worker/replica:0/task:0/gpu:3
|
MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/device:GPU:0|/job:worker/replica:0/task:0/device:GPU:1|/job:worker/replica:0/task:0/device:GPU:2|/job:worker/replica:0/task:0/device:GPU:3
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,10 +53,10 @@ class TFProfAdvisorTest : public ::testing::Test {
|
|||||||
NodeExecStats node_stat;
|
NodeExecStats node_stat;
|
||||||
node_stat.set_all_start_micros(start_miros);
|
node_stat.set_all_start_micros(start_miros);
|
||||||
node_stat.set_op_end_rel_micros(end_rel_micros);
|
node_stat.set_op_end_rel_micros(end_rel_micros);
|
||||||
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0", node_stat);
|
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0", node_stat);
|
||||||
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:all",
|
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:all",
|
||||||
node_stat);
|
node_stat);
|
||||||
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:0",
|
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:0",
|
||||||
node_stat);
|
node_stat);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CountAsCPUTime(const string& device) {
|
bool CountAsCPUTime(const string& device) {
|
||||||
return RE2::FullMatch(device, ".*/(gpu|cpu|device:sycl):\\d+");
|
return RE2::FullMatch(device, ".*/(device:gpu|gpu|cpu|device:sycl):\\d+");
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
|
bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
|
||||||
@ -143,7 +143,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
|
|||||||
|
|
||||||
// TODO(xpan): Make this more robust?
|
// TODO(xpan): Make this more robust?
|
||||||
// See run_metadata_test.py
|
// See run_metadata_test.py
|
||||||
// It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0.
|
// It can be /job:0/replica:0/xxxx/device:GPU:0, or simply /device:GPU:0.
|
||||||
// It can have some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
|
// It can have some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
|
||||||
if (IsCanonicalDevice(dev)) {
|
if (IsCanonicalDevice(dev)) {
|
||||||
if (!canonical_device_.empty()) {
|
if (!canonical_device_.empty()) {
|
||||||
|
@ -42,7 +42,7 @@ message GPUOptions {
|
|||||||
// A comma-separated list of GPU ids that determines the 'visible'
|
// A comma-separated list of GPU ids that determines the 'visible'
|
||||||
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
|
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
|
||||||
// can see 8 GPU devices in the process, and one wanted to map
|
// can see 8 GPU devices in the process, and one wanted to map
|
||||||
// visible GPU devices 5 and 3 as "/gpu:0", and "/gpu:1", then one
|
// visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
|
||||||
// would specify this field as "5,3". This field is similar in
|
// would specify this field as "5,3". This field is similar in
|
||||||
// spirit to the CUDA_VISIBLE_DEVICES environment variable, except
|
// spirit to the CUDA_VISIBLE_DEVICES environment variable, except
|
||||||
// it applies to the visible GPU devices in the process.
|
// it applies to the visible GPU devices in the process.
|
||||||
|
@ -76,21 +76,21 @@ TEST(DeviceNameUtilsTest, Basic) {
|
|||||||
DeviceNameUtils::ParsedName p;
|
DeviceNameUtils::ParsedName p;
|
||||||
EXPECT_FALSE(DeviceNameUtils::ParseFullName("foobar", &p));
|
EXPECT_FALSE(DeviceNameUtils::ParseFullName("foobar", &p));
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:3", &p));
|
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/device:GPU:3", &p));
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:", &p));
|
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:", &p));
|
||||||
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
|
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
|
||||||
"/job:123/replica:1/task:2/device:gpu:", &p));
|
"/job:123/replica:1/task:2/device:gpu:", &p));
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/gpu:3", &p));
|
DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/device:GPU:3", &p));
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/gpu:3", &p));
|
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/device:GPU:3", &p));
|
||||||
EXPECT_FALSE(
|
EXPECT_FALSE(
|
||||||
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/bar:3", &p));
|
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/bar:3", &p));
|
||||||
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
|
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
|
||||||
"/job:foo/replica:1/task:2/gpu:3/extra", &p));
|
"/job:foo/replica:1/task:2/device:GPU:3/extra", &p));
|
||||||
EXPECT_TRUE(
|
EXPECT_TRUE(
|
||||||
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/gpu:3", &p));
|
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/device:GPU:3", &p));
|
||||||
EXPECT_TRUE(p.has_job);
|
EXPECT_TRUE(p.has_job);
|
||||||
EXPECT_TRUE(p.has_replica);
|
EXPECT_TRUE(p.has_replica);
|
||||||
EXPECT_TRUE(p.has_task);
|
EXPECT_TRUE(p.has_task);
|
||||||
@ -106,7 +106,7 @@ TEST(DeviceNameUtilsTest, Basic) {
|
|||||||
// Allow _ in job names.
|
// Allow _ in job names.
|
||||||
DeviceNameUtils::ParsedName p;
|
DeviceNameUtils::ParsedName p;
|
||||||
EXPECT_TRUE(DeviceNameUtils::ParseFullName(
|
EXPECT_TRUE(DeviceNameUtils::ParseFullName(
|
||||||
"/job:foo_bar/replica:1/task:2/gpu:3", &p));
|
"/job:foo_bar/replica:1/task:2/device:GPU:3", &p));
|
||||||
EXPECT_TRUE(p.has_job);
|
EXPECT_TRUE(p.has_job);
|
||||||
EXPECT_TRUE(p.has_replica);
|
EXPECT_TRUE(p.has_replica);
|
||||||
EXPECT_TRUE(p.has_task);
|
EXPECT_TRUE(p.has_task);
|
||||||
@ -193,7 +193,7 @@ TEST(DeviceNameUtilsTest, Basic) {
|
|||||||
}
|
}
|
||||||
{
|
{
|
||||||
DeviceNameUtils::ParsedName p;
|
DeviceNameUtils::ParsedName p;
|
||||||
EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/gpu:5", &p));
|
EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/device:GPU:5", &p));
|
||||||
EXPECT_FALSE(p.has_job);
|
EXPECT_FALSE(p.has_job);
|
||||||
EXPECT_TRUE(p.has_replica);
|
EXPECT_TRUE(p.has_replica);
|
||||||
EXPECT_FALSE(p.has_task);
|
EXPECT_FALSE(p.has_task);
|
||||||
@ -216,13 +216,13 @@ TEST(DeviceNameUtilsTest, Basic) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
EXPECT_TRUE(DeviceNameUtils::IsSameAddressSpace(
|
EXPECT_TRUE(DeviceNameUtils::IsSameAddressSpace(
|
||||||
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/gpu:4"));
|
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/device:GPU:4"));
|
||||||
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
||||||
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/gpu:4"));
|
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/device:GPU:4"));
|
||||||
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
||||||
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/gpu:4"));
|
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/device:GPU:4"));
|
||||||
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
|
||||||
"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/gpu:4"));
|
"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/device:GPU:4"));
|
||||||
|
|
||||||
EXPECT_EQ(DeviceNameUtils::LocalName("CPU", 1), "CPU:1");
|
EXPECT_EQ(DeviceNameUtils::LocalName("CPU", 1), "CPU:1");
|
||||||
EXPECT_EQ(DeviceNameUtils::LocalName("GPU", 2), "GPU:2");
|
EXPECT_EQ(DeviceNameUtils::LocalName("GPU", 2), "GPU:2");
|
||||||
@ -284,17 +284,17 @@ static bool IsCSHelper(StringPiece pattern, StringPiece actual) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
|
TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
|
||||||
EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(
|
EXPECT_TRUE(
|
||||||
IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
|
IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsCSHelper("/job:*/replica:*/task:*",
|
EXPECT_TRUE(IsCSHelper("/job:*/replica:*/task:*",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(
|
EXPECT_TRUE(
|
||||||
IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/gpu:3"));
|
IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
|
EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_FALSE(IsCSHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
|
EXPECT_FALSE(IsCSHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
|
||||||
EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
|
static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
|
||||||
@ -305,36 +305,36 @@ static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
TEST(DeviceNameUtilsTest, IsSpecification) {
|
TEST(DeviceNameUtilsTest, IsSpecification) {
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/gpu:3"));
|
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1"));
|
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:*", "/replica:1"));
|
EXPECT_TRUE(IsSpecHelper("/job:*", "/replica:1"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work"));
|
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work"));
|
||||||
EXPECT_TRUE(
|
EXPECT_TRUE(
|
||||||
IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
|
IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:*",
|
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:*",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:3",
|
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/device:GPU:3",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/task:2",
|
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/task:2",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/job:work/replica:*/task:2",
|
EXPECT_TRUE(IsSpecHelper("/job:work/replica:*/task:2",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/cpu:1"));
|
EXPECT_TRUE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/cpu:1"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/cpu:0", "/cpu:0"));
|
EXPECT_TRUE(IsSpecHelper("/cpu:0", "/cpu:0"));
|
||||||
EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
|
EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
|
||||||
|
|
||||||
EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/gpu:3", "/gpu:*"));
|
EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/device:GPU:3", "/gpu:*"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2"));
|
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/gpu:1"));
|
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/device:GPU:1"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
|
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
|
EXPECT_FALSE(IsSpecHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/job:work/replica:*/task:0",
|
EXPECT_FALSE(IsSpecHelper("/job:work/replica:*/task:0",
|
||||||
"/job:work/replica:1/task:2/gpu:3"));
|
"/job:work/replica:1/task:2/device:GPU:3"));
|
||||||
EXPECT_FALSE(IsSpecHelper("/job:work/replica:0/task:2",
|
EXPECT_FALSE(IsSpecHelper("/job:work/replica:0/task:2",
|
||||||
"/job:work/replica:*/task:2/gpu:3"));
|
"/job:work/replica:*/task:2/device:GPU:3"));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DeviceNameUtilsTest, SplitDeviceName) {
|
TEST(DeviceNameUtilsTest, SplitDeviceName) {
|
||||||
@ -348,7 +348,7 @@ TEST(DeviceNameUtilsTest, SplitDeviceName) {
|
|||||||
"/job:foo/cpu:1/task:2/replica:1", &task, &device));
|
"/job:foo/cpu:1/task:2/replica:1", &task, &device));
|
||||||
EXPECT_EQ("/job:foo/replica:1/task:2", task);
|
EXPECT_EQ("/job:foo/replica:1/task:2", task);
|
||||||
EXPECT_EQ("CPU:1", device);
|
EXPECT_EQ("CPU:1", device);
|
||||||
EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/gpu:3", &task, &device));
|
EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/device:GPU:3", &task, &device));
|
||||||
EXPECT_EQ("", task);
|
EXPECT_EQ("", task);
|
||||||
EXPECT_EQ("GPU:3", device);
|
EXPECT_EQ("GPU:3", device);
|
||||||
EXPECT_FALSE(DeviceNameUtils::SplitDeviceName("gpu:3", &task, &device));
|
EXPECT_FALSE(DeviceNameUtils::SplitDeviceName("gpu:3", &task, &device));
|
||||||
@ -413,11 +413,11 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
|
|||||||
MergeDevNamesHelper("", "/job:foo", "/job:foo");
|
MergeDevNamesHelper("", "/job:foo", "/job:foo");
|
||||||
MergeDevNamesHelper("", "/replica:2", "/replica:2");
|
MergeDevNamesHelper("", "/replica:2", "/replica:2");
|
||||||
MergeDevNamesHelper("", "/task:7", "/task:7");
|
MergeDevNamesHelper("", "/task:7", "/task:7");
|
||||||
// MergeDevNamesHelper("", "/gpu:1", "/gpu:1");
|
// MergeDevNamesHelper("", "/device:GPU:1", "/device:GPU:1");
|
||||||
|
|
||||||
// Combining disjoint names.
|
// Combining disjoint names.
|
||||||
MergeDevNamesHelper("/job:foo", "/task:7", "/job:foo/task:7");
|
MergeDevNamesHelper("/job:foo", "/task:7", "/job:foo/task:7");
|
||||||
MergeDevNamesHelper("/job:foo", "/gpu:1", "/job:foo/gpu:1");
|
MergeDevNamesHelper("/job:foo", "/device:GPU:1", "/job:foo/device:GPU:1");
|
||||||
|
|
||||||
// Combining overlapping names.
|
// Combining overlapping names.
|
||||||
MergeDevNamesHelper("/job:foo/replica:0", "/replica:0/task:1",
|
MergeDevNamesHelper("/job:foo/replica:0", "/replica:0/task:1",
|
||||||
@ -426,25 +426,25 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
|
|||||||
// Wildcard tests.
|
// Wildcard tests.
|
||||||
MergeDevNamesHelper("", "/gpu:*", "/gpu:*");
|
MergeDevNamesHelper("", "/gpu:*", "/gpu:*");
|
||||||
MergeDevNamesHelper("/gpu:*", "/gpu:*", "/gpu:*");
|
MergeDevNamesHelper("/gpu:*", "/gpu:*", "/gpu:*");
|
||||||
MergeDevNamesHelper("/gpu:1", "/gpu:*", "/gpu:1");
|
MergeDevNamesHelper("/device:GPU:1", "/gpu:*", "/device:GPU:1");
|
||||||
|
|
||||||
// Incompatible components.
|
// Incompatible components.
|
||||||
MergeDevNamesError("/job:foo", "/job:bar", "incompatible jobs");
|
MergeDevNamesError("/job:foo", "/job:bar", "incompatible jobs");
|
||||||
MergeDevNamesError("/replica:0", "/replica:1", "incompatible replicas");
|
MergeDevNamesError("/replica:0", "/replica:1", "incompatible replicas");
|
||||||
MergeDevNamesError("/task:0", "/task:1", "incompatible tasks");
|
MergeDevNamesError("/task:0", "/task:1", "incompatible tasks");
|
||||||
MergeDevNamesError("/gpu:*", "/cpu:*", "incompatible types");
|
MergeDevNamesError("/gpu:*", "/cpu:*", "incompatible types");
|
||||||
MergeDevNamesError("/gpu:0", "/gpu:1", "incompatible ids");
|
MergeDevNamesError("/device:GPU:0", "/device:GPU:1", "incompatible ids");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DeviceNameUtilsTest, MergeDevNamesAllowSoftPlacement) {
|
TEST(DeviceNameUtilsTest, MergeDevNamesAllowSoftPlacement) {
|
||||||
// Incompatible components with allow_soft_placement.
|
// Incompatible components with allow_soft_placement.
|
||||||
MergeDevNamesHelperAllowSoftPlacement("/gpu:*", "/cpu:1", "");
|
MergeDevNamesHelperAllowSoftPlacement("/gpu:*", "/cpu:1", "");
|
||||||
MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/gpu:1", "");
|
MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/device:GPU:1", "");
|
||||||
MergeDevNamesHelperAllowSoftPlacement("/gpu:1", "/gpu:2", "/gpu:*");
|
MergeDevNamesHelperAllowSoftPlacement("/device:GPU:1", "/device:GPU:2", "/device:GPU:*");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DeviceNameUtilsTest, GetNamesForDeviceMappings) {
|
TEST(DeviceNameUtilsTest, GetNamesForDeviceMappings) {
|
||||||
DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/gpu:1");
|
DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/device:GPU:1");
|
||||||
EXPECT_EQ(str_util::Join(DeviceNameUtils::GetNamesForDeviceMappings(p), ","),
|
EXPECT_EQ(str_util::Join(DeviceNameUtils::GetNamesForDeviceMappings(p), ","),
|
||||||
"/job:foo/replica:10/task:0/device:GPU:1,"
|
"/job:foo/replica:10/task:0/device:GPU:1,"
|
||||||
"/job:foo/replica:10/task:0/gpu:1");
|
"/job:foo/replica:10/task:0/gpu:1");
|
||||||
|
@ -73,12 +73,12 @@ other wrappers and the dynamic decoder described below. For example, one can
|
|||||||
write:
|
write:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:0")
|
cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:0")
|
||||||
attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs)
|
attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs)
|
||||||
attn_cell = tf.contrib.seq2seq.AttentionWrapper(
|
attn_cell = tf.contrib.seq2seq.AttentionWrapper(
|
||||||
cell, attention_mechanism, attention_size=256)
|
cell, attention_mechanism, attention_size=256)
|
||||||
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/gpu:1")
|
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/device:GPU:1")
|
||||||
top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:1")
|
top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:1")
|
||||||
multi_cell = MultiRNNCell([attn_cell, top_cell])
|
multi_cell = MultiRNNCell([attn_cell, top_cell])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -110,7 +110,7 @@ devices. For example, the following snippet creates a variable named `v` and
|
|||||||
places it on the second GPU device:
|
places it on the second GPU device:
|
||||||
|
|
||||||
``` python
|
``` python
|
||||||
with tf.device("/gpu:1"):
|
with tf.device("/device:GPU:1"):
|
||||||
v = tf.get_variable("v", [1])
|
v = tf.get_variable("v", [1])
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -411,7 +411,7 @@ the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`.
|
|||||||
|
|
||||||
* A preferred hardware device to run the operation within a tower.
|
* A preferred hardware device to run the operation within a tower.
|
||||||
@{tf.device} specifies this. For
|
@{tf.device} specifies this. For
|
||||||
instance, all operations in the first tower reside within `device('/gpu:0')`
|
instance, all operations in the first tower reside within `device('/device:GPU:0')`
|
||||||
scope indicating that they should be run on the first GPU.
|
scope indicating that they should be run on the first GPU.
|
||||||
|
|
||||||
All variables are pinned to the CPU and accessed via
|
All variables are pinned to the CPU and accessed via
|
||||||
|
@ -7,8 +7,8 @@ supported device types are `CPU` and `GPU`. They are represented as `strings`.
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
* `"/cpu:0"`: The CPU of your machine.
|
* `"/cpu:0"`: The CPU of your machine.
|
||||||
* `"/gpu:0"`: The GPU of your machine, if you have one.
|
* `"/device:GPU:0"`: The GPU of your machine, if you have one.
|
||||||
* `"/gpu:1"`: The second GPU of your machine, etc.
|
* `"/device:GPU:1"`: The second GPU of your machine, etc.
|
||||||
|
|
||||||
If a TensorFlow operation has both CPU and GPU implementations, the GPU devices
|
If a TensorFlow operation has both CPU and GPU implementations, the GPU devices
|
||||||
will be given priority when the operation is assigned to a device. For example,
|
will be given priority when the operation is assigned to a device. For example,
|
||||||
@ -35,11 +35,11 @@ You should see the following output:
|
|||||||
|
|
||||||
```
|
```
|
||||||
Device mapping:
|
Device mapping:
|
||||||
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
|
||||||
id: 0000:05:00.0
|
id: 0000:05:00.0
|
||||||
b: /job:localhost/replica:0/task:0/gpu:0
|
b: /job:localhost/replica:0/task:0/device:GPU:0
|
||||||
a: /job:localhost/replica:0/task:0/gpu:0
|
a: /job:localhost/replica:0/task:0/device:GPU:0
|
||||||
MatMul: /job:localhost/replica:0/task:0/gpu:0
|
MatMul: /job:localhost/replica:0/task:0/device:GPU:0
|
||||||
[[ 22. 28.]
|
[[ 22. 28.]
|
||||||
[ 49. 64.]]
|
[ 49. 64.]]
|
||||||
|
|
||||||
@ -71,11 +71,11 @@ example) and automatically copy tensors between devices if required.
|
|||||||
|
|
||||||
```
|
```
|
||||||
Device mapping:
|
Device mapping:
|
||||||
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
|
||||||
id: 0000:05:00.0
|
id: 0000:05:00.0
|
||||||
b: /job:localhost/replica:0/task:0/cpu:0
|
b: /job:localhost/replica:0/task:0/cpu:0
|
||||||
a: /job:localhost/replica:0/task:0/cpu:0
|
a: /job:localhost/replica:0/task:0/cpu:0
|
||||||
MatMul: /job:localhost/replica:0/task:0/gpu:0
|
MatMul: /job:localhost/replica:0/task:0/device:GPU:0
|
||||||
[[ 22. 28.]
|
[[ 22. 28.]
|
||||||
[ 49. 64.]]
|
[ 49. 64.]]
|
||||||
```
|
```
|
||||||
@ -127,7 +127,7 @@ to specify the preference explicitly:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Creates a graph.
|
# Creates a graph.
|
||||||
with tf.device('/gpu:2'):
|
with tf.device('/device:GPU:2'):
|
||||||
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
|
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
|
||||||
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
|
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
|
||||||
c = tf.matmul(a, b)
|
c = tf.matmul(a, b)
|
||||||
@ -142,9 +142,9 @@ If the device you have specified does not exist, you will get
|
|||||||
|
|
||||||
```
|
```
|
||||||
InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b':
|
InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b':
|
||||||
Could not satisfy explicit device specification '/gpu:2'
|
Could not satisfy explicit device specification '/device:GPU:2'
|
||||||
[[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2]
|
[[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2]
|
||||||
values: 1 2 3...>, _device="/gpu:2"]()]]
|
values: 1 2 3...>, _device="/device:GPU:2"]()]]
|
||||||
```
|
```
|
||||||
|
|
||||||
If you would like TensorFlow to automatically choose an existing and supported
|
If you would like TensorFlow to automatically choose an existing and supported
|
||||||
@ -154,7 +154,7 @@ the session.
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
# Creates a graph.
|
# Creates a graph.
|
||||||
with tf.device('/gpu:2'):
|
with tf.device('/device:GPU:2'):
|
||||||
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
|
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
|
||||||
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
|
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
|
||||||
c = tf.matmul(a, b)
|
c = tf.matmul(a, b)
|
||||||
@ -175,7 +175,7 @@ For example:
|
|||||||
```
|
```
|
||||||
# Creates a graph.
|
# Creates a graph.
|
||||||
c = []
|
c = []
|
||||||
for d in ['/gpu:2', '/gpu:3']:
|
for d in ['/device:GPU:2', '/device:GPU:3']:
|
||||||
with tf.device(d):
|
with tf.device(d):
|
||||||
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
|
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
|
||||||
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2])
|
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2])
|
||||||
@ -192,20 +192,20 @@ You will see the following output.
|
|||||||
|
|
||||||
```
|
```
|
||||||
Device mapping:
|
Device mapping:
|
||||||
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K20m, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus
|
||||||
id: 0000:02:00.0
|
id: 0000:02:00.0
|
||||||
/job:localhost/replica:0/task:0/gpu:1 -> device: 1, name: Tesla K20m, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus
|
||||||
id: 0000:03:00.0
|
id: 0000:03:00.0
|
||||||
/job:localhost/replica:0/task:0/gpu:2 -> device: 2, name: Tesla K20m, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus
|
||||||
id: 0000:83:00.0
|
id: 0000:83:00.0
|
||||||
/job:localhost/replica:0/task:0/gpu:3 -> device: 3, name: Tesla K20m, pci bus
|
/job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus
|
||||||
id: 0000:84:00.0
|
id: 0000:84:00.0
|
||||||
Const_3: /job:localhost/replica:0/task:0/gpu:3
|
Const_3: /job:localhost/replica:0/task:0/device:GPU:3
|
||||||
Const_2: /job:localhost/replica:0/task:0/gpu:3
|
Const_2: /job:localhost/replica:0/task:0/device:GPU:3
|
||||||
MatMul_1: /job:localhost/replica:0/task:0/gpu:3
|
MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3
|
||||||
Const_1: /job:localhost/replica:0/task:0/gpu:2
|
Const_1: /job:localhost/replica:0/task:0/device:GPU:2
|
||||||
Const: /job:localhost/replica:0/task:0/gpu:2
|
Const: /job:localhost/replica:0/task:0/device:GPU:2
|
||||||
MatMul: /job:localhost/replica:0/task:0/gpu:2
|
MatMul: /job:localhost/replica:0/task:0/device:GPU:2
|
||||||
AddN: /job:localhost/replica:0/task:0/cpu:0
|
AddN: /job:localhost/replica:0/task:0/cpu:0
|
||||||
[[ 44. 56.]
|
[[ 44. 56.]
|
||||||
[ 98. 128.]]
|
[ 98. 128.]]
|
||||||
|
@ -47,12 +47,12 @@ def my_model(features, labels, mode):
|
|||||||
# Create three fully connected layers respectively of size 10, 20, and 10 with
|
# Create three fully connected layers respectively of size 10, 20, and 10 with
|
||||||
# each layer having a dropout probability of 0.1.
|
# each layer having a dropout probability of 0.1.
|
||||||
net = features[X_FEATURE]
|
net = features[X_FEATURE]
|
||||||
with tf.device('/gpu:1'):
|
with tf.device('/device:GPU:1'):
|
||||||
for units in [10, 20, 10]:
|
for units in [10, 20, 10]:
|
||||||
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
|
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
|
||||||
net = tf.layers.dropout(net, rate=0.1)
|
net = tf.layers.dropout(net, rate=0.1)
|
||||||
|
|
||||||
with tf.device('/gpu:2'):
|
with tf.device('/device:GPU:2'):
|
||||||
# Compute logits (1 per class).
|
# Compute logits (1 per class).
|
||||||
logits = tf.layers.dense(net, 3, activation=None)
|
logits = tf.layers.dense(net, 3, activation=None)
|
||||||
|
|
||||||
|
@ -173,7 +173,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
|
|||||||
#
|
#
|
||||||
# W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device:
|
# W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device:
|
||||||
# /job:worker/replica:0/task:0/device:CPU:0 all devices:
|
# /job:worker/replica:0/task:0/device:CPU:0 all devices:
|
||||||
# /job:local/replica:0/task:0/gpu:0,
|
# /job:local/replica:0/task:0/device:GPU:0,
|
||||||
# /job:local/replica:0/task:0/device:GPU:0,
|
# /job:local/replica:0/task:0/device:GPU:0,
|
||||||
# /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0,
|
# /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0,
|
||||||
# /job:local/replica:0/task:0/device:CPU:1,
|
# /job:local/replica:0/task:0/device:CPU:1,
|
||||||
@ -198,7 +198,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
|
|||||||
sum1 = input1 + input2
|
sum1 = input1 + input2
|
||||||
|
|
||||||
if test.is_gpu_available():
|
if test.is_gpu_available():
|
||||||
device_str = '/job:worker/task:0/gpu:0'
|
device_str = '/job:worker/task:0/device:GPU:0'
|
||||||
else:
|
else:
|
||||||
device_str = '/job:worker/task:0/cpu:1'
|
device_str = '/job:worker/task:0/cpu:1'
|
||||||
with ops.device(device_str):
|
with ops.device(device_str):
|
||||||
|
@ -1124,7 +1124,7 @@ class SessionTest(test_util.TensorFlowTestCase):
|
|||||||
# which is why placing this is invalid. If at some point
|
# which is why placing this is invalid. If at some point
|
||||||
# GPU kernels are added to this test, some other different
|
# GPU kernels are added to this test, some other different
|
||||||
# op / device combo should be chosen.
|
# op / device combo should be chosen.
|
||||||
with ops.device('/gpu:0'):
|
with ops.device('/device:GPU:0'):
|
||||||
a = constant_op.constant(1.0, shape=[1, 2])
|
a = constant_op.constant(1.0, shape=[1, 2])
|
||||||
|
|
||||||
b = constant_op.constant(1.0, shape=[1, 2])
|
b = constant_op.constant(1.0, shape=[1, 2])
|
||||||
@ -1145,7 +1145,7 @@ class SessionTest(test_util.TensorFlowTestCase):
|
|||||||
# which is why placing this is invalid. If at some point
|
# which is why placing this is invalid. If at some point
|
||||||
# GPU kernels are added to this test, some other different
|
# GPU kernels are added to this test, some other different
|
||||||
# op / device combo should be chosen.
|
# op / device combo should be chosen.
|
||||||
with ops.device('/gpu:0'):
|
with ops.device('/device:GPU:0'):
|
||||||
_ = constant_op.constant(1.0, shape=[1, 2])
|
_ = constant_op.constant(1.0, shape=[1, 2])
|
||||||
|
|
||||||
b = constant_op.constant(1.0, shape=[1, 2])
|
b = constant_op.constant(1.0, shape=[1, 2])
|
||||||
@ -1494,7 +1494,7 @@ class SessionTest(test_util.TensorFlowTestCase):
|
|||||||
allow_soft_placement=True,
|
allow_soft_placement=True,
|
||||||
graph_options=config_pb2.GraphOptions(build_cost_model=100))
|
graph_options=config_pb2.GraphOptions(build_cost_model=100))
|
||||||
with session.Session(config=config) as sess:
|
with session.Session(config=config) as sess:
|
||||||
with ops.device('/gpu:0'):
|
with ops.device('/device:GPU:0'):
|
||||||
a = array_ops.placeholder(dtypes.float32, shape=[])
|
a = array_ops.placeholder(dtypes.float32, shape=[])
|
||||||
b = math_ops.add(a, a)
|
b = math_ops.add(a, a)
|
||||||
c = array_ops.identity(b)
|
c = array_ops.identity(b)
|
||||||
|
@ -100,8 +100,8 @@ class TimelineTest(test.TestCase):
|
|||||||
self.assertTrue(run_metadata.HasField('step_stats'))
|
self.assertTrue(run_metadata.HasField('step_stats'))
|
||||||
step_stats = run_metadata.step_stats
|
step_stats = run_metadata.step_stats
|
||||||
devices = [d.device for d in step_stats.dev_stats]
|
devices = [d.device for d in step_stats.dev_stats]
|
||||||
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in devices)
|
self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
|
||||||
self.assertTrue('/gpu:0/stream:all' in devices)
|
self.assertTrue('/device:GPU:0/stream:all' in devices)
|
||||||
tl = timeline.Timeline(step_stats)
|
tl = timeline.Timeline(step_stats)
|
||||||
ctf = tl.generate_chrome_trace_format()
|
ctf = tl.generate_chrome_trace_format()
|
||||||
self._validateTrace(ctf)
|
self._validateTrace(ctf)
|
||||||
|
@ -380,7 +380,8 @@ def device_path_to_device_name(device_dir):
|
|||||||
path_items = os.path.basename(device_dir)[
|
path_items = os.path.basename(device_dir)[
|
||||||
len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",")
|
len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",")
|
||||||
return "/".join([
|
return "/".join([
|
||||||
path_item.replace("_", ":", 1) for path_item in path_items])
|
path_item.replace("device_", "device:").replace("_", ":", 1)
|
||||||
|
for path_item in path_items])
|
||||||
|
|
||||||
|
|
||||||
class DebugTensorDatum(object):
|
class DebugTensorDatum(object):
|
||||||
|
@ -237,11 +237,11 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
|
|||||||
gpu_0_dir = os.path.join(
|
gpu_0_dir = os.path.join(
|
||||||
self._dump_root,
|
self._dump_root,
|
||||||
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
|
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
|
||||||
",job_localhost,replica_0,task_0,gpu_0")
|
",job_localhost,replica_0,task_0,device_GPU_0")
|
||||||
gpu_1_dir = os.path.join(
|
gpu_1_dir = os.path.join(
|
||||||
self._dump_root,
|
self._dump_root,
|
||||||
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
|
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
|
||||||
",job_localhost,replica_0,task_0,gpu_1")
|
",job_localhost,replica_0,task_0,device_GPU_1")
|
||||||
os.makedirs(cpu_0_dir)
|
os.makedirs(cpu_0_dir)
|
||||||
os.makedirs(gpu_0_dir)
|
os.makedirs(gpu_0_dir)
|
||||||
os.makedirs(gpu_1_dir)
|
os.makedirs(gpu_1_dir)
|
||||||
@ -281,12 +281,12 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
|
|||||||
node = graph_gpu_0.node.add()
|
node = graph_gpu_0.node.add()
|
||||||
node.name = "node_foo_1"
|
node.name = "node_foo_1"
|
||||||
node.op = "FooOp"
|
node.op = "FooOp"
|
||||||
node.device = "/job:localhost/replica:0/task:0/gpu:0"
|
node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
|
||||||
graph_gpu_1 = graph_pb2.GraphDef()
|
graph_gpu_1 = graph_pb2.GraphDef()
|
||||||
node = graph_gpu_1.node.add()
|
node = graph_gpu_1.node.add()
|
||||||
node.name = "node_foo_1"
|
node.name = "node_foo_1"
|
||||||
node.op = "FooOp"
|
node.op = "FooOp"
|
||||||
node.device = "/job:localhost/replica:0/task:0/gpu:1"
|
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
|
||||||
|
|
||||||
dump_dir = debug_data.DebugDumpDir(
|
dump_dir = debug_data.DebugDumpDir(
|
||||||
self._dump_root,
|
self._dump_root,
|
||||||
@ -294,14 +294,14 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
self.assertItemsEqual(
|
self.assertItemsEqual(
|
||||||
["/job:localhost/replica:0/task:0/cpu:0",
|
["/job:localhost/replica:0/task:0/cpu:0",
|
||||||
"/job:localhost/replica:0/task:0/gpu:0",
|
"/job:localhost/replica:0/task:0/device:GPU:0",
|
||||||
"/job:localhost/replica:0/task:0/gpu:1"], dump_dir.devices())
|
"/job:localhost/replica:0/task:0/device:GPU:1"], dump_dir.devices())
|
||||||
self.assertEqual(1472563253536385, dump_dir.t0)
|
self.assertEqual(1472563253536385, dump_dir.t0)
|
||||||
self.assertEqual(3, dump_dir.size)
|
self.assertEqual(3, dump_dir.size)
|
||||||
|
|
||||||
with self.assertRaisesRegexp(
|
with self.assertRaisesRegexp(
|
||||||
ValueError, r"Invalid device name: "):
|
ValueError, r"Invalid device name: "):
|
||||||
dump_dir.nodes("/job:localhost/replica:0/task:0/gpu:2")
|
dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2")
|
||||||
self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
|
self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
|
||||||
dump_dir.nodes())
|
dump_dir.nodes())
|
||||||
self.assertItemsEqual(
|
self.assertItemsEqual(
|
||||||
@ -319,16 +319,16 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
|
|||||||
node = graph_gpu_0.node.add()
|
node = graph_gpu_0.node.add()
|
||||||
node.name = "node_foo_1"
|
node.name = "node_foo_1"
|
||||||
node.op = "FooOp"
|
node.op = "FooOp"
|
||||||
node.device = "/job:localhost/replica:0/task:0/gpu:0"
|
node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
|
||||||
graph_gpu_1 = graph_pb2.GraphDef()
|
graph_gpu_1 = graph_pb2.GraphDef()
|
||||||
node = graph_gpu_1.node.add()
|
node = graph_gpu_1.node.add()
|
||||||
node.name = "node_foo_1"
|
node.name = "node_foo_1"
|
||||||
node.op = "FooOp"
|
node.op = "FooOp"
|
||||||
node.device = "/job:localhost/replica:0/task:0/gpu:1"
|
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
|
||||||
node = graph_gpu_1.node.add() # Here is the duplicate.
|
node = graph_gpu_1.node.add() # Here is the duplicate.
|
||||||
node.name = "node_foo_1"
|
node.name = "node_foo_1"
|
||||||
node.op = "FooOp"
|
node.op = "FooOp"
|
||||||
node.device = "/job:localhost/replica:0/task:0/gpu:1"
|
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
|
||||||
|
|
||||||
with self.assertRaisesRegexp(
|
with self.assertRaisesRegexp(
|
||||||
ValueError, r"Duplicate node name on device "):
|
ValueError, r"Duplicate node name on device "):
|
||||||
|
@ -711,7 +711,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
|
|||||||
# Test node name list lookup of the DebugDumpDir object.
|
# Test node name list lookup of the DebugDumpDir object.
|
||||||
if test_util.gpu_device_name():
|
if test_util.gpu_device_name():
|
||||||
node_names = dump.nodes(
|
node_names = dump.nodes(
|
||||||
device_name="/job:localhost/replica:0/task:0/gpu:0")
|
device_name="/job:localhost/replica:0/task:0/device:GPU:0")
|
||||||
else:
|
else:
|
||||||
node_names = dump.nodes()
|
node_names = dump.nodes()
|
||||||
self.assertTrue(u_name in node_names)
|
self.assertTrue(u_name in node_names)
|
||||||
|
@ -402,7 +402,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
def testRuntimeErrorBeforeGraphExecutionIsRaised(self):
|
def testRuntimeErrorBeforeGraphExecutionIsRaised(self):
|
||||||
# Use an impossible device name to cause an error before graph execution.
|
# Use an impossible device name to cause an error before graph execution.
|
||||||
with ops.device("/gpu:1337"):
|
with ops.device("/device:GPU:1337"):
|
||||||
w = variables.Variable([1.0] * 10, name="w")
|
w = variables.Variable([1.0] * 10, name="w")
|
||||||
|
|
||||||
wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
|
wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
|
||||||
|
@ -79,17 +79,17 @@ class DeviceTest(test_util.TensorFlowTestCase):
|
|||||||
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
|
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
|
||||||
d.parse_from_string("/replica:1/task:0/device:CPU:0")
|
d.parse_from_string("/replica:1/task:0/device:CPU:0")
|
||||||
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
|
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
|
||||||
d.parse_from_string("/job:muu/gpu:2")
|
d.parse_from_string("/job:muu/device:GPU:2")
|
||||||
self.assertEquals("/job:muu/device:GPU:2", d.to_string())
|
self.assertEquals("/job:muu/device:GPU:2", d.to_string())
|
||||||
with self.assertRaises(Exception) as e:
|
with self.assertRaises(Exception) as e:
|
||||||
d.parse_from_string("/job:muu/gpu:2/cpu:0")
|
d.parse_from_string("/job:muu/device:GPU:2/cpu:0")
|
||||||
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
||||||
|
|
||||||
def testFromString(self):
|
def testFromString(self):
|
||||||
d = device.DeviceSpec.from_string("/job:foo/replica:0")
|
d = device.DeviceSpec.from_string("/job:foo/replica:0")
|
||||||
self.assertEquals("/job:foo/replica:0", d.to_string())
|
self.assertEquals("/job:foo/replica:0", d.to_string())
|
||||||
with self.assertRaises(Exception) as e:
|
with self.assertRaises(Exception) as e:
|
||||||
d = device.DeviceSpec.from_string("/job:muu/gpu:2/cpu:0")
|
d = device.DeviceSpec.from_string("/job:muu/device:GPU:2/cpu:0")
|
||||||
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
||||||
|
|
||||||
d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*")
|
d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*")
|
||||||
@ -102,13 +102,13 @@ class DeviceTest(test_util.TensorFlowTestCase):
|
|||||||
def testMerge(self):
|
def testMerge(self):
|
||||||
d = device.DeviceSpec.from_string("/job:foo/replica:0")
|
d = device.DeviceSpec.from_string("/job:foo/replica:0")
|
||||||
self.assertEquals("/job:foo/replica:0", d.to_string())
|
self.assertEquals("/job:foo/replica:0", d.to_string())
|
||||||
d.merge_from(device.DeviceSpec.from_string("/task:1/gpu:2"))
|
d.merge_from(device.DeviceSpec.from_string("/task:1/device:GPU:2"))
|
||||||
self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string())
|
self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string())
|
||||||
|
|
||||||
d = device.DeviceSpec()
|
d = device.DeviceSpec()
|
||||||
d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0"))
|
d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0"))
|
||||||
self.assertEquals("/task:1/device:CPU:0", d.to_string())
|
self.assertEquals("/task:1/device:CPU:0", d.to_string())
|
||||||
d.merge_from(device.DeviceSpec.from_string("/job:boo/gpu:0"))
|
d.merge_from(device.DeviceSpec.from_string("/job:boo/device:GPU:0"))
|
||||||
self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string())
|
self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string())
|
||||||
d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2"))
|
d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2"))
|
||||||
self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string())
|
self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string())
|
||||||
@ -134,10 +134,10 @@ class DeviceTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
|
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
|
||||||
device.canonical_name(
|
device.canonical_name(
|
||||||
"/job:foo/replica:0/task:0/gpu:0"))
|
"/job:foo/replica:0/task:0/device:GPU:0"))
|
||||||
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
|
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
|
||||||
device.canonical_name(
|
device.canonical_name(
|
||||||
"/gpu:0/task:0/replica:0/job:foo"))
|
"/device:GPU:0/task:0/replica:0/job:foo"))
|
||||||
|
|
||||||
def testCheckValid(self):
|
def testCheckValid(self):
|
||||||
device.check_valid("/job:foo/replica:0")
|
device.check_valid("/job:foo/replica:0")
|
||||||
@ -155,7 +155,7 @@ class DeviceTest(test_util.TensorFlowTestCase):
|
|||||||
self.assertTrue("Unknown attribute: 'bar'" in str(e.exception))
|
self.assertTrue("Unknown attribute: 'bar'" in str(e.exception))
|
||||||
|
|
||||||
with self.assertRaises(Exception) as e:
|
with self.assertRaises(Exception) as e:
|
||||||
device.check_valid("/cpu:0/gpu:2")
|
device.check_valid("/cpu:0/device:GPU:2")
|
||||||
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
self.assertTrue("Cannot specify multiple device" in str(e.exception))
|
||||||
|
|
||||||
|
|
||||||
|
@ -505,7 +505,7 @@ class FunctionTest(test.TestCase):
|
|||||||
|
|
||||||
_ = PlusOne(1, name="p1")
|
_ = PlusOne(1, name="p1")
|
||||||
with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"):
|
with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"):
|
||||||
_ = PlusOne(1, device="/gpu:0")
|
_ = PlusOne(1, device="/device:GPU:0")
|
||||||
|
|
||||||
def testFunctionDecorator(self):
|
def testFunctionDecorator(self):
|
||||||
|
|
||||||
|
@ -106,9 +106,9 @@ class DeviceFunctionsTest(test.TestCase):
|
|||||||
var_0 = variables.Variable(0)
|
var_0 = variables.Variable(0)
|
||||||
with ops.device(test_device_func_pin_variable_to_cpu):
|
with ops.device(test_device_func_pin_variable_to_cpu):
|
||||||
var_1 = variables.Variable(1)
|
var_1 = variables.Variable(1)
|
||||||
with ops.device(lambda op: "/gpu:0"):
|
with ops.device(lambda op: "/device:GPU:0"):
|
||||||
var_2 = variables.Variable(2)
|
var_2 = variables.Variable(2)
|
||||||
with ops.device("/gpu:0"): # Implicit merging device function.
|
with ops.device("/device:GPU:0"): # Implicit merging device function.
|
||||||
var_3 = variables.Variable(3)
|
var_3 = variables.Variable(3)
|
||||||
|
|
||||||
self.assertDeviceEqual(var_0.device, None)
|
self.assertDeviceEqual(var_0.device, None)
|
||||||
|
@ -878,7 +878,7 @@ class ImportGraphDefTest(test.TestCase):
|
|||||||
self.assertEqual(c.device, c4.device) # worker overrides ps.
|
self.assertEqual(c.device, c4.device) # worker overrides ps.
|
||||||
|
|
||||||
with ops.Graph().as_default():
|
with ops.Graph().as_default():
|
||||||
with ops.device(device.merge_device("/gpu:0")):
|
with ops.device(device.merge_device("/device:GPU:0")):
|
||||||
a5, b5, c5 = importer.import_graph_def(
|
a5, b5, c5 = importer.import_graph_def(
|
||||||
gdef, return_elements=["a", "b", "c"])
|
gdef, return_elements=["a", "b", "c"])
|
||||||
self.assertEqual("/device:GPU:0", a5.device)
|
self.assertEqual("/device:GPU:0", a5.device)
|
||||||
|
@ -550,7 +550,7 @@ class ScopedMetaGraphTest(test.TestCase):
|
|||||||
a = variables.Variable(
|
a = variables.Variable(
|
||||||
constant_op.constant(
|
constant_op.constant(
|
||||||
1.0, shape=[2, 2]), name="a")
|
1.0, shape=[2, 2]), name="a")
|
||||||
with ops.device("/job:ps/replica:0/task:0/gpu:0"):
|
with ops.device("/job:ps/replica:0/task:0/device:GPU:0"):
|
||||||
b = variables.Variable(
|
b = variables.Variable(
|
||||||
constant_op.constant(
|
constant_op.constant(
|
||||||
2.0, shape=[2, 2]), name="b")
|
2.0, shape=[2, 2]), name="b")
|
||||||
|
@ -3342,7 +3342,7 @@ class Graph(object):
|
|||||||
For example:
|
For example:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
with g.device('/gpu:0'):
|
with g.device('/device:GPU:0'):
|
||||||
# All operations constructed in this context will be placed
|
# All operations constructed in this context will be placed
|
||||||
# on GPU 0.
|
# on GPU 0.
|
||||||
with g.device(None):
|
with g.device(None):
|
||||||
@ -3352,7 +3352,7 @@ class Graph(object):
|
|||||||
# Defines a function from `Operation` to device string.
|
# Defines a function from `Operation` to device string.
|
||||||
def matmul_on_gpu(n):
|
def matmul_on_gpu(n):
|
||||||
if n.type == "MatMul":
|
if n.type == "MatMul":
|
||||||
return "/gpu:0"
|
return "/device:GPU:0"
|
||||||
else:
|
else:
|
||||||
return "/cpu:0"
|
return "/cpu:0"
|
||||||
|
|
||||||
|
@ -1555,26 +1555,26 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
def testColocationDeviceInteraction(self):
|
def testColocationDeviceInteraction(self):
|
||||||
with ops.device("/cpu:0"):
|
with ops.device("/cpu:0"):
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
a = constant_op.constant([2.0], name="a")
|
a = constant_op.constant([2.0], name="a")
|
||||||
with ops.colocate_with(a.op):
|
with ops.colocate_with(a.op):
|
||||||
# 'b' is created in the scope of /cpu:0, but it is
|
# 'b' is created in the scope of /cpu:0, but it is
|
||||||
# colocated with 'a', which is on '/gpu:0'. colocate_with
|
# colocated with 'a', which is on '/device:GPU:0'. colocate_with
|
||||||
# overrides devices because it is a stronger constraint.
|
# overrides devices because it is a stronger constraint.
|
||||||
b = constant_op.constant(3.0)
|
b = constant_op.constant(3.0)
|
||||||
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
|
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
|
||||||
self.assertEqual(a.op.device, b.op.device)
|
self.assertEqual(a.op.device, b.op.device)
|
||||||
|
|
||||||
def testColocationCanonicalization(self):
|
def testColocationCanonicalization(self):
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
_ = constant_op.constant(2.0)
|
_ = constant_op.constant(2.0)
|
||||||
with ops.device(lambda op: "/gpu:0"):
|
with ops.device(lambda op: "/device:GPU:0"):
|
||||||
b = constant_op.constant(3.0)
|
b = constant_op.constant(3.0)
|
||||||
with ops.get_default_graph().colocate_with(b):
|
with ops.get_default_graph().colocate_with(b):
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
c = constant_op.constant(4.0)
|
c = constant_op.constant(4.0)
|
||||||
|
|
||||||
# A's device will be /gpu:0
|
# A's device will be /device:GPU:0
|
||||||
# B's device will be /device:GPU:0
|
# B's device will be /device:GPU:0
|
||||||
# C's device will be /device:GPU:0 because it
|
# C's device will be /device:GPU:0 because it
|
||||||
# inherits B's device name, after canonicalizing the names.
|
# inherits B's device name, after canonicalizing the names.
|
||||||
@ -1582,10 +1582,10 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
def testLocationOverrides(self):
|
def testLocationOverrides(self):
|
||||||
with ops.device("/cpu:0"):
|
with ops.device("/cpu:0"):
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
a = constant_op.constant([2.0], name="a")
|
a = constant_op.constant([2.0], name="a")
|
||||||
# Note that this colocation is "redundant", since we are
|
# Note that this colocation is "redundant", since we are
|
||||||
# within the scope of "/gpu:0". However, we would like to
|
# within the scope of "/device:GPU:0". However, we would like to
|
||||||
# preserve in the GraphDef that these two ops should be
|
# preserve in the GraphDef that these two ops should be
|
||||||
# colocated in a portable way.
|
# colocated in a portable way.
|
||||||
with ops.colocate_with(a.op):
|
with ops.colocate_with(a.op):
|
||||||
@ -1652,7 +1652,7 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
|
|||||||
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
|
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
|
||||||
|
|
||||||
def testInconsistentDeviceWithinColocate(self):
|
def testInconsistentDeviceWithinColocate(self):
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
a = constant_op.constant([2.0], name="a")
|
a = constant_op.constant([2.0], name="a")
|
||||||
with ops.colocate_with(a.op):
|
with ops.colocate_with(a.op):
|
||||||
# This is allowed due to legacy but clearly wrong, since we
|
# This is allowed due to legacy but clearly wrong, since we
|
||||||
|
@ -405,7 +405,7 @@ class TensorFlowTestCase(googletest.TestCase):
|
|||||||
trigger the creation of a new session.
|
trigger the creation of a new session.
|
||||||
|
|
||||||
Use the `use_gpu` and `force_gpu` options to control where ops are run. If
|
Use the `use_gpu` and `force_gpu` options to control where ops are run. If
|
||||||
`force_gpu` is True, all ops are pinned to `/gpu:0`. Otherwise, if `use_gpu`
|
`force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if `use_gpu`
|
||||||
is True, TensorFlow tries to run as many ops on the GPU as possible. If both
|
is True, TensorFlow tries to run as many ops on the GPU as possible. If both
|
||||||
`force_gpu and `use_gpu` are False, all ops are pinned to the CPU.
|
`force_gpu and `use_gpu` are False, all ops are pinned to the CPU.
|
||||||
|
|
||||||
@ -427,7 +427,7 @@ class TensorFlowTestCase(googletest.TestCase):
|
|||||||
config: An optional config_pb2.ConfigProto to use to configure the
|
config: An optional config_pb2.ConfigProto to use to configure the
|
||||||
session.
|
session.
|
||||||
use_gpu: If True, attempt to run as many ops as possible on GPU.
|
use_gpu: If True, attempt to run as many ops as possible on GPU.
|
||||||
force_gpu: If True, pin all ops to `/gpu:0`.
|
force_gpu: If True, pin all ops to `/device:GPU:0`.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A Session object that should be used as a context manager to surround
|
A Session object that should be used as a context manager to surround
|
||||||
@ -466,11 +466,11 @@ class TensorFlowTestCase(googletest.TestCase):
|
|||||||
sess = self._cached_session
|
sess = self._cached_session
|
||||||
with sess.graph.as_default(), sess.as_default():
|
with sess.graph.as_default(), sess.as_default():
|
||||||
if force_gpu:
|
if force_gpu:
|
||||||
# Use the name of an actual device if one is detected, or '/gpu:0'
|
# Use the name of an actual device if one is detected, or '/device:GPU:0'
|
||||||
# otherwise
|
# otherwise
|
||||||
gpu_name = gpu_device_name()
|
gpu_name = gpu_device_name()
|
||||||
if not gpu_name:
|
if not gpu_name:
|
||||||
gpu_name = "/gpu:0"
|
gpu_name = "/device:GPU:0"
|
||||||
with sess.graph.device(gpu_name):
|
with sess.graph.device(gpu_name):
|
||||||
yield sess
|
yield sess
|
||||||
elif use_gpu:
|
elif use_gpu:
|
||||||
@ -481,11 +481,11 @@ class TensorFlowTestCase(googletest.TestCase):
|
|||||||
else:
|
else:
|
||||||
with session.Session(graph=graph, config=prepare_config(config)) as sess:
|
with session.Session(graph=graph, config=prepare_config(config)) as sess:
|
||||||
if force_gpu:
|
if force_gpu:
|
||||||
# Use the name of an actual device if one is detected, or '/gpu:0'
|
# Use the name of an actual device if one is detected, or '/device:GPU:0'
|
||||||
# otherwise
|
# otherwise
|
||||||
gpu_name = gpu_device_name()
|
gpu_name = gpu_device_name()
|
||||||
if not gpu_name:
|
if not gpu_name:
|
||||||
gpu_name = "/gpu:0"
|
gpu_name = "/device:GPU:0"
|
||||||
with sess.graph.device(gpu_name):
|
with sess.graph.device(gpu_name):
|
||||||
yield sess
|
yield sess
|
||||||
elif use_gpu:
|
elif use_gpu:
|
||||||
|
@ -238,7 +238,7 @@ class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase):
|
|||||||
n_iterations = 500
|
n_iterations = 500
|
||||||
with session as s:
|
with session as s:
|
||||||
data = variables.Variable(1.0)
|
data = variables.Variable(1.0)
|
||||||
with ops.device('/gpu:0'):
|
with ops.device('/device:GPU:0'):
|
||||||
random_seed.set_random_seed(1)
|
random_seed.set_random_seed(1)
|
||||||
matrix1 = variables.Variable(
|
matrix1 = variables.Variable(
|
||||||
random_ops.truncated_normal([1024, 1]), name='matrix1')
|
random_ops.truncated_normal([1024, 1]), name='matrix1')
|
||||||
|
@ -311,7 +311,7 @@ class CholeskyBenchmark(test.Benchmark):
|
|||||||
if test.is_gpu_available(True):
|
if test.is_gpu_available(True):
|
||||||
with ops.Graph().as_default(), \
|
with ops.Graph().as_default(), \
|
||||||
session.Session() as sess, \
|
session.Session() as sess, \
|
||||||
ops.device("/gpu:0"):
|
ops.device("/device:GPU:0"):
|
||||||
l = linalg_ops.cholesky(data)
|
l = linalg_ops.cholesky(data)
|
||||||
self.run_op_benchmark(
|
self.run_op_benchmark(
|
||||||
sess,
|
sess,
|
||||||
@ -338,11 +338,11 @@ class CholeskyBenchmark(test.Benchmark):
|
|||||||
|
|
||||||
if test.is_gpu_available(True):
|
if test.is_gpu_available(True):
|
||||||
_BenchmarkGrad(
|
_BenchmarkGrad(
|
||||||
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/gpu:0")
|
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/device:GPU:0")
|
||||||
_BenchmarkGrad(
|
_BenchmarkGrad(
|
||||||
TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/gpu:0")
|
TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/device:GPU:0")
|
||||||
_BenchmarkGrad(
|
_BenchmarkGrad(
|
||||||
TriAngSolveCompositeGrad, "composite_triangular_solve", "/gpu:0")
|
TriAngSolveCompositeGrad, "composite_triangular_solve", "/device:GPU:0")
|
||||||
|
|
||||||
_BenchmarkGrad(
|
_BenchmarkGrad(
|
||||||
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0")
|
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0")
|
||||||
|
@ -1423,9 +1423,8 @@ class ControlFlowTest(test.TestCase):
|
|||||||
self.assertEqual(45, rx.eval())
|
self.assertEqual(45, rx.eval())
|
||||||
|
|
||||||
def _testWhileGrad_ColocateGradients(self, colocate):
|
def _testWhileGrad_ColocateGradients(self, colocate):
|
||||||
gpu_dev_name = test.gpu_device_name().lower() if test.is_gpu_available(
|
gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
|
||||||
) else "/gpu:0"
|
) else "/device:GPU:0"
|
||||||
gpu_short_name = gpu_dev_name.split("/")[-1]
|
|
||||||
|
|
||||||
with self.test_session(graph=ops.Graph()) as sess:
|
with self.test_session(graph=ops.Graph()) as sess:
|
||||||
v = constant_op.constant(2.0, name="v")
|
v = constant_op.constant(2.0, name="v")
|
||||||
@ -1439,19 +1438,19 @@ class ControlFlowTest(test.TestCase):
|
|||||||
r = gradients_impl.gradients(
|
r = gradients_impl.gradients(
|
||||||
loop, v, colocate_gradients_with_ops=colocate)[0]
|
loop, v, colocate_gradients_with_ops=colocate)[0]
|
||||||
r_ops = r.graph.get_operations()
|
r_ops = r.graph.get_operations()
|
||||||
r_devices = [(op.name, op.device.lower()) for op in r_ops]
|
r_devices = [(op.name, op.device) for op in r_ops]
|
||||||
|
|
||||||
self.assertTrue(any("Square" in op.name for op in r_ops))
|
self.assertTrue(any("Square" in op.name for op in r_ops))
|
||||||
|
|
||||||
for (name, dev) in r_devices:
|
for (name, dev) in r_devices:
|
||||||
if not colocate and name.endswith("Square"):
|
if not colocate and name.endswith("Square"):
|
||||||
# Only forward graph contain gpu in Square device
|
# Only forward graph contain gpu in Square device
|
||||||
self.assertTrue(gpu_short_name in dev)
|
self.assertTrue(gpu_dev_name in dev)
|
||||||
elif colocate and "Square" in name:
|
elif colocate and "Square" in name:
|
||||||
# Forward and backward graphs contain gpu in Square/Square_grad devices
|
# Forward and backward graphs contain gpu in Square/Square_grad devices
|
||||||
self.assertTrue(gpu_short_name in dev)
|
self.assertTrue(gpu_dev_name in dev)
|
||||||
else:
|
else:
|
||||||
self.assertFalse(gpu_short_name in dev)
|
self.assertFalse(gpu_dev_name in dev)
|
||||||
self.assertAllClose(1024.0, sess.run(r))
|
self.assertAllClose(1024.0, sess.run(r))
|
||||||
|
|
||||||
def testWhileGrad_ColocateGradients(self):
|
def testWhileGrad_ColocateGradients(self):
|
||||||
@ -2426,7 +2425,7 @@ class ControlFlowTest(test.TestCase):
|
|||||||
|
|
||||||
# device set on tensor, default device on graph => default device on dep.
|
# device set on tensor, default device on graph => default device on dep.
|
||||||
vdef = variables.Variable([0.0], name="vdef")
|
vdef = variables.Variable([0.0], name="vdef")
|
||||||
with ops.device("/job:worker/gpu:1"):
|
with ops.device("/job:worker/device:GPU:1"):
|
||||||
with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer],
|
with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer],
|
||||||
vdef)
|
vdef)
|
||||||
# The device is empty, but the colocation constraint is set.
|
# The device is empty, but the colocation constraint is set.
|
||||||
|
@ -347,7 +347,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
|
|||||||
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
|
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
|
||||||
x_t, y_t, adjoint_a, adjoint_b)
|
x_t, y_t, adjoint_a, adjoint_b)
|
||||||
else:
|
else:
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
x_t = constant_op.constant(x)
|
x_t = constant_op.constant(x)
|
||||||
y_t = constant_op.constant(y)
|
y_t = constant_op.constant(y)
|
||||||
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
|
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
|
||||||
@ -365,7 +365,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
|
|||||||
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
|
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
|
||||||
x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
|
x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
|
||||||
else:
|
else:
|
||||||
with ops.device("/gpu:0"):
|
with ops.device("/device:GPU:0"):
|
||||||
x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
|
x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
|
||||||
x_val = constant_op.constant(x[np.where(x)])
|
x_val = constant_op.constant(x[np.where(x)])
|
||||||
x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))
|
x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))
|
||||||
|
@ -722,7 +722,7 @@ class VariableScopeTest(test.TestCase):
|
|||||||
def device_func(op):
|
def device_func(op):
|
||||||
if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
|
if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
|
||||||
varname_type.append((op.name, op.get_attr("dtype")))
|
varname_type.append((op.name, op.get_attr("dtype")))
|
||||||
return "/gpu:0"
|
return "/device:GPU:0"
|
||||||
|
|
||||||
with g.as_default():
|
with g.as_default():
|
||||||
with ops.device(device_func):
|
with ops.device(device_func):
|
||||||
|
@ -163,20 +163,20 @@ class GradientsTest(test_util.TensorFlowTestCase):
|
|||||||
with ops.Graph().as_default() as g:
|
with ops.Graph().as_default() as g:
|
||||||
w = constant(1.0, shape=[1, 1])
|
w = constant(1.0, shape=[1, 1])
|
||||||
x = constant(1.0, shape=[1, 2])
|
x = constant(1.0, shape=[1, 2])
|
||||||
with g.device("/gpu:0"):
|
with g.device("/device:GPU:0"):
|
||||||
wx = math_ops.matmul(w, x)
|
wx = math_ops.matmul(w, x)
|
||||||
gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0]
|
gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0]
|
||||||
self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
|
self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
|
||||||
|
|
||||||
def testColocateGradientsWithAggregation(self):
|
def testColocateGradientsWithAggregation(self):
|
||||||
with ops.Graph().as_default() as g:
|
with ops.Graph().as_default() as g:
|
||||||
with g.device("/gpu:1"):
|
with g.device("/device:GPU:1"):
|
||||||
w = constant(1.0, shape=[1, 1])
|
w = constant(1.0, shape=[1, 1])
|
||||||
x = constant(1.0, shape=[1, 2])
|
x = constant(1.0, shape=[1, 2])
|
||||||
y = constant(1.0, shape=[1, 2])
|
y = constant(1.0, shape=[1, 2])
|
||||||
wx = math_ops.matmul(w, x)
|
wx = math_ops.matmul(w, x)
|
||||||
wy = math_ops.matmul(w, y)
|
wy = math_ops.matmul(w, y)
|
||||||
with g.device("/gpu:0"):
|
with g.device("/device:GPU:0"):
|
||||||
z = wx + wy
|
z = wx + wy
|
||||||
|
|
||||||
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
|
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
|
||||||
@ -187,7 +187,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
|
|||||||
|
|
||||||
def testColocateGradientsWithAggregationInMultipleDevices(self):
|
def testColocateGradientsWithAggregationInMultipleDevices(self):
|
||||||
with ops.Graph().as_default() as g:
|
with ops.Graph().as_default() as g:
|
||||||
with g.device("/gpu:1"):
|
with g.device("/device:GPU:1"):
|
||||||
w = constant(1.0, shape=[1, 1])
|
w = constant(1.0, shape=[1, 1])
|
||||||
x = constant(1.0, shape=[1, 2])
|
x = constant(1.0, shape=[1, 2])
|
||||||
y = constant(1.0, shape=[1, 2])
|
y = constant(1.0, shape=[1, 2])
|
||||||
@ -195,7 +195,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
|
|||||||
wx = math_ops.matmul(w, x)
|
wx = math_ops.matmul(w, x)
|
||||||
with g.device("/task:2"):
|
with g.device("/task:2"):
|
||||||
wy = math_ops.matmul(w, y)
|
wy = math_ops.matmul(w, y)
|
||||||
with g.device("/gpu:0"):
|
with g.device("/device:GPU:0"):
|
||||||
z = wx + wy
|
z = wx + wy
|
||||||
|
|
||||||
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
|
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
|
||||||
|
@ -47,7 +47,7 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
|
|||||||
Returns:
|
Returns:
|
||||||
A matmul operation to run()
|
A matmul operation to run()
|
||||||
"""
|
"""
|
||||||
with ops.device('/%s:0' % device):
|
with ops.device('%s' % device):
|
||||||
if not transpose_a:
|
if not transpose_a:
|
||||||
x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
|
x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
|
||||||
else:
|
else:
|
||||||
@ -112,7 +112,7 @@ class MatmulBenchmark(test.Benchmark):
|
|||||||
return duration
|
return duration
|
||||||
|
|
||||||
def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
|
def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
|
||||||
self.run_graph('gpu', n, m, k, transpose_a, transpose_b, num_iters, dtype)
|
self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b, num_iters, dtype)
|
||||||
|
|
||||||
def test_round(self, num_iters):
|
def test_round(self, num_iters):
|
||||||
dtypes = [np.float32, np.float64]
|
dtypes = [np.float32, np.float64]
|
||||||
|
@ -71,37 +71,39 @@ class MatmulBenchmarkTest(googletest.TestCase):
|
|||||||
def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
|
def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
|
||||||
graph = ops.Graph()
|
graph = ops.Graph()
|
||||||
with graph.as_default():
|
with graph.as_default():
|
||||||
matmul_benchmark.build_graph("gpu", n, m, k, transpose_a, transpose_b,
|
matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b,
|
||||||
dtype)
|
dtype)
|
||||||
gd = graph.as_graph_def()
|
gd = graph.as_graph_def()
|
||||||
self.assertProtoEquals("""
|
dev=googletest.gpu_device_name()
|
||||||
node { name: "random_uniform/shape" op: "Const" device: "/device:GPU:0" }
|
proto_expected = """
|
||||||
node { name: "random_uniform/min" op: "Const" device: "/device:GPU:0" }
|
node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform/max" op: "Const" device: "/device:GPU:0" }
|
node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: "/device:GPU:0" }
|
node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: "/device:GPU:0" }
|
node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: "/device:GPU:0" }
|
node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: "/device:GPU:0" }
|
node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable" op: "VariableV2" device: "/device:GPU:0" }
|
node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: "/device:GPU:0" }
|
node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable/read" op: "Identity" input: "Variable" device: "/device:GPU:0" }
|
node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/shape" op: "Const" device: "/device:GPU:0" }
|
node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/min" op: "Const" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/max" op: "Const" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
|
||||||
node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: "/device:GPU:0" }
|
node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable_1" op: "VariableV2" device: "/device:GPU:0" }
|
node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: "/device:GPU:0" }
|
node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" }
|
||||||
node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: "/device:GPU:0" }
|
node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \""""+ dev +"""\" }
|
||||||
node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: "/device:GPU:0" }
|
node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" }
|
||||||
node { name: "group_deps" op: "NoOp" input: "^MatMul" device: "/device:GPU:0" }
|
node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" }
|
||||||
""", self._StripGraph(gd))
|
node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" }
|
||||||
|
"""
|
||||||
|
self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
|
||||||
|
|
||||||
def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
|
def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
|
||||||
benchmark_instance = matmul_benchmark.MatmulBenchmark()
|
benchmark_instance = matmul_benchmark.MatmulBenchmark()
|
||||||
duration = benchmark_instance.run_graph("gpu", n, m, k, transpose_a,
|
duration = benchmark_instance.run_graph(googletest.gpu_device_name(), n, m, k, transpose_a,
|
||||||
transpose_b, 1, dtype)
|
transpose_b, 1, dtype)
|
||||||
self.assertTrue(duration > 1e-6)
|
self.assertTrue(duration > 1e-6)
|
||||||
|
|
||||||
|
@ -97,21 +97,22 @@ class RunMetadataTest(test.TestCase):
|
|||||||
if not test.is_gpu_available(cuda_only=True):
|
if not test.is_gpu_available(cuda_only=True):
|
||||||
return
|
return
|
||||||
|
|
||||||
|
gpu_dev = test.gpu_device_name()
|
||||||
ops.reset_default_graph()
|
ops.reset_default_graph()
|
||||||
with ops.device('/gpu:0'):
|
with ops.device(gpu_dev):
|
||||||
tfprof_node, run_meta = _run_model()
|
tfprof_node, run_meta = _run_model()
|
||||||
self.assertEqual(tfprof_node.children[0].name, 'MatMul')
|
self.assertEqual(tfprof_node.children[0].name, 'MatMul')
|
||||||
self.assertGreater(tfprof_node.children[0].exec_micros, 10)
|
self.assertGreater(tfprof_node.children[0].exec_micros, 10)
|
||||||
|
|
||||||
ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
|
ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
|
||||||
self.assertEqual(len(ret), 3)
|
self.assertEqual(len(ret), 3)
|
||||||
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in ret)
|
self.assertTrue('/job:localhost/replica:0/task:0' + gpu_dev in ret)
|
||||||
del ret['/job:localhost/replica:0/task:0/gpu:0']
|
del ret['/job:localhost/replica:0/task:0' + gpu_dev]
|
||||||
|
|
||||||
has_all_stream = False
|
has_all_stream = False
|
||||||
for k, _ in six.iteritems(ret):
|
for k, _ in six.iteritems(ret):
|
||||||
self.assertTrue('gpu:0/stream' in k)
|
self.assertTrue(gpu_dev + '/stream' in k)
|
||||||
if 'gpu:0/stream:all' in k:
|
if gpu_dev + '/stream:all' in k:
|
||||||
has_all_stream = True
|
has_all_stream = True
|
||||||
self.assertTrue(has_all_stream)
|
self.assertTrue(has_all_stream)
|
||||||
|
|
||||||
@ -159,24 +160,24 @@ class RunMetadataTest(test.TestCase):
|
|||||||
return
|
return
|
||||||
|
|
||||||
ops.reset_default_graph()
|
ops.reset_default_graph()
|
||||||
with ops.device('/gpu:0'):
|
with ops.device('/device:GPU:0'):
|
||||||
tfprof_node, run_meta = _run_loop_model()
|
tfprof_node, run_meta = _run_loop_model()
|
||||||
# The while-loop caused a node to appear 4 times in scheduling.
|
# The while-loop caused a node to appear 4 times in scheduling.
|
||||||
ret = _extract_node(run_meta,
|
ret = _extract_node(run_meta,
|
||||||
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
|
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
|
||||||
self.assertEqual(len(ret['/job:localhost/replica:0/task:0/gpu:0']), 4)
|
self.assertEqual(len(ret['/job:localhost/replica:0/task:0/device:GPU:0']), 4)
|
||||||
|
|
||||||
total_cpu_execs = 0
|
total_cpu_execs = 0
|
||||||
for node in ret['/job:localhost/replica:0/task:0/gpu:0']:
|
for node in ret['/job:localhost/replica:0/task:0/device:GPU:0']:
|
||||||
total_cpu_execs += node.op_end_rel_micros
|
total_cpu_execs += node.op_end_rel_micros
|
||||||
|
|
||||||
ret = _extract_node(
|
ret = _extract_node(
|
||||||
run_meta,
|
run_meta,
|
||||||
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
|
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
|
||||||
self.assertGreaterEqual(len(ret['/gpu:0/stream:all']), 4)
|
self.assertGreaterEqual(len(ret['/device:GPU:0/stream:all']), 4)
|
||||||
|
|
||||||
total_accelerator_execs = 0
|
total_accelerator_execs = 0
|
||||||
for node in ret['/gpu:0/stream:all']:
|
for node in ret['/device:GPU:0/stream:all']:
|
||||||
total_accelerator_execs += node.op_end_rel_micros
|
total_accelerator_execs += node.op_end_rel_micros
|
||||||
|
|
||||||
mm_node = lib.SearchTFProfNode(
|
mm_node = lib.SearchTFProfNode(
|
||||||
|
@ -315,7 +315,7 @@ class ProfileOptionBuilder(object):
|
|||||||
"""Selectively counting statistics based on node types.
|
"""Selectively counting statistics based on node types.
|
||||||
|
|
||||||
Here, 'types' means the profiler nodes' properties. Profiler by default
|
Here, 'types' means the profiler nodes' properties. Profiler by default
|
||||||
consider device name (e.g. /job:xx/.../gpu:0) and operation type
|
consider device name (e.g. /job:xx/.../device:GPU:0) and operation type
|
||||||
(e.g. MatMul) as profiler nodes' properties. User can also associate
|
(e.g. MatMul) as profiler nodes' properties. User can also associate
|
||||||
customized 'types' to profiler nodes through OpLogProto proto.
|
customized 'types' to profiler nodes through OpLogProto proto.
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ class RemoveDeviceTest : public ::testing::Test {
|
|||||||
add_node2->set_op("Add");
|
add_node2->set_op("Add");
|
||||||
add_node2->add_input("const_node1");
|
add_node2->add_input("const_node1");
|
||||||
add_node2->add_input("const_node2");
|
add_node2->add_input("const_node2");
|
||||||
add_node2->set_device("//gpu:1");
|
add_node2->set_device("//device:GPU:1");
|
||||||
|
|
||||||
NodeDef* add_node3 = graph_def.add_node();
|
NodeDef* add_node3 = graph_def.add_node();
|
||||||
add_node3->set_name("add_node3");
|
add_node3->set_name("add_node3");
|
||||||
|
Loading…
Reference in New Issue
Block a user