[OpenCL] Extends matmul_benchmark.py to cover SYCL (#11697)

* [OpenCL] Extends matmul_benchmark.py to cover SYCL

* Fixed typo

* /gpu:0 -> /device:GPU:0

* Fixes control_flow_ops_py_test

* /gpu: -> /device:GPU:

* Fixes //tensorflow/python/profiler/internal:run_metadata_test

* gpu: -> GPU:

* Fixes tfprof_node

* [OpenCL] Fixes device-path-to-name conversion for device names with multiple colons (#123)

The device path is constructed from a device name by replacing all
colons with underscores. Some device names contain more than one colon,
for example 'device:SYCL:0' which gives a path 'device_SYCL_0'. The
previous code would not convert this back to the original device name,
but rather to 'device:SYCL_0'.

An alternative fix would be to convert all underscores to colons in the
device name (i.e. remove the restriction inside `replace("_", ":", 1)`),
however I'm not sure if there are any device names which contain
underscores.
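For illustration, a minimal Python sketch of the lossy round trip described above (the helper names `to_path` and `from_path_buggy` are hypothetical, not taken from the TensorFlow code base):

```python
def to_path(device_name):
    # Forward direction: every colon becomes an underscore.
    return device_name.replace(":", "_")

def from_path_buggy(device_path):
    # Old behaviour: only the first underscore is restored.
    return device_path.replace("_", ":", 1)

path = to_path("device:SYCL:0")                  # "device_SYCL_0"
assert from_path_buggy(path) == "device:SYCL_0"  # not the original name
```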

* If no GPU device is available, fake one

* gpu: -> device:GPU

* Fixes profiler test

* /gpu:x -> /device:GPU:x

* Fixes debug_io_utils_test.cc test

* Fixes device_name_utils_test.cc
Luke Iwanski authored on 2017-08-11 01:35:21 +01:00; committed by Rasmus Munk Larsen
parent 35e7a36658
commit ab96f41fb4
69 changed files with 286 additions and 285 deletions

View File

@@ -101,7 +101,7 @@ void ConcurrentSteps(const Options* opts, int session_index) {
 std::unique_ptr<Session> session(NewSession(options));
 GraphDef def = CreateGraphDef();
 if (options.target.empty()) {
-graph::SetDefaultDevice(opts->use_gpu ? "/gpu:0" : "/cpu:0", &def);
+graph::SetDefaultDevice(opts->use_gpu ? "/device:GPU:0" : "/cpu:0", &def);
 }
 TF_CHECK_OK(session->Create(def));

View File

@@ -93,7 +93,7 @@ class CudnnRNNBenchmark(test.Benchmark):
 batch_size = config["batch_size"]
 seq_length = config["seq_length"]
-with ops.Graph().as_default(), ops.device("/gpu:0"):
+with ops.Graph().as_default(), ops.device("/device:GPU:0"):
 model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
 params_size_t = model.params_size()
 input_data = variables.Variable(
@@ -125,7 +125,7 @@ class CudnnRNNBenchmark(test.Benchmark):
 batch_size = config["batch_size"]
 seq_length = config["seq_length"]
-with ops.Graph().as_default(), ops.device("/gpu:0"):
+with ops.Graph().as_default(), ops.device("/device:GPU:0"):
 inputs = seq_length * [
 array_ops.zeros([batch_size, num_units], dtypes.float32)
 ]
@@ -153,7 +153,7 @@ class CudnnRNNBenchmark(test.Benchmark):
 batch_size = config["batch_size"]
 seq_length = config["seq_length"]
-with ops.Graph().as_default(), ops.device("/gpu:0"):
+with ops.Graph().as_default(), ops.device("/device:GPU:0"):
 inputs = seq_length * [
 array_ops.zeros([batch_size, num_units], dtypes.float32)
 ]

View File

@@ -634,7 +634,7 @@ class MixtureBenchmark(test.Benchmark):
 np.random.seed(127)
 with session.Session(config=config, graph=ops.Graph()) as sess:
 random_seed.set_random_seed(0)
-with ops.device("/gpu:0" if use_gpu else "/cpu:0"):
+with ops.device("/device:GPU:0" if use_gpu else "/cpu:0"):
 mixture = create_distribution(
 num_components=num_components,
 batch_size=batch_size,

View File

@@ -443,19 +443,19 @@ class VariablesTest(test.TestCase):
 e = variables_lib2.variable('e', initializer=e_init)
 # The values below highlight how the VariableDeviceChooser puts initial
 # values on the same device as the variable job.
-self.assertDeviceEqual(a.device, '/gpu:0')
+self.assertDeviceEqual(a.device, '/device:GPU:0')
 self.assertEqual(a.initial_value.op.colocation_groups(),
 a.op.colocation_groups())
-self.assertDeviceEqual(b.device, '/gpu:0')
+self.assertDeviceEqual(b.device, '/device:GPU:0')
 self.assertEqual(b.initial_value.op.colocation_groups(),
 b.op.colocation_groups())
 self.assertDeviceEqual(c.device, '/cpu:12')
 self.assertEqual(c.initial_value.op.colocation_groups(),
 c.op.colocation_groups())
-self.assertDeviceEqual(d.device, '/gpu:0')
+self.assertDeviceEqual(d.device, '/device:GPU:0')
 self.assertEqual(d.initial_value.op.colocation_groups(),
 d.op.colocation_groups())
-self.assertDeviceEqual(e.device, '/gpu:0')
+self.assertDeviceEqual(e.device, '/device:GPU:0')
 self.assertDeviceEqual(e.initial_value.device, '/cpu:99')

View File

@@ -43,7 +43,7 @@ class AllReduceTest(test.TestCase):
 self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum)
 def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn):
-for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
+for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
 shape = (3, 4)
 np_ans = None
 tensors = []
@@ -84,7 +84,7 @@ class BroadcastTest(test.TestCase):
 # Create session inside outer loop to test use of
 # same communicator across multiple sessions.
 with self.test_session(use_gpu=True) as sess:
-for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
+for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
 shape = (3, 4)
 sender = np.random.randint(0, len(devices) - 1)
 with ops.device(devices[sender]):
@@ -115,7 +115,7 @@ class CombinedTest(test.TestCase):
 # Create session inside outer loop to test use of
 # same communicator across multiple sessions.
 with self.test_session(use_gpu=True) as sess:
-for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
+for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
 shape = (3, 4)
 # all-reduce

View File

@@ -446,12 +446,12 @@ class RNNCellTest(test.TestCase):
 # Can't perform this test w/o a GPU
 return
+gpu_dev = test.gpu_device_name()
 with self.test_session(use_gpu=True) as sess:
 with variable_scope.variable_scope(
 "root", initializer=init_ops.constant_initializer(0.5)):
 x = array_ops.zeros([1, 1, 3])
-cell = rnn_cell_impl.DeviceWrapper(
-rnn_cell_impl.GRUCell(3), test_util.gpu_device_name())
+cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
 with ops.device("/cpu:0"):
 outputs, _ = rnn.dynamic_rnn(
 cell=cell, inputs=x, dtype=dtypes.float32)
@@ -463,8 +463,7 @@
 _ = sess.run(outputs, options=opts, run_metadata=run_metadata)
 step_stats = run_metadata.step_stats
-ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
-("sycl" in step_stats.dev_stats[0].device)) else 1
+ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1
 gpu_stats = step_stats.dev_stats[ix].node_stats
 cpu_stats = step_stats.dev_stats[1 - ix].node_stats
 self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])

View File

@@ -42,7 +42,6 @@ from tensorflow.python.ops import variables as variables_lib
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging
 from tensorflow.python.util import nest
-from tensorflow.python.framework import test_util
 class Plus1RNNCell(rnn_lib.RNNCell):
 """RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
@@ -2208,11 +2207,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
 if not test.is_gpu_available():
 return  # Test requires access to a GPU
+gpu_dev = test.gpu_device_name()
 run_metadata = self._execute_rnn_on(
-rnn_device="/cpu:0", cell_device=test_util.gpu_device_name())
+rnn_device="/cpu:0", cell_device=gpu_dev)
 step_stats = run_metadata.step_stats
-ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
-("sycl" in step_stats.dev_stats[0].device)) else 1
+ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
 gpu_stats = step_stats.dev_stats[ix].node_stats
 cpu_stats = step_stats.dev_stats[1 - ix].node_stats
@@ -2233,12 +2232,12 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
 if not test.is_gpu_available():
 return  # Test requires access to a GPU
+gpu_dev = test.gpu_device_name()
 run_metadata = self._execute_rnn_on(
 rnn_device="/cpu:0", cell_device="/cpu:0",
-input_device=test_util.gpu_device_name())
+input_device=gpu_dev)
 step_stats = run_metadata.step_stats
-ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
-("sycl" in step_stats.dev_stats[0].device)) else 1
+ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
 gpu_stats = step_stats.dev_stats[ix].node_stats
 cpu_stats = step_stats.dev_stats[1 - ix].node_stats
@@ -2253,11 +2252,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
 if not test.is_gpu_available():
 return  # Test requires access to a GPU
+gpu_dev = test.gpu_device_name()
 run_metadata = self._execute_rnn_on(
-input_device=test_util.gpu_device_name())
+input_device=gpu_dev)
 step_stats = run_metadata.step_stats
-ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
-("sycl" in step_stats.dev_stats[0].device)) else 1
+ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
 gpu_stats = step_stats.dev_stats[ix].node_stats
 cpu_stats = step_stats.dev_stats[1 - ix].node_stats

View File

@@ -357,7 +357,7 @@ def training_gru_block_vs_gru_cell(batch_size,
 ops.reset_default_graph()
 with session.Session(graph=ops.Graph()) as sess:
 # Specify the device which is been used.
-with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
+with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
 # Random initializers.
 seed = 1994
@@ -429,7 +429,7 @@ def inference_gru_block_vs_gru_cell(batch_size,
 """Benchmark inference speed between GRUBlockCell vs GRUCell."""
 ops.reset_default_graph()
 with session.Session(graph=ops.Graph()) as sess:
-with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
+with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
 # Random initializers.
 seed = 1994
@@ -484,7 +484,7 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size,
 """Benchmark single bprop step speed between GRUBlockCell vs GRUCell."""
 ops.reset_default_graph()
 with session.Session(graph=ops.Graph()) as sess:
-with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
+with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
 initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989)
 # Inputs
 x = vs.get_variable("x", [batch_size, input_size])

View File

@@ -78,7 +78,7 @@ class GatherTreeTest(test.TestCase):
 sequence_length = [[3, 3, 3]]
 expected_result = _transpose_batch_time(
 [[[2, -1, 2], [6, 5, 6], [7, 8, 9], [-1, -1, -1]]])
-with ops.device("/gpu:0"):
+with ops.device("/device:GPU:0"):
 beams = beam_search_ops.gather_tree(
 step_ids=step_ids, parent_ids=parent_ids,
 sequence_length=sequence_length)

View File

@@ -22,7 +22,7 @@ limitations under the License.
 // Device names
 // * Every Device should have a unique name with the format:
 // /job:___/replica:___/task:___/(gpu|cpu):___
-// An example name would be "/job:train/replica:0/task:3/gpu:2".
+// An example name would be "/job:train/replica:0/task:3/device:GPU:2".
 // * Task numbers are within the specified replica, so there are as
 // many "task zeros" as replicas.

View File

@@ -476,7 +476,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
 vx.scalar<float>()() = 1.0;
 Node* x = test::graph::Constant(&g, vx);
 Node* y = test::graph::Unary(&g, "Darth", x);
-y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 GraphDef def;
 test::graph::ToGraphDef(&g, &def);
@@ -494,7 +494,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
 vx.scalar<float>()() = 1.0;
 Node* x = test::graph::Constant(&g, vx);
 Node* y = test::graph::Unary(&g, "Darth", x);
-y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 GraphDef def;
 test::graph::ToGraphDef(&g, &def);

View File

@@ -154,14 +154,14 @@ static void TestHWAccelerator(bool enableHWTrace) {
 Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
 test::FillValues<float>(&x_tensor, {1, 1});
 Node* x = test::graph::Constant(&graph, x_tensor);
-x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 #ifdef TENSORFLOW_USE_SYCL
 x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
 #endif // TENSORFLOW_USE_SYCL
 // y = A * x
 Node* y = test::graph::Matmul(&graph, a, x, false, false);
-y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 #ifdef TENSORFLOW_USE_SYCL
 y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
 #endif // TENSORFLOW_USE_SYCL

View File

@@ -588,7 +588,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
 for (int i = 0; i < n; i++) {
 BaseGPUDevice* gpu_device;
 TF_RETURN_IF_ERROR(CreateGPUDevice(options,
-strings::StrCat(name_prefix, "/gpu:", i),
+strings::StrCat(name_prefix, "/device:GPU:", i),
 valid_gpu_ids[i], &gpu_device));
 TF_RETURN_IF_ERROR(gpu_device->Init(options));
 devices->push_back(gpu_device);
@@ -1049,7 +1049,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
 size_t new_id = ids->size();
 ids->push_back(visible_gpu_id);
-LOG(INFO) << "Creating TensorFlow device (/gpu:" << new_id << ") -> "
+LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> "
 << "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
 }

View File

@@ -141,7 +141,7 @@ class BaseGPUDeviceFactory : public DeviceFactory {
 Allocator* cpu_allocator) = 0;
 // Returns into 'ids' the list of valid GPU ids, in the order that
-// they should map to logical gpu ids "/gpu:0", "/gpu:1", etc, based
+// they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based
 // upon 'visible_device_list', a comma-separated list of 'visible
 // gpu ids'.
 Status GetValidDeviceIds(const string& visible_device_list,

View File

@@ -106,9 +106,9 @@ TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) {
 TEST_F(GpuStreamUtilTest, StreamOverrides) {
 auto root = Scope::NewRootScope().ExitOnError();
 ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
-"/gpu:0");
+"/device:GPU:0");
 Output n = ops::MatMul(root, {}, {});
-ops::_Send(root.WithOpName("output"), n, "output", "/gpu:0", 0, "/cpu:0");
+ops::_Send(root.WithOpName("output"), n, "output", "/device:GPU:0", 0, "/cpu:0");
 Graph g(OpRegistry::Global());
 TF_ASSERT_OK(root.ToGraph(&g));

View File

@@ -53,7 +53,7 @@ TEST(MemoryTypeChecker, Int32NotOk) {
 EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_GPU, g)));
 // But we can insert _HostSend/_HostRecv to ensure the invariant.
-TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/gpu:0", g));
+TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g));
 TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
 #endif // GOOGLE_CUDA
 #ifdef TENSORFLOW_USE_SYCL

View File

@@ -86,7 +86,7 @@ void DebugGateway::CopyTensor(const string& node_name, const int output_slot,
 // Determine if the tensor is on device (GPU) or host (CPU).
 // The second part of the check is necessary because even an OpKernel on
 // may have output tensors allocated on CPU.
-if ((device->name().find("gpu:") != string::npos || device->name().find("SYCL:") != string::npos) &&
+if ((device->name().find("GPU:") != string::npos || device->name().find("SYCL:") != string::npos) &&
 !ctx->output_alloc_attr(output_slot).on_host()) {
 // GPU tensors: Copy it to host (CPU).
 DeviceContext* device_ctxt = ctx->op_device_context();

View File

@@ -47,7 +47,7 @@ class SessionDebugMinusAXTest : public ::testing::Test {
 Graph graph(OpRegistry::Global());
 #if GOOGLE_CUDA
-const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
+const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
 #elif defined(TENSORFLOW_USE_SYCL)
 const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
@@ -505,7 +505,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
 Graph graph(OpRegistry::Global());
 #if GOOGLE_CUDA
-const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
+const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
 #elif defined(TENSORFLOW_USE_SYCL)
 const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
@@ -607,7 +607,7 @@ class SessionDebugVariableTest : public ::testing::Test {
 Graph graph(OpRegistry::Global());
 #if GOOGLE_CUDA
-const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
+const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
 #elif defined(TENSORFLOW_USE_SYCL)
 const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #else
@@ -879,7 +879,7 @@ class SessionDebugGPUSwitchTest : public ::testing::Test {
 Graph graph(OpRegistry::Global());
 #ifdef GOOGLE_CUDA
-const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
+const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
 #elif TENSORFLOW_USE_SYCL
 const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
 #endif

View File

@@ -51,14 +51,14 @@ class DebugIOUtilsTest : public ::testing::Test {
 };
 TEST_F(DebugIOUtilsTest, ConstructDebugNodeKey) {
-DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/gpu:2",
+DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/device:GPU:2",
 "hidden_1/MatMul", 0, "DebugIdentity");
-EXPECT_EQ("/job:worker/replica:1/task:0/gpu:2", debug_node_key.device_name);
+EXPECT_EQ("/job:worker/replica:1/task:0/device:GPU:2", debug_node_key.device_name);
 EXPECT_EQ("hidden_1/MatMul", debug_node_key.node_name);
 EXPECT_EQ(0, debug_node_key.output_slot);
 EXPECT_EQ("DebugIdentity", debug_node_key.debug_op);
 EXPECT_EQ("hidden_1/MatMul:0:DebugIdentity", debug_node_key.debug_node_name);
-EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,gpu_2",
+EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,device_GPU_2",
 debug_node_key.device_path);
 }

View File

@@ -140,7 +140,7 @@ Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation,
 }
 #define ALICE "/job:j/replica:0/task:0/cpu:0"
-#define BOB "/job:j/replica:0/task:0/gpu:0"
+#define BOB "/job:j/replica:0/task:0/device:GPU:0"
 TEST_F(ExecutorTest, SimpleAdd) {
 // c = a + b

View File

@@ -31,9 +31,9 @@ TEST(GrpcChannelTest, IsSameAddressSpace) {
 EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
 "/job:mnist/replica:10/task:10/cpu:1"));
 EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
-"/job:mnist/replica:10/task:10/gpu:2"));
+"/job:mnist/replica:10/task:10/device:GPU:2"));
 EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10",
-"/job:mnist/replica:10/task:10/gpu:2"));
+"/job:mnist/replica:10/task:10/device:GPU:2"));
 EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:1",
 "/job:mnist/replica:10/task:10"));

View File

@@ -38,8 +38,8 @@ message NodeDef {
 // | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") )
 //
 // Valid values for this string include:
-// * "/job:worker/replica:0/task:1/gpu:3" (full specification)
-// * "/job:worker/gpu:3" (partial specification)
+// * "/job:worker/replica:0/task:1/device:GPU:3" (full specification)
+// * "/job:worker/device:GPU:3" (partial specification)
 // * "" (no specification)
 //
 // If the constraints do not resolve to a single device (or if this
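For a rough illustration of full versus partial specifications in the TF 1.x Python API (the job, task, and device numbers below are examples, not taken from this diff):

```python
import tensorflow as tf

# Full specification: pins the op to one concrete device.
with tf.device("/job:worker/replica:0/task:1/device:GPU:3"):
    a = tf.constant(1.0)

# Partial specification: the placer fills in the unspecified fields.
with tf.device("/job:worker/device:GPU:3"):
    b = tf.constant(2.0)
```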

View File

@@ -39,11 +39,11 @@ namespace {
 TEST(RendezvousTest, Key) {
 const string key = Rendezvous::CreateKey(
 "/job:mnist/replica:1/task:2/CPU:0", 7890,
-"/job:mnist/replica:1/task:2/GPU:0", "var0", FrameAndIter(0, 0));
+"/job:mnist/replica:1/task:2/device:GPU:0", "var0", FrameAndIter(0, 0));
 EXPECT_EQ(key,
 "/job:mnist/replica:1/task:2/CPU:0;"
 "0000000000001ed2;" // 7890 = 0x1ed2
-"/job:mnist/replica:1/task:2/GPU:0;"
+"/job:mnist/replica:1/task:2/device:GPU:0;"
 "var0;"
 "0:0");
 Rendezvous::ParsedKey parsed;
@@ -51,12 +51,12 @@ TEST(RendezvousTest, Key) {
 EXPECT_EQ(parsed.src_device, "/job:mnist/replica:1/task:2/CPU:0");
 EXPECT_EQ(parsed.src_incarnation, 7890);
 EXPECT_EQ(parsed.src.type, "CPU");
-EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/GPU:0");
+EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/device:GPU:0");
 EXPECT_EQ(parsed.dst.type, "GPU");
 EXPECT_FALSE(Rendezvous::ParseKey("foo;bar;baz", &parsed).ok());
 EXPECT_FALSE(Rendezvous::ParseKey("/job:mnist/replica:1/task:2/CPU:0;"
-"/job:mnist/replica:1/task:2/GPU:0;",
+"/job:mnist/replica:1/task:2/device:GPU:0;",
 &parsed)
 .ok());
 EXPECT_FALSE(
@@ -99,7 +99,7 @@ string V(const Tensor& tensor) {
 Rendezvous::ParsedKey MakeKey(const string& name) {
 string s = Rendezvous::CreateKey("/job:mnist/replica:1/task:2/CPU:0", 7890,
-"/job:mnist/replica:1/task:2/GPU:0", name,
+"/job:mnist/replica:1/task:2/device:GPU:0", name,
 FrameAndIter(0, 0));
 Rendezvous::ParsedKey k;
 TF_EXPECT_OK(Rendezvous::ParseKey(s, &k));

View File

@@ -50,7 +50,7 @@ extern Status TopologicalSortNodesWithTimePriority(
 namespace {
-const char gpu_device[] = "/job:a/replica:0/task:0/gpu:0";
+const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0";
 string SplitByDevice(const Node* node) { return node->assigned_device_name(); }

View File

@@ -40,7 +40,7 @@ namespace tensorflow {
 namespace {
 const char kCPUDevice[] = "/job:a/replica:0/task:0/cpu:0";
-const char kGPUDevice[] = "/job:a/replica:0/task:0/gpu:0";
+const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
 static void InitGraph(const string& s, Graph* graph,
 const string& device = kCPUDevice) {

View File

@@ -89,7 +89,7 @@ Status SingleMachine::Provision() {
 VLOG(1) << "Number of GPUs: " << num_gpus_;
 for (int i = 0; i < num_gpus_; ++i) {
 string device_name =
-strings::StrCat("/job:localhost/replica:0/task:0/gpu:", i);
+strings::StrCat("/job:localhost/replica:0/task:0/device:GPU:", i);
 VLOG(1) << "Adding GPU device " << device_name;
 devices_[device_name] = GetLocalGPUInfo(i);
 }

View File

@@ -42,7 +42,7 @@ class AnalyticalCostEstimatorTest : public ::testing::Test {
 gpu_device.set_frequency(1100);
 gpu_device.set_bandwidth(180 * 1024 * 1024);
 (*gpu_device.mutable_environment())["architecture"] = "6";
-devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
+devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
 cluster_.reset(new VirtualCluster(devices));
 }

View File

@@ -30,14 +30,14 @@ TEST(VirtualPlacerTest, LocalDevices) {
 devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
 DeviceProperties gpu_device;
 gpu_device.set_type("GPU");
-devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
+devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
 VirtualCluster cluster(devices);
 VirtualPlacer placer(&cluster);
 NodeDef node;
 node.set_op("Conv2D");
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 node.set_device("CPU");
@@ -47,7 +47,7 @@ TEST(VirtualPlacerTest, LocalDevices) {
 node.set_device("GPU:0");
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 }
@@ -60,7 +60,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
 devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
 DeviceProperties gpu_device;
 gpu_device.set_type("GPU");
-devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
+devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
 VirtualCluster cluster(devices);
 VirtualPlacer placer(&cluster);
@@ -70,7 +70,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
 EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0",
 placer.get_canonical_device_name(node));
 node.set_device("/device:GPU:0");
-EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 }
@@ -113,7 +113,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
 devices["/job:my_job/replica:0/task:0/cpu:0"] = cpu_device;
 DeviceProperties gpu_device;
 gpu_device.set_type("GPU");
-devices["/job:my_job/replica:0/task:0/gpu:0"] = gpu_device;
+devices["/job:my_job/replica:0/task:0/device:GPU:0"] = gpu_device;
 VirtualCluster cluster(devices);
 VirtualPlacer placer(&cluster);
@@ -122,7 +122,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
 // Device falls back to GPU.
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 node.set_device("/job:my_job/replica:0/task:0/cpu:0");
@@ -130,27 +130,27 @@ TEST(VirtualPlacerTest, RemoteDevices) {
 EXPECT_EQ("/job:my_job/replica:0/task:0/cpu:0",
 placer.get_canonical_device_name(node));
-node.set_device("/job:my_job/replica:0/task:0/gpu:0");
+node.set_device("/job:my_job/replica:0/task:0/device:GPU:0");
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 // There is no local cpu available. Device falls back to GPU.
 node.set_device("CPU");
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 node.set_device("GPU:0");
 // There is no local GPU available. Fall back to default GPU.
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 // This isn't a valid name. Fall back to GPU.
 node.set_device("/job:my_job/replica:0/task:0");
 EXPECT_EQ("GPU", placer.get_device(node).type());
-EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
+EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
 placer.get_canonical_device_name(node));
 }

View File

@@ -320,14 +320,14 @@ TEST_F(ModelPrunerTest, PruningPerservesCrossDeviceIdentity) {
 Output c = ops::Const(s.WithOpName("c").WithDevice("/cpu:0"), 0.0f, {10, 10});
 // Node i1 should be preserved.
-Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/gpu:0"), c);
+Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/device:GPU:0"), c);
-Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/gpu:0"), {i1});
+Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/device:GPU:0"), {i1});
-Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/gpu:0"), {i1});
+Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/device:GPU:0"), {i1});
 // Node i2 should be pruned since it resides on the sender's device.
 Output i2 = ops::Identity(s.WithOpName("i2").WithDevice("/cpu:0"), c);
-Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/gpu:0"), {i2});
+Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/device:GPU:0"), {i2});
-Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/gpu:0"), {i2});
+Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/device:GPU:0"), {i2});
 GrapplerItem item;
 TF_CHECK_OK(s.ToGraphDef(&item.graph));

View File

@@ -579,8 +579,8 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) {
 // TODO(pbar) Handle device IDs and prefix properly.
 const string prefix = "";
 const int id = 0;
-const string stream_device = strings::StrCat(prefix, "/gpu:", id, "/stream:");
+const string stream_device = strings::StrCat(prefix, "/device:GPU:", id, "/stream:");
-const string memcpy_device = strings::StrCat(prefix, "/gpu:", id, "/memcpy");
+const string memcpy_device = strings::StrCat(prefix, "/device:GPU:", id, "/memcpy");
 mutex_lock l2(trace_mu_);
 for (const auto &rec : kernel_records_) {

View File

@@ -63,12 +63,12 @@ class GPUTracerTest : public ::testing::Test {
 Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
 test::FillValues<float>(&x_tensor, {1, 1});
 Node* x = test::graph::Constant(&graph, x_tensor);
-x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 x_ = x->name();
 // y = A * x
 Node* y = test::graph::Matmul(&graph, a, x, false, false);
-y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 y_ = y->name();
 // Use an Identity op to force a memcpy to CPU and back to GPU.
@@ -77,7 +77,7 @@ class GPUTracerTest : public ::testing::Test {
 Node* y_neg = test::graph::Unary(&graph, "Neg", i);
 y_neg_ = y_neg->name();
-y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
+y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
 test::graph::ToGraphDef(&graph, &def_);
 }

View File

@@ -127,10 +127,10 @@ tfprof> advise
 Not running under xxxx. Skip JobChecker.
 AcceleratorUtilizationChecker:
-device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
-device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
-device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
-device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
+device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
+device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
+device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
+device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
 OperationChecker:
 Found operation using NHWC data_format on GPU. Maybe NCHW is faster.

View File

@@ -31,10 +31,10 @@ tfprof --graph_path=graph.pbtxt \
 tfprof> advise
 AcceleratorUtilizationChecker:
-device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
-device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
-device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
-device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
+device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
+device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
+device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
+device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
 OperationChecker:
 Found operation using NHWC data_format on GPU. Maybe NCHW is faster.

View File

@@ -134,7 +134,7 @@ AddN 50.10ms (17.33%, 1.34%), 5481
 tfprof> op -select micros,device -order_by micros
 node name | execution time | assigned devices
 SoftmaxCrossEntropyWithLogits 1.37sec (100.00%, 36.44%), /job:worker/replica:0/task:0/cpu:0
-MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/gpu:0|/job:worker/replica:0/task:0/gpu:1|/job:worker/replica:0/task:0/gpu:2|/job:worker/replica:0/task:0/gpu:3
+MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/device:GPU:0|/job:worker/replica:0/task:0/device:GPU:1|/job:worker/replica:0/task:0/device:GPU:2|/job:worker/replica:0/task:0/device:GPU:3
 ```

View File

@@ -53,10 +53,10 @@ class TFProfAdvisorTest : public ::testing::Test {
 NodeExecStats node_stat;
 node_stat.set_all_start_micros(start_miros);
 node_stat.set_op_end_rel_micros(end_rel_micros);
-node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0", node_stat);
+node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0", node_stat);
-node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:all",
+node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:all",
 node_stat);
-node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:0",
+node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:0",
 node_stat);
 return node;
 }

View File

@@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) {
 }
 bool CountAsCPUTime(const string& device) {
-return RE2::FullMatch(device, ".*/(gpu|cpu|device:sycl):\\d+");
+return RE2::FullMatch(device, ".*/(device:gpu|gpu|cpu|device:sycl):\\d+");
 }
 bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
@@ -143,7 +143,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
 // TODO(xpan): Make this more robust?
 // See run_metadata_test.py
-// It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0.
+// It can be /job:0/replica:0/xxxx/device:GPU:0, or simply /device:GPU:0.
 // It can has some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
 if (IsCanonicalDevice(dev)) {
 if (!canonical_device_.empty()) {
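The effect of widening the device regex above can be sketched with Python's `re` module (the original uses RE2 in C++; equivalent semantics are assumed for these simple patterns, and the device strings are lowercased here purely for illustration):

```python
import re

pattern = re.compile(r".*/(device:gpu|gpu|cpu|device:sycl):\d+")

assert pattern.fullmatch("/job:w/replica:0/task:0/cpu:0")
assert pattern.fullmatch("/job:w/replica:0/task:0/gpu:0")
# Newly matched by the added "device:gpu" alternative; the old pattern
# required a "/" directly before "gpu", so this name did not match.
assert pattern.fullmatch("/job:w/replica:0/task:0/device:gpu:0")
```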

View File

@@ -42,7 +42,7 @@ message GPUOptions {
 // A comma-separated list of GPU ids that determines the 'visible'
 // to 'virtual' mapping of GPU devices. For example, if TensorFlow
 // can see 8 GPU devices in the process, and one wanted to map
-// visible GPU devices 5 and 3 as "/gpu:0", and "/gpu:1", then one
+// visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
 // would specify this field as "5,3". This field is similar in
 // spirit to the CUDA_VISIBLE_DEVICES environment variable, except
 // it applies to the visible GPU devices in the process.
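A hedged sketch of driving this field from Python with the TF 1.x session config (the 8-GPU setup is assumed for illustration):

```python
import tensorflow as tf

config = tf.ConfigProto()
# With 8 visible GPUs, expose only physical GPUs 5 and 3; inside the
# process they become "/device:GPU:0" and "/device:GPU:1" respectively.
config.gpu_options.visible_device_list = "5,3"
sess = tf.Session(config=config)
```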

View File

@@ -76,21 +76,21 @@ TEST(DeviceNameUtilsTest, Basic) {
 DeviceNameUtils::ParsedName p;
 EXPECT_FALSE(DeviceNameUtils::ParseFullName("foobar", &p));
 EXPECT_FALSE(
-DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:3", &p));
+DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/device:GPU:3", &p));
 EXPECT_FALSE(
 DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:", &p));
 EXPECT_FALSE(DeviceNameUtils::ParseFullName(
 "/job:123/replica:1/task:2/device:gpu:", &p));
 EXPECT_FALSE(
-DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/gpu:3", &p));
+DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/device:GPU:3", &p));
 EXPECT_FALSE(
-DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/gpu:3", &p));
+DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/device:GPU:3", &p));
 EXPECT_FALSE(
 DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/bar:3", &p));
 EXPECT_FALSE(DeviceNameUtils::ParseFullName(
-"/job:foo/replica:1/task:2/gpu:3/extra", &p));
+"/job:foo/replica:1/task:2/device:GPU:3/extra", &p));
 EXPECT_TRUE(
-DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/gpu:3", &p));
+DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/device:GPU:3", &p));
 EXPECT_TRUE(p.has_job);
 EXPECT_TRUE(p.has_replica);
 EXPECT_TRUE(p.has_task);
@@ -106,7 +106,7 @@ TEST(DeviceNameUtilsTest, Basic) {
 // Allow _ in job names.
 DeviceNameUtils::ParsedName p;
 EXPECT_TRUE(DeviceNameUtils::ParseFullName(
-"/job:foo_bar/replica:1/task:2/gpu:3", &p));
+"/job:foo_bar/replica:1/task:2/device:GPU:3", &p));
 EXPECT_TRUE(p.has_job);
 EXPECT_TRUE(p.has_replica);
 EXPECT_TRUE(p.has_task);
@@ -193,7 +193,7 @@ TEST(DeviceNameUtilsTest, Basic) {
 }
 {
 DeviceNameUtils::ParsedName p;
-EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/gpu:5", &p));
+EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/device:GPU:5", &p));
 EXPECT_FALSE(p.has_job);
 EXPECT_TRUE(p.has_replica);
 EXPECT_FALSE(p.has_task);
@@ -216,13 +216,13 @@ TEST(DeviceNameUtilsTest, Basic) {
 }
 EXPECT_TRUE(DeviceNameUtils::IsSameAddressSpace(
-"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/gpu:4"));
+"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/device:GPU:4"));
 EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
-"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/gpu:4"));
+"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/device:GPU:4"));
 EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
-"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/gpu:4"));
+"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/device:GPU:4"));
 EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
-"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/gpu:4"));
+"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/device:GPU:4"));
 EXPECT_EQ(DeviceNameUtils::LocalName("CPU", 1), "CPU:1");
 EXPECT_EQ(DeviceNameUtils::LocalName("GPU", 2), "GPU:2");
@@ -284,17 +284,17 @@ static bool IsCSHelper(StringPiece pattern, StringPiece actual) {
 }
 TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
-EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(
-IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
+IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
-EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(IsCSHelper("/job:*/replica:*/task:*",
-"/job:work/replica:1/task:2/gpu:3"));
+"/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(
-IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/gpu:3"));
+IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/device:GPU:3"));
-EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
+EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
-EXPECT_FALSE(IsCSHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
+EXPECT_FALSE(IsCSHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
-EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
 }
@@ -305,36 +305,36 @@ static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
 }
 TEST(DeviceNameUtilsTest, IsSpecification) {
-EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
-EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/device:GPU:3"));
 EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1"))
 EXPECT_TRUE(IsSpecHelper("/job:*", "/replica:1"));
 EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work"));
 EXPECT_TRUE(
-IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
+IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:*",
-"/job:work/replica:1/task:2/gpu:3"));
+"/job:work/replica:1/task:2/device:GPU:3"));
-EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:3",
-"/job:work/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/device:GPU:3",
+"/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/task:2",
-"/job:work/replica:1/task:2/gpu:3"));
+"/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(IsSpecHelper("/job:work/replica:*/task:2",
-"/job:work/replica:1/task:2/gpu:3"));
+"/job:work/replica:1/task:2/device:GPU:3"));
-EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/device:GPU:3"));
-EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/device:GPU:3"));
 EXPECT_TRUE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/cpu:1"));
 EXPECT_TRUE(IsSpecHelper("/cpu:0", "/cpu:0"));
-EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
+EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
-EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/gpu:3", "/gpu:*"));
+EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/device:GPU:3", "/gpu:*"));
 EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2"));
-EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/gpu:1"));
+EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/device:GPU:1"));
-EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
+EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
-EXPECT_FALSE(IsSpecHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
+EXPECT_FALSE(IsSpecHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
 EXPECT_FALSE(IsSpecHelper("/job:work/replica:*/task:0",
-"/job:work/replica:1/task:2/gpu:3"));
+"/job:work/replica:1/task:2/device:GPU:3"));
 EXPECT_FALSE(IsSpecHelper("/job:work/replica:0/task:2",
-"/job:work/replica:*/task:2/gpu:3"));
+"/job:work/replica:*/task:2/device:GPU:3"));
 }
 TEST(DeviceNameUtilsTest, SplitDeviceName) {
@@ -348,7 +348,7 @@ TEST(DeviceNameUtilsTest, SplitDeviceName) {
 "/job:foo/cpu:1/task:2/replica:1", &task, &device));
 EXPECT_EQ("/job:foo/replica:1/task:2", task);
 EXPECT_EQ("CPU:1", device);
-EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/gpu:3", &task, &device));
+EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/device:GPU:3", &task, &device));
 EXPECT_EQ("", task);
 EXPECT_EQ("GPU:3", device);
 EXPECT_FALSE(DeviceNameUtils::SplitDeviceName("gpu:3", &task, &device));
@@ -413,11 +413,11 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
 MergeDevNamesHelper("", "/job:foo", "/job:foo");
 MergeDevNamesHelper("", "/replica:2", "/replica:2");
 MergeDevNamesHelper("", "/task:7", "/task:7");
-// MergeDevNamesHelper("", "/gpu:1", "/gpu:1");
+// MergeDevNamesHelper("", "/device:GPU:1", "/device:GPU:1");
 // Combining disjoint names.
 MergeDevNamesHelper("/job:foo", "/task:7", "/job:foo/task:7");
-MergeDevNamesHelper("/job:foo", "/gpu:1", "/job:foo/gpu:1");
+MergeDevNamesHelper("/job:foo", "/device:GPU:1", "/job:foo/device:GPU:1");
 // Combining overlapping names.
 MergeDevNamesHelper("/job:foo/replica:0", "/replica:0/task:1",
@@ -426,25 +426,25 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
 // Wildcard tests.
 MergeDevNamesHelper("", "/gpu:*", "/gpu:*");
 MergeDevNamesHelper("/gpu:*", "/gpu:*", "/gpu:*");
-MergeDevNamesHelper("/gpu:1", "/gpu:*", "/gpu:1");
+MergeDevNamesHelper("/device:GPU:1", "/gpu:*", "/device:GPU:1");
 // Incompatible components.
 MergeDevNamesError("/job:foo", "/job:bar", "incompatible jobs");
 MergeDevNamesError("/replica:0", "/replica:1", "incompatible replicas");
 MergeDevNamesError("/task:0", "/task:1", "incompatible tasks");
 MergeDevNamesError("/gpu:*", "/cpu:*", "incompatible types");
-MergeDevNamesError("/gpu:0", "/gpu:1", "incompatible ids");
+MergeDevNamesError("/device:GPU:0", "/device:GPU:1", "incompatible ids");
 }
 TEST(DeviceNameUtilsTest, MergeDevNamesAllowSoftPlacement) {
 // Incompatible components with allow_soft_placement.
 MergeDevNamesHelperAllowSoftPlacement("/gpu:*", "/cpu:1", "");
-MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/gpu:1", "");
+MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/device:GPU:1", "");
-MergeDevNamesHelperAllowSoftPlacement("/gpu:1", "/gpu:2", "/gpu:*");
+MergeDevNamesHelperAllowSoftPlacement("/device:GPU:1", "/device:GPU:2", "/device:GPU:*");
 }
 TEST(DeviceNameUtilsTest, GetNamesForDeviceMappings) {
-DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/gpu:1");
+DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/device:GPU:1");
 EXPECT_EQ(str_util::Join(DeviceNameUtils::GetNamesForDeviceMappings(p), ","),
 "/job:foo/replica:10/task:0/device:GPU:1,"
 "/job:foo/replica:10/task:0/gpu:1");
@ -73,12 +73,12 @@ other wrappers and the dynamic decoder described below. For example, one can
write: write:
```python ```python
cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:0") cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:0")
attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs) attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs)
attn_cell = tf.contrib.seq2seq.AttentionWrapper( attn_cell = tf.contrib.seq2seq.AttentionWrapper(
cell, attention_mechanism, attention_size=256) cell, attention_mechanism, attention_size=256)
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/gpu:1") attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/device:GPU:1")
top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:1") top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:1")
multi_cell = MultiRNNCell([attn_cell, top_cell]) multi_cell = MultiRNNCell([attn_cell, top_cell])
``` ```
@ -110,7 +110,7 @@ devices. For example, the following snippet creates a variable named `v` and
places it on the second GPU device: places it on the second GPU device:
``` python ``` python
with tf.device("/gpu:1"): with tf.device("/device:GPU:1"):
v = tf.get_variable("v", [1]) v = tf.get_variable("v", [1])
``` ```
@ -411,7 +411,7 @@ the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`.
* A preferred hardware device to run the operation within a tower. * A preferred hardware device to run the operation within a tower.
@{tf.device} specifies this. For @{tf.device} specifies this. For
instance, all operations in the first tower reside within `device('/gpu:0')` instance, all operations in the first tower reside within `device('/device:GPU:0')`
scope indicating that they should be run on the first GPU. scope indicating that they should be run on the first GPU.
All variables are pinned to the CPU and accessed via All variables are pinned to the CPU and accessed via
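As a reader aid for this hunk, a minimal sketch of the tower pattern it describes (TF 1.x API; `build_tower` and the tower count are hypothetical placeholders):

```python
import tensorflow as tf

def build_tower(x):
  # Hypothetical per-tower computation.
  return tf.layers.dense(x, 10)

x = tf.placeholder(tf.float32, [None, 4])
tower_outputs = []
for i in range(2):  # one tower per GPU
  # Ops in this scope land on GPU i; in the real model, variables are
  # pinned to the CPU via a custom getter or device function.
  with tf.device('/device:GPU:%d' % i):
    with tf.name_scope('tower_%d' % i):
      tower_outputs.append(build_tower(x))
```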
@ -7,8 +7,8 @@ supported device types are `CPU` and `GPU`. They are represented as `strings`.
For example: For example:
* `"/cpu:0"`: The CPU of your machine. * `"/cpu:0"`: The CPU of your machine.
* `"/gpu:0"`: The GPU of your machine, if you have one. * `"/device:GPU:0"`: The GPU of your machine, if you have one.
* `"/gpu:1"`: The second GPU of your machine, etc. * `"/device:GPU:1"`: The second GPU of your machine, etc.
If a TensorFlow operation has both CPU and GPU implementations, the GPU devices If a TensorFlow operation has both CPU and GPU implementations, the GPU devices
will be given priority when the operation is assigned to a device. For example, will be given priority when the operation is assigned to a device. For example,
@ -35,11 +35,11 @@ You should see the following output:
``` ```
Device mapping: Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus /job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
id: 0000:05:00.0 id: 0000:05:00.0
b: /job:localhost/replica:0/task:0/gpu:0 b: /job:localhost/replica:0/task:0/device:GPU:0
a: /job:localhost/replica:0/task:0/gpu:0 a: /job:localhost/replica:0/task:0/device:GPU:0
MatMul: /job:localhost/replica:0/task:0/gpu:0 MatMul: /job:localhost/replica:0/task:0/device:GPU:0
[[ 22. 28.] [[ 22. 28.]
[ 49. 64.]] [ 49. 64.]]
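For reference, a minimal TF 1.x program of the kind that produces this mapping (presumably close to the guide's elided snippet):

```python
import tensorflow as tf

a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
# log_device_placement=True makes the session print the device chosen
# for each op, yielding output like the mapping above.
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
print(sess.run(c))
```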
@ -71,11 +71,11 @@ example) and automatically copy tensors between devices if required.
``` ```
Device mapping: Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus /job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
id: 0000:05:00.0 id: 0000:05:00.0
b: /job:localhost/replica:0/task:0/cpu:0 b: /job:localhost/replica:0/task:0/cpu:0
a: /job:localhost/replica:0/task:0/cpu:0 a: /job:localhost/replica:0/task:0/cpu:0
MatMul: /job:localhost/replica:0/task:0/gpu:0 MatMul: /job:localhost/replica:0/task:0/device:GPU:0
[[ 22. 28.] [[ 22. 28.]
[ 49. 64.]] [ 49. 64.]]
``` ```
@ -127,7 +127,7 @@ to specify the preference explicitly:
```python ```python
# Creates a graph. # Creates a graph.
with tf.device('/gpu:2'): with tf.device('/device:GPU:2'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b) c = tf.matmul(a, b)
@ -142,9 +142,9 @@ If the device you have specified does not exist, you will get
``` ```
InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b': InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b':
Could not satisfy explicit device specification '/gpu:2' Could not satisfy explicit device specification '/device:GPU:2'
[[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2] [[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2]
values: 1 2 3...>, _device="/gpu:2"]()]] values: 1 2 3...>, _device="/device:GPU:2"]()]]
``` ```
If you would like TensorFlow to automatically choose an existing and supported If you would like TensorFlow to automatically choose an existing and supported
@ -154,7 +154,7 @@ the session.
```python ```python
# Creates a graph. # Creates a graph.
with tf.device('/gpu:2'): with tf.device('/device:GPU:2'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b) c = tf.matmul(a, b)
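The snippet is cut off before the session is created; the elided tail presumably resembles the following (a sketch, not the verbatim file):

```python
# allow_soft_placement lets TensorFlow pick a supported device when
# '/device:GPU:2' does not exist, instead of raising the error above.
sess = tf.Session(config=tf.ConfigProto(
    allow_soft_placement=True, log_device_placement=True))
print(sess.run(c))
```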
@ -175,7 +175,7 @@ For example:
``` ```
# Creates a graph. # Creates a graph.
c = [] c = []
for d in ['/gpu:2', '/gpu:3']: for d in ['/device:GPU:2', '/device:GPU:3']:
with tf.device(d): with tf.device(d):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2]) b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2])
@ -192,20 +192,20 @@ You will see the following output.
``` ```
Device mapping: Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K20m, pci bus /job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus
id: 0000:02:00.0 id: 0000:02:00.0
/job:localhost/replica:0/task:0/gpu:1 -> device: 1, name: Tesla K20m, pci bus /job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus
id: 0000:03:00.0 id: 0000:03:00.0
/job:localhost/replica:0/task:0/gpu:2 -> device: 2, name: Tesla K20m, pci bus /job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus
id: 0000:83:00.0 id: 0000:83:00.0
/job:localhost/replica:0/task:0/gpu:3 -> device: 3, name: Tesla K20m, pci bus /job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus
id: 0000:84:00.0 id: 0000:84:00.0
Const_3: /job:localhost/replica:0/task:0/gpu:3 Const_3: /job:localhost/replica:0/task:0/device:GPU:3
Const_2: /job:localhost/replica:0/task:0/gpu:3 Const_2: /job:localhost/replica:0/task:0/device:GPU:3
MatMul_1: /job:localhost/replica:0/task:0/gpu:3 MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3
Const_1: /job:localhost/replica:0/task:0/gpu:2 Const_1: /job:localhost/replica:0/task:0/device:GPU:2
Const: /job:localhost/replica:0/task:0/gpu:2 Const: /job:localhost/replica:0/task:0/device:GPU:2
MatMul: /job:localhost/replica:0/task:0/gpu:2 MatMul: /job:localhost/replica:0/task:0/device:GPU:2
AddN: /job:localhost/replica:0/task:0/cpu:0 AddN: /job:localhost/replica:0/task:0/cpu:0
[[ 44. 56.] [[ 44. 56.]
[ 98. 128.]] [ 98. 128.]]
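The loop body above is truncated after `b`; judging from the mapping (MatMul on each GPU, AddN on /cpu:0, and a result exactly twice the single-GPU product), the elided remainder presumably reads roughly:

```python
    c.append(tf.matmul(a, b))  # still inside the per-device loop
# Summing the per-GPU results on the CPU is what places AddN on /cpu:0.
with tf.device('/cpu:0'):
  total = tf.add_n(c)
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
print(sess.run(total))
```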
@ -47,12 +47,12 @@ def my_model(features, labels, mode):
# Create three fully connected layers respectively of size 10, 20, and 10 with # Create three fully connected layers respectively of size 10, 20, and 10 with
# each layer having a dropout probability of 0.1. # each layer having a dropout probability of 0.1.
net = features[X_FEATURE] net = features[X_FEATURE]
with tf.device('/gpu:1'): with tf.device('/device:GPU:1'):
for units in [10, 20, 10]: for units in [10, 20, 10]:
net = tf.layers.dense(net, units=units, activation=tf.nn.relu) net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
net = tf.layers.dropout(net, rate=0.1) net = tf.layers.dropout(net, rate=0.1)
with tf.device('/gpu:2'): with tf.device('/device:GPU:2'):
# Compute logits (1 per class). # Compute logits (1 per class).
logits = tf.layers.dense(net, 3, activation=None) logits = tf.layers.dense(net, 3, activation=None)
@ -173,7 +173,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
# #
# W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device: # W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device:
# /job:worker/replica:0/task:0/device:CPU:0 all devices: # /job:worker/replica:0/task:0/device:CPU:0 all devices:
# /job:local/replica:0/task:0/gpu:0, # /job:local/replica:0/task:0/device:GPU:0,
# /job:local/replica:0/task:0/device:GPU:0, # /job:local/replica:0/task:0/device:GPU:0,
# /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0, # /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0,
# /job:local/replica:0/task:0/device:CPU:1, # /job:local/replica:0/task:0/device:CPU:1,
@ -198,7 +198,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
sum1 = input1 + input2 sum1 = input1 + input2
if test.is_gpu_available(): if test.is_gpu_available():
device_str = '/job:worker/task:0/gpu:0' device_str = '/job:worker/task:0/device:GPU:0'
else: else:
device_str = '/job:worker/task:0/cpu:1' device_str = '/job:worker/task:0/cpu:1'
with ops.device(device_str): with ops.device(device_str):
@ -1124,7 +1124,7 @@ class SessionTest(test_util.TensorFlowTestCase):
# which is why placing this is invalid. If at some point # which is why placing this is invalid. If at some point
# GPU kernels are added to this test, some other different # GPU kernels are added to this test, some other different
# op / device combo should be chosen. # op / device combo should be chosen.
with ops.device('/gpu:0'): with ops.device('/device:GPU:0'):
a = constant_op.constant(1.0, shape=[1, 2]) a = constant_op.constant(1.0, shape=[1, 2])
b = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(1.0, shape=[1, 2])
@ -1145,7 +1145,7 @@ class SessionTest(test_util.TensorFlowTestCase):
# which is why placing this is invalid. If at some point # which is why placing this is invalid. If at some point
# GPU kernels are added to this test, some other different # GPU kernels are added to this test, some other different
# op / device combo should be chosen. # op / device combo should be chosen.
with ops.device('/gpu:0'): with ops.device('/device:GPU:0'):
_ = constant_op.constant(1.0, shape=[1, 2]) _ = constant_op.constant(1.0, shape=[1, 2])
b = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(1.0, shape=[1, 2])
@ -1494,7 +1494,7 @@ class SessionTest(test_util.TensorFlowTestCase):
allow_soft_placement=True, allow_soft_placement=True,
graph_options=config_pb2.GraphOptions(build_cost_model=100)) graph_options=config_pb2.GraphOptions(build_cost_model=100))
with session.Session(config=config) as sess: with session.Session(config=config) as sess:
with ops.device('/gpu:0'): with ops.device('/device:GPU:0'):
a = array_ops.placeholder(dtypes.float32, shape=[]) a = array_ops.placeholder(dtypes.float32, shape=[])
b = math_ops.add(a, a) b = math_ops.add(a, a)
c = array_ops.identity(b) c = array_ops.identity(b)
@ -100,8 +100,8 @@ class TimelineTest(test.TestCase):
self.assertTrue(run_metadata.HasField('step_stats')) self.assertTrue(run_metadata.HasField('step_stats'))
step_stats = run_metadata.step_stats step_stats = run_metadata.step_stats
devices = [d.device for d in step_stats.dev_stats] devices = [d.device for d in step_stats.dev_stats]
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in devices) self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
self.assertTrue('/gpu:0/stream:all' in devices) self.assertTrue('/device:GPU:0/stream:all' in devices)
tl = timeline.Timeline(step_stats) tl = timeline.Timeline(step_stats)
ctf = tl.generate_chrome_trace_format() ctf = tl.generate_chrome_trace_format()
self._validateTrace(ctf) self._validateTrace(ctf)
@ -380,7 +380,8 @@ def device_path_to_device_name(device_dir):
path_items = os.path.basename(device_dir)[ path_items = os.path.basename(device_dir)[
len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",") len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",")
return "/".join([ return "/".join([
path_item.replace("_", ":", 1) for path_item in path_items]) path_item.replace("device_", "device:").replace("_", ":", 1)
for path_item in path_items])
class DebugTensorDatum(object): class DebugTensorDatum(object):
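A quick round-trip sketch of what the added `replace("device_", "device:")` repairs, using the 'device:SYCL:0' case from the commit message (the leading empty item, standing in for the path's leading slash, is an assumption):

```python
# 'device:SYCL:0' is stored on disk as 'device_SYCL_0'; the old
# replace("_", ":", 1) alone only recovered 'device:SYCL_0'.
path_items = ["", "job_localhost", "replica_0", "task_0", "device_SYCL_0"]
name = "/".join(
    item.replace("device_", "device:").replace("_", ":", 1)
    for item in path_items)
assert name == "/job:localhost/replica:0/task:0/device:SYCL:0"
```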
@ -237,11 +237,11 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
gpu_0_dir = os.path.join( gpu_0_dir = os.path.join(
self._dump_root, self._dump_root,
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG + debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
",job_localhost,replica_0,task_0,gpu_0") ",job_localhost,replica_0,task_0,device_GPU_0")
gpu_1_dir = os.path.join( gpu_1_dir = os.path.join(
self._dump_root, self._dump_root,
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG + debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
",job_localhost,replica_0,task_0,gpu_1") ",job_localhost,replica_0,task_0,device_GPU_1")
os.makedirs(cpu_0_dir) os.makedirs(cpu_0_dir)
os.makedirs(gpu_0_dir) os.makedirs(gpu_0_dir)
os.makedirs(gpu_1_dir) os.makedirs(gpu_1_dir)
@ -281,12 +281,12 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
node = graph_gpu_0.node.add() node = graph_gpu_0.node.add()
node.name = "node_foo_1" node.name = "node_foo_1"
node.op = "FooOp" node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:0" node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
graph_gpu_1 = graph_pb2.GraphDef() graph_gpu_1 = graph_pb2.GraphDef()
node = graph_gpu_1.node.add() node = graph_gpu_1.node.add()
node.name = "node_foo_1" node.name = "node_foo_1"
node.op = "FooOp" node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1" node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
dump_dir = debug_data.DebugDumpDir( dump_dir = debug_data.DebugDumpDir(
self._dump_root, self._dump_root,
@ -294,14 +294,14 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
self.assertItemsEqual( self.assertItemsEqual(
["/job:localhost/replica:0/task:0/cpu:0", ["/job:localhost/replica:0/task:0/cpu:0",
"/job:localhost/replica:0/task:0/gpu:0", "/job:localhost/replica:0/task:0/device:GPU:0",
"/job:localhost/replica:0/task:0/gpu:1"], dump_dir.devices()) "/job:localhost/replica:0/task:0/device:GPU:1"], dump_dir.devices())
self.assertEqual(1472563253536385, dump_dir.t0) self.assertEqual(1472563253536385, dump_dir.t0)
self.assertEqual(3, dump_dir.size) self.assertEqual(3, dump_dir.size)
with self.assertRaisesRegexp( with self.assertRaisesRegexp(
ValueError, r"Invalid device name: "): ValueError, r"Invalid device name: "):
dump_dir.nodes("/job:localhost/replica:0/task:0/gpu:2") dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2")
self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"], self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
dump_dir.nodes()) dump_dir.nodes())
self.assertItemsEqual( self.assertItemsEqual(
@ -319,16 +319,16 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
node = graph_gpu_0.node.add() node = graph_gpu_0.node.add()
node.name = "node_foo_1" node.name = "node_foo_1"
node.op = "FooOp" node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:0" node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
graph_gpu_1 = graph_pb2.GraphDef() graph_gpu_1 = graph_pb2.GraphDef()
node = graph_gpu_1.node.add() node = graph_gpu_1.node.add()
node.name = "node_foo_1" node.name = "node_foo_1"
node.op = "FooOp" node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1" node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
node = graph_gpu_1.node.add() # Here is the duplicate. node = graph_gpu_1.node.add() # Here is the duplicate.
node.name = "node_foo_1" node.name = "node_foo_1"
node.op = "FooOp" node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1" node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
with self.assertRaisesRegexp( with self.assertRaisesRegexp(
ValueError, r"Duplicate node name on device "): ValueError, r"Duplicate node name on device "):
@ -711,7 +711,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
# Test node name list lookup of the DebugDumpDir object. # Test node name list lookup of the DebugDumpDir object.
if test_util.gpu_device_name(): if test_util.gpu_device_name():
node_names = dump.nodes( node_names = dump.nodes(
device_name="/job:localhost/replica:0/task:0/gpu:0") device_name="/job:localhost/replica:0/task:0/device:GPU:0")
else: else:
node_names = dump.nodes() node_names = dump.nodes()
self.assertTrue(u_name in node_names) self.assertTrue(u_name in node_names)
@ -402,7 +402,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
def testRuntimeErrorBeforeGraphExecutionIsRaised(self): def testRuntimeErrorBeforeGraphExecutionIsRaised(self):
# Use an impossible device name to cause an error before graph execution. # Use an impossible device name to cause an error before graph execution.
with ops.device("/gpu:1337"): with ops.device("/device:GPU:1337"):
w = variables.Variable([1.0] * 10, name="w") w = variables.Variable([1.0] * 10, name="w")
wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(
@ -79,17 +79,17 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string()) self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
d.parse_from_string("/replica:1/task:0/device:CPU:0") d.parse_from_string("/replica:1/task:0/device:CPU:0")
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string()) self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
d.parse_from_string("/job:muu/gpu:2") d.parse_from_string("/job:muu/device:GPU:2")
self.assertEquals("/job:muu/device:GPU:2", d.to_string()) self.assertEquals("/job:muu/device:GPU:2", d.to_string())
with self.assertRaises(Exception) as e: with self.assertRaises(Exception) as e:
d.parse_from_string("/job:muu/gpu:2/cpu:0") d.parse_from_string("/job:muu/device:GPU:2/cpu:0")
self.assertTrue("Cannot specify multiple device" in str(e.exception)) self.assertTrue("Cannot specify multiple device" in str(e.exception))
def testFromString(self): def testFromString(self):
d = device.DeviceSpec.from_string("/job:foo/replica:0") d = device.DeviceSpec.from_string("/job:foo/replica:0")
self.assertEquals("/job:foo/replica:0", d.to_string()) self.assertEquals("/job:foo/replica:0", d.to_string())
with self.assertRaises(Exception) as e: with self.assertRaises(Exception) as e:
d = device.DeviceSpec.from_string("/job:muu/gpu:2/cpu:0") d = device.DeviceSpec.from_string("/job:muu/device:GPU:2/cpu:0")
self.assertTrue("Cannot specify multiple device" in str(e.exception)) self.assertTrue("Cannot specify multiple device" in str(e.exception))
d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*") d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*")
@ -102,13 +102,13 @@ class DeviceTest(test_util.TensorFlowTestCase):
def testMerge(self): def testMerge(self):
d = device.DeviceSpec.from_string("/job:foo/replica:0") d = device.DeviceSpec.from_string("/job:foo/replica:0")
self.assertEquals("/job:foo/replica:0", d.to_string()) self.assertEquals("/job:foo/replica:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/task:1/gpu:2")) d.merge_from(device.DeviceSpec.from_string("/task:1/device:GPU:2"))
self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string()) self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string())
d = device.DeviceSpec() d = device.DeviceSpec()
d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0")) d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0"))
self.assertEquals("/task:1/device:CPU:0", d.to_string()) self.assertEquals("/task:1/device:CPU:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/job:boo/gpu:0")) d.merge_from(device.DeviceSpec.from_string("/job:boo/device:GPU:0"))
self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string()) self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2")) d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2"))
self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string()) self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string())
@ -134,10 +134,10 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0", self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
device.canonical_name( device.canonical_name(
"/job:foo/replica:0/task:0/gpu:0")) "/job:foo/replica:0/task:0/device:GPU:0"))
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0", self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
device.canonical_name( device.canonical_name(
"/gpu:0/task:0/replica:0/job:foo")) "/device:GPU:0/task:0/replica:0/job:foo"))
def testCheckValid(self): def testCheckValid(self):
device.check_valid("/job:foo/replica:0") device.check_valid("/job:foo/replica:0")
@ -155,7 +155,7 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertTrue("Unknown attribute: 'bar'" in str(e.exception)) self.assertTrue("Unknown attribute: 'bar'" in str(e.exception))
with self.assertRaises(Exception) as e: with self.assertRaises(Exception) as e:
device.check_valid("/cpu:0/gpu:2") device.check_valid("/cpu:0/device:GPU:2")
self.assertTrue("Cannot specify multiple device" in str(e.exception)) self.assertTrue("Cannot specify multiple device" in str(e.exception))
@ -505,7 +505,7 @@ class FunctionTest(test.TestCase):
_ = PlusOne(1, name="p1") _ = PlusOne(1, name="p1")
with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"): with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"):
_ = PlusOne(1, device="/gpu:0") _ = PlusOne(1, device="/device:GPU:0")
def testFunctionDecorator(self): def testFunctionDecorator(self):
@ -106,9 +106,9 @@ class DeviceFunctionsTest(test.TestCase):
var_0 = variables.Variable(0) var_0 = variables.Variable(0)
with ops.device(test_device_func_pin_variable_to_cpu): with ops.device(test_device_func_pin_variable_to_cpu):
var_1 = variables.Variable(1) var_1 = variables.Variable(1)
with ops.device(lambda op: "/gpu:0"): with ops.device(lambda op: "/device:GPU:0"):
var_2 = variables.Variable(2) var_2 = variables.Variable(2)
with ops.device("/gpu:0"): # Implicit merging device function. with ops.device("/device:GPU:0"): # Implicit merging device function.
var_3 = variables.Variable(3) var_3 = variables.Variable(3)
self.assertDeviceEqual(var_0.device, None) self.assertDeviceEqual(var_0.device, None)
@ -878,7 +878,7 @@ class ImportGraphDefTest(test.TestCase):
self.assertEqual(c.device, c4.device) # worker overrides ps. self.assertEqual(c.device, c4.device) # worker overrides ps.
with ops.Graph().as_default(): with ops.Graph().as_default():
with ops.device(device.merge_device("/gpu:0")): with ops.device(device.merge_device("/device:GPU:0")):
a5, b5, c5 = importer.import_graph_def( a5, b5, c5 = importer.import_graph_def(
gdef, return_elements=["a", "b", "c"]) gdef, return_elements=["a", "b", "c"])
self.assertEqual("/device:GPU:0", a5.device) self.assertEqual("/device:GPU:0", a5.device)
@ -550,7 +550,7 @@ class ScopedMetaGraphTest(test.TestCase):
a = variables.Variable( a = variables.Variable(
constant_op.constant( constant_op.constant(
1.0, shape=[2, 2]), name="a") 1.0, shape=[2, 2]), name="a")
with ops.device("/job:ps/replica:0/task:0/gpu:0"): with ops.device("/job:ps/replica:0/task:0/device:GPU:0"):
b = variables.Variable( b = variables.Variable(
constant_op.constant( constant_op.constant(
2.0, shape=[2, 2]), name="b") 2.0, shape=[2, 2]), name="b")
@ -3342,7 +3342,7 @@ class Graph(object):
For example: For example:
```python ```python
with g.device('/gpu:0'): with g.device('/device:GPU:0'):
# All operations constructed in this context will be placed # All operations constructed in this context will be placed
# on GPU 0. # on GPU 0.
with g.device(None): with g.device(None):
@ -3352,7 +3352,7 @@ class Graph(object):
# Defines a function from `Operation` to device string. # Defines a function from `Operation` to device string.
def matmul_on_gpu(n): def matmul_on_gpu(n):
if n.type == "MatMul": if n.type == "MatMul":
return "/gpu:0" return "/device:GPU:0"
else: else:
return "/cpu:0" return "/cpu:0"
@ -1555,26 +1555,26 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
def testColocationDeviceInteraction(self): def testColocationDeviceInteraction(self):
with ops.device("/cpu:0"): with ops.device("/cpu:0"):
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a") a = constant_op.constant([2.0], name="a")
with ops.colocate_with(a.op): with ops.colocate_with(a.op):
# 'b' is created in the scope of /cpu:0, but it is # 'b' is created in the scope of /cpu:0, but it is
# colocated with 'a', which is on '/gpu:0'. colocate_with # colocated with 'a', which is on '/device:GPU:0'. colocate_with
# overrides devices because it is a stronger constraint. # overrides devices because it is a stronger constraint.
b = constant_op.constant(3.0) b = constant_op.constant(3.0)
self.assertEqual([b"loc:@a"], b.op.colocation_groups()) self.assertEqual([b"loc:@a"], b.op.colocation_groups())
self.assertEqual(a.op.device, b.op.device) self.assertEqual(a.op.device, b.op.device)
def testColocationCanonicalization(self): def testColocationCanonicalization(self):
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
_ = constant_op.constant(2.0) _ = constant_op.constant(2.0)
with ops.device(lambda op: "/gpu:0"): with ops.device(lambda op: "/device:GPU:0"):
b = constant_op.constant(3.0) b = constant_op.constant(3.0)
with ops.get_default_graph().colocate_with(b): with ops.get_default_graph().colocate_with(b):
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
c = constant_op.constant(4.0) c = constant_op.constant(4.0)
# A's device will be /gpu:0 # A's device will be /device:GPU:0
# B's device will be /device:GPU:0 # B's device will be /device:GPU:0
# C's device will be /device:GPU:0 because it # C's device will be /device:GPU:0 because it
# inherits B's device name, after canonicalizing the names. # inherits B's device name, after canonicalizing the names.
@ -1582,10 +1582,10 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
def testLocationOverrides(self): def testLocationOverrides(self):
with ops.device("/cpu:0"): with ops.device("/cpu:0"):
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a") a = constant_op.constant([2.0], name="a")
# Note that this colocation is "redundant", since we are # Note that this colocation is "redundant", since we are
# within the scope of "/gpu:0". However, we would like to # within the scope of "/device:GPU:0". However, we would like to
# preserve in the GraphDef that these two ops should be # preserve in the GraphDef that these two ops should be
# colocated in a portable way. # colocated in a portable way.
with ops.colocate_with(a.op): with ops.colocate_with(a.op):
@ -1652,7 +1652,7 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
self.assertEqual([b"loc:@a"], b.op.colocation_groups()) self.assertEqual([b"loc:@a"], b.op.colocation_groups())
def testInconsistentDeviceWithinColocate(self): def testInconsistentDeviceWithinColocate(self):
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a") a = constant_op.constant([2.0], name="a")
with ops.colocate_with(a.op): with ops.colocate_with(a.op):
# This is allowed due to legacy but clearly wrong, since we # This is allowed due to legacy but clearly wrong, since we
@ -405,7 +405,7 @@ class TensorFlowTestCase(googletest.TestCase):
trigger the creation of a new session. trigger the creation of a new session.
Use the `use_gpu` and `force_gpu` options to control where ops are run. If Use the `use_gpu` and `force_gpu` options to control where ops are run. If
`force_gpu` is True, all ops are pinned to `/gpu:0`. Otherwise, if `use_gpu` `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if `use_gpu`
is True, TensorFlow tries to run as many ops on the GPU as possible. If both is True, TensorFlow tries to run as many ops on the GPU as possible. If both
`force_gpu` and `use_gpu` are False, all ops are pinned to the CPU. `force_gpu` and `use_gpu` are False, all ops are pinned to the CPU.
@ -427,7 +427,7 @@ class TensorFlowTestCase(googletest.TestCase):
config: An optional config_pb2.ConfigProto to use to configure the config: An optional config_pb2.ConfigProto to use to configure the
session. session.
use_gpu: If True, attempt to run as many ops as possible on GPU. use_gpu: If True, attempt to run as many ops as possible on GPU.
force_gpu: If True, pin all ops to `/gpu:0`. force_gpu: If True, pin all ops to `/device:GPU:0`.
Returns: Returns:
A Session object that should be used as a context manager to surround A Session object that should be used as a context manager to surround
@ -466,11 +466,11 @@ class TensorFlowTestCase(googletest.TestCase):
sess = self._cached_session sess = self._cached_session
with sess.graph.as_default(), sess.as_default(): with sess.graph.as_default(), sess.as_default():
if force_gpu: if force_gpu:
# Use the name of an actual device if one is detected, or '/gpu:0' # Use the name of an actual device if one is detected, or '/device:GPU:0'
# otherwise # otherwise
gpu_name = gpu_device_name() gpu_name = gpu_device_name()
if not gpu_name: if not gpu_name:
gpu_name = "/gpu:0" gpu_name = "/device:GPU:0"
with sess.graph.device(gpu_name): with sess.graph.device(gpu_name):
yield sess yield sess
elif use_gpu: elif use_gpu:
@ -481,11 +481,11 @@ class TensorFlowTestCase(googletest.TestCase):
else: else:
with session.Session(graph=graph, config=prepare_config(config)) as sess: with session.Session(graph=graph, config=prepare_config(config)) as sess:
if force_gpu: if force_gpu:
# Use the name of an actual device if one is detected, or '/gpu:0' # Use the name of an actual device if one is detected, or '/device:GPU:0'
# otherwise # otherwise
gpu_name = gpu_device_name() gpu_name = gpu_device_name()
if not gpu_name: if not gpu_name:
gpu_name = "/gpu:0" gpu_name = "/device:GPU:0"
with sess.graph.device(gpu_name): with sess.graph.device(gpu_name):
yield sess yield sess
elif use_gpu: elif use_gpu:
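For context, a hedged sketch of a test opting into the pinned-GPU behavior implemented above (TF 1.x `tf.test.TestCase` API; the class and values are hypothetical):

```python
import tensorflow as tf

class SquareTest(tf.test.TestCase):

  def testSquareOnGpu(self):
    # force_gpu=True pins every op in the block to the detected GPU,
    # falling back to the literal '/device:GPU:0' as in the code above.
    with self.test_session(force_gpu=True) as sess:
      x = tf.constant([2.0, 3.0])
      self.assertAllClose(sess.run(tf.square(x)), [4.0, 9.0])

if __name__ == '__main__':
  tf.test.main()
```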
@ -238,7 +238,7 @@ class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase):
n_iterations = 500 n_iterations = 500
with session as s: with session as s:
data = variables.Variable(1.0) data = variables.Variable(1.0)
with ops.device('/gpu:0'): with ops.device('/device:GPU:0'):
random_seed.set_random_seed(1) random_seed.set_random_seed(1)
matrix1 = variables.Variable( matrix1 = variables.Variable(
random_ops.truncated_normal([1024, 1]), name='matrix1') random_ops.truncated_normal([1024, 1]), name='matrix1')
@ -311,7 +311,7 @@ class CholeskyBenchmark(test.Benchmark):
if test.is_gpu_available(True): if test.is_gpu_available(True):
with ops.Graph().as_default(), \ with ops.Graph().as_default(), \
session.Session() as sess, \ session.Session() as sess, \
ops.device("/gpu:0"): ops.device("/device:GPU:0"):
l = linalg_ops.cholesky(data) l = linalg_ops.cholesky(data)
self.run_op_benchmark( self.run_op_benchmark(
sess, sess,
@ -338,11 +338,11 @@ class CholeskyBenchmark(test.Benchmark):
if test.is_gpu_available(True): if test.is_gpu_available(True):
_BenchmarkGrad( _BenchmarkGrad(
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/gpu:0") MatrixInverseCompositeGrad, "composite_matrix_inverse", "/device:GPU:0")
_BenchmarkGrad( _BenchmarkGrad(
TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/gpu:0") TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/device:GPU:0")
_BenchmarkGrad( _BenchmarkGrad(
TriAngSolveCompositeGrad, "composite_triangular_solve", "/gpu:0") TriAngSolveCompositeGrad, "composite_triangular_solve", "/device:GPU:0")
_BenchmarkGrad( _BenchmarkGrad(
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0") MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0")
@ -1423,9 +1423,8 @@ class ControlFlowTest(test.TestCase):
self.assertEqual(45, rx.eval()) self.assertEqual(45, rx.eval())
def _testWhileGrad_ColocateGradients(self, colocate): def _testWhileGrad_ColocateGradients(self, colocate):
gpu_dev_name = test.gpu_device_name().lower() if test.is_gpu_available( gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
) else "/gpu:0" ) else "/device:GPU:0"
gpu_short_name = gpu_dev_name.split("/")[-1]
with self.test_session(graph=ops.Graph()) as sess: with self.test_session(graph=ops.Graph()) as sess:
v = constant_op.constant(2.0, name="v") v = constant_op.constant(2.0, name="v")
@ -1439,19 +1438,19 @@ class ControlFlowTest(test.TestCase):
r = gradients_impl.gradients( r = gradients_impl.gradients(
loop, v, colocate_gradients_with_ops=colocate)[0] loop, v, colocate_gradients_with_ops=colocate)[0]
r_ops = r.graph.get_operations() r_ops = r.graph.get_operations()
r_devices = [(op.name, op.device.lower()) for op in r_ops] r_devices = [(op.name, op.device) for op in r_ops]
self.assertTrue(any("Square" in op.name for op in r_ops)) self.assertTrue(any("Square" in op.name for op in r_ops))
for (name, dev) in r_devices: for (name, dev) in r_devices:
if not colocate and name.endswith("Square"): if not colocate and name.endswith("Square"):
# Only the forward graph contains gpu in the Square device # Only the forward graph contains gpu in the Square device
self.assertTrue(gpu_short_name in dev) self.assertTrue(gpu_dev_name in dev)
elif colocate and "Square" in name: elif colocate and "Square" in name:
# Forward and backward graphs contain gpu in Square/Square_grad devices # Forward and backward graphs contain gpu in Square/Square_grad devices
self.assertTrue(gpu_short_name in dev) self.assertTrue(gpu_dev_name in dev)
else: else:
self.assertFalse(gpu_short_name in dev) self.assertFalse(gpu_dev_name in dev)
self.assertAllClose(1024.0, sess.run(r)) self.assertAllClose(1024.0, sess.run(r))
def testWhileGrad_ColocateGradients(self): def testWhileGrad_ColocateGradients(self):
@ -2426,7 +2425,7 @@ class ControlFlowTest(test.TestCase):
# device set on tensor, default device on graph => default device on dep. # device set on tensor, default device on graph => default device on dep.
vdef = variables.Variable([0.0], name="vdef") vdef = variables.Variable([0.0], name="vdef")
with ops.device("/job:worker/gpu:1"): with ops.device("/job:worker/device:GPU:1"):
with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer], with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer],
vdef) vdef)
# The device is empty, but the colocation constraint is set. # The device is empty, but the colocation constraint is set.
@ -347,7 +347,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
x_t, y_t, adjoint_a, adjoint_b) x_t, y_t, adjoint_a, adjoint_b)
else: else:
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
x_t = constant_op.constant(x) x_t = constant_op.constant(x)
y_t = constant_op.constant(y) y_t = constant_op.constant(y)
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
@ -365,7 +365,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
else: else:
with ops.device("/gpu:0"): with ops.device("/device:GPU:0"):
x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
x_val = constant_op.constant(x[np.where(x)]) x_val = constant_op.constant(x[np.where(x)])
x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))
@ -722,7 +722,7 @@ class VariableScopeTest(test.TestCase):
def device_func(op): def device_func(op):
if op.type in ["Variable", "VariableV2", "VarHandleOp"]: if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
varname_type.append((op.name, op.get_attr("dtype"))) varname_type.append((op.name, op.get_attr("dtype")))
return "/gpu:0" return "/device:GPU:0"
with g.as_default(): with g.as_default():
with ops.device(device_func): with ops.device(device_func):
@ -163,20 +163,20 @@ class GradientsTest(test_util.TensorFlowTestCase):
with ops.Graph().as_default() as g: with ops.Graph().as_default() as g:
w = constant(1.0, shape=[1, 1]) w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2]) x = constant(1.0, shape=[1, 2])
with g.device("/gpu:0"): with g.device("/device:GPU:0"):
wx = math_ops.matmul(w, x) wx = math_ops.matmul(w, x)
gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0] gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0]
self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups()) self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
def testColocateGradientsWithAggregation(self): def testColocateGradientsWithAggregation(self):
with ops.Graph().as_default() as g: with ops.Graph().as_default() as g:
with g.device("/gpu:1"): with g.device("/device:GPU:1"):
w = constant(1.0, shape=[1, 1]) w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2]) x = constant(1.0, shape=[1, 2])
y = constant(1.0, shape=[1, 2]) y = constant(1.0, shape=[1, 2])
wx = math_ops.matmul(w, x) wx = math_ops.matmul(w, x)
wy = math_ops.matmul(w, y) wy = math_ops.matmul(w, y)
with g.device("/gpu:0"): with g.device("/device:GPU:0"):
z = wx + wy z = wx + wy
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0] gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
@ -187,7 +187,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
def testColocateGradientsWithAggregationInMultipleDevices(self): def testColocateGradientsWithAggregationInMultipleDevices(self):
with ops.Graph().as_default() as g: with ops.Graph().as_default() as g:
with g.device("/gpu:1"): with g.device("/device:GPU:1"):
w = constant(1.0, shape=[1, 1]) w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2]) x = constant(1.0, shape=[1, 2])
y = constant(1.0, shape=[1, 2]) y = constant(1.0, shape=[1, 2])
@ -195,7 +195,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
wx = math_ops.matmul(w, x) wx = math_ops.matmul(w, x)
with g.device("/task:2"): with g.device("/task:2"):
wy = math_ops.matmul(w, y) wy = math_ops.matmul(w, y)
with g.device("/gpu:0"): with g.device("/device:GPU:0"):
z = wx + wy z = wx + wy
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0] gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
@ -47,7 +47,7 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
Returns: Returns:
A matmul operation to run() A matmul operation to run()
""" """
with ops.device('/%s:0' % device): with ops.device('%s' % device):
if not transpose_a: if not transpose_a:
x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype)) x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
else: else:
@ -112,7 +112,7 @@ class MatmulBenchmark(test.Benchmark):
return duration return duration
def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters): def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
self.run_graph('gpu', n, m, k, transpose_a, transpose_b, num_iters, dtype) self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b, num_iters, dtype)
def test_round(self, num_iters): def test_round(self, num_iters):
dtypes = [np.float32, np.float64] dtypes = [np.float32, np.float64]
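With this change the benchmark is driven by a full device name rather than the bare 'gpu' prefix; a hedged usage sketch (argument order inferred from the test file below; `MatmulBenchmark` is the class defined in matmul_benchmark.py above):

```python
import numpy as np
from tensorflow.python.platform import test

# run_graph(device, n, m, k, transpose_a, transpose_b, num_iters, dtype);
# the device argument is now a complete name such as '/device:GPU:0'.
bench = MatmulBenchmark()
duration = bench.run_graph(test.gpu_device_name(),
                           1024, 1024, 1024, False, False, 10, np.float32)
print('matmul benchmark duration: %.6f s' % duration)
```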
@ -71,37 +71,39 @@ class MatmulBenchmarkTest(googletest.TestCase):
def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype): def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
graph = ops.Graph() graph = ops.Graph()
with graph.as_default(): with graph.as_default():
matmul_benchmark.build_graph("gpu", n, m, k, transpose_a, transpose_b, matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b,
dtype) dtype)
gd = graph.as_graph_def() gd = graph.as_graph_def()
self.assertProtoEquals(""" dev=googletest.gpu_device_name()
node { name: "random_uniform/shape" op: "Const" device: "/device:GPU:0" } proto_expected = """
node { name: "random_uniform/min" op: "Const" device: "/device:GPU:0" } node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/max" op: "Const" device: "/device:GPU:0" } node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: "/device:GPU:0" } node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: "/device:GPU:0" } node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" }
node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: "/device:GPU:0" } node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" }
node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: "/device:GPU:0" } node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" }
node { name: "Variable" op: "VariableV2" device: "/device:GPU:0" } node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" }
node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: "/device:GPU:0" } node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" }
node { name: "Variable/read" op: "Identity" input: "Variable" device: "/device:GPU:0" } node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/shape" op: "Const" device: "/device:GPU:0" } node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/min" op: "Const" device: "/device:GPU:0" } node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/max" op: "Const" device: "/device:GPU:0" } node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: "/device:GPU:0" } node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: "/device:GPU:0" } node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: "/device:GPU:0" } node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: "/device:GPU:0" } node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" }
node { name: "Variable_1" op: "VariableV2" device: "/device:GPU:0" } node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: "/device:GPU:0" } node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" }
node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: "/device:GPU:0" } node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \""""+ dev +"""\" }
node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: "/device:GPU:0" } node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" }
node { name: "group_deps" op: "NoOp" input: "^MatMul" device: "/device:GPU:0" } node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" }
""", self._StripGraph(gd)) node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" }
"""
self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype): def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
benchmark_instance = matmul_benchmark.MatmulBenchmark() benchmark_instance = matmul_benchmark.MatmulBenchmark()
duration = benchmark_instance.run_graph("gpu", n, m, k, transpose_a, duration = benchmark_instance.run_graph(googletest.gpu_device_name(), n, m, k, transpose_a,
transpose_b, 1, dtype) transpose_b, 1, dtype)
self.assertTrue(duration > 1e-6) self.assertTrue(duration > 1e-6)
@ -97,21 +97,22 @@ class RunMetadataTest(test.TestCase):
if not test.is_gpu_available(cuda_only=True): if not test.is_gpu_available(cuda_only=True):
return return
gpu_dev = test.gpu_device_name()
ops.reset_default_graph() ops.reset_default_graph()
with ops.device('/gpu:0'): with ops.device(gpu_dev):
tfprof_node, run_meta = _run_model() tfprof_node, run_meta = _run_model()
self.assertEqual(tfprof_node.children[0].name, 'MatMul') self.assertEqual(tfprof_node.children[0].name, 'MatMul')
self.assertGreater(tfprof_node.children[0].exec_micros, 10) self.assertGreater(tfprof_node.children[0].exec_micros, 10)
ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul']) ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
self.assertEqual(len(ret), 3) self.assertEqual(len(ret), 3)
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in ret) self.assertTrue('/job:localhost/replica:0/task:0' + gpu_dev in ret)
del ret['/job:localhost/replica:0/task:0/gpu:0'] del ret['/job:localhost/replica:0/task:0' + gpu_dev]
has_all_stream = False has_all_stream = False
for k, _ in six.iteritems(ret): for k, _ in six.iteritems(ret):
self.assertTrue('gpu:0/stream' in k) self.assertTrue(gpu_dev + '/stream' in k)
if 'gpu:0/stream:all' in k: if gpu_dev + '/stream:all' in k:
has_all_stream = True has_all_stream = True
self.assertTrue(has_all_stream) self.assertTrue(has_all_stream)
@ -159,24 +160,24 @@ class RunMetadataTest(test.TestCase):
return return
ops.reset_default_graph() ops.reset_default_graph()
with ops.device('/gpu:0'): with ops.device('/device:GPU:0'):
tfprof_node, run_meta = _run_loop_model() tfprof_node, run_meta = _run_loop_model()
# The while-loop caused a node to appear 4 times in scheduling. # The while-loop caused a node to appear 4 times in scheduling.
ret = _extract_node(run_meta, ret = _extract_node(run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul') 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
self.assertEqual(len(ret['/job:localhost/replica:0/task:0/gpu:0']), 4) self.assertEqual(len(ret['/job:localhost/replica:0/task:0/device:GPU:0']), 4)
total_cpu_execs = 0 total_cpu_execs = 0
for node in ret['/job:localhost/replica:0/task:0/gpu:0']: for node in ret['/job:localhost/replica:0/task:0/device:GPU:0']:
total_cpu_execs += node.op_end_rel_micros total_cpu_execs += node.op_end_rel_micros
ret = _extract_node( ret = _extract_node(
run_meta, run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul') 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
self.assertGreaterEqual(len(ret['/gpu:0/stream:all']), 4) self.assertGreaterEqual(len(ret['/device:GPU:0/stream:all']), 4)
total_accelerator_execs = 0 total_accelerator_execs = 0
for node in ret['/gpu:0/stream:all']: for node in ret['/device:GPU:0/stream:all']:
total_accelerator_execs += node.op_end_rel_micros total_accelerator_execs += node.op_end_rel_micros
mm_node = lib.SearchTFProfNode( mm_node = lib.SearchTFProfNode(
@ -315,7 +315,7 @@ class ProfileOptionBuilder(object):
"""Selectively counting statistics based on node types. """Selectively counting statistics based on node types.
Here, 'types' means the profiler nodes' properties. Profiler by default Here, 'types' means the profiler nodes' properties. Profiler by default
considers device name (e.g. /job:xx/.../gpu:0) and operation type considers device name (e.g. /job:xx/.../device:GPU:0) and operation type
(e.g. MatMul) as profiler nodes' properties. User can also associate (e.g. MatMul) as profiler nodes' properties. User can also associate
customized 'types' to profiler nodes through OpLogProto proto. customized 'types' to profiler nodes through OpLogProto proto.
@ -50,7 +50,7 @@ class RemoveDeviceTest : public ::testing::Test {
add_node2->set_op("Add"); add_node2->set_op("Add");
add_node2->add_input("const_node1"); add_node2->add_input("const_node1");
add_node2->add_input("const_node2"); add_node2->add_input("const_node2");
add_node2->set_device("//gpu:1"); add_node2->set_device("//device:GPU:1");
NodeDef* add_node3 = graph_def.add_node(); NodeDef* add_node3 = graph_def.add_node();
add_node3->set_name("add_node3"); add_node3->set_name("add_node3");