From ab96f41fb4c4f17e96fd6177aa589c19df580456 Mon Sep 17 00:00:00 2001
From: Luke Iwanski
Date: Fri, 11 Aug 2017 01:35:21 +0100
Subject: [PATCH] [OpenCL] Extends matmul_benchmark.py to cover SYCL (#11697)

* [OpenCL] Extends matmul_benchmark.py to cover SYCL

* Fixed typo

* /gpu:0 -> /device:GPU:0

* Fixes control_flow_ops_py_test

* /gpu: -> /device:GPU:

* Fixes //tensorflow/python/profiler/internal:run_metadata_test

* gpu: -> GPU:

* Fixes tfprof_node

* [OpenCL] Fixes device-path-to-name conversion for device names with multiple colons (#123)

  The device path is constructed from a device name by replacing all colons
  with underscores. Some device names contain more than one colon, for
  example 'device:SYCL:0', which gives the path 'device_SYCL_0'. The
  previous code would not convert this back to the original device name,
  but rather to 'device:SYCL_0'. (A minimal sketch of this round trip
  appears after the device.h hunk below.)

  An alternative fix would be to convert all underscores back to colons in
  the device name (i.e. remove the count restriction in
  `replace("_", ":", 1)`); however, I'm not sure whether any device names
  contain underscores.

* If no GPU device is available, fake one

* gpu: -> device:GPU

* Fixes profiler test

* /gpu:x -> /device:GPU:x

* Fixes debug_io_utils_test.cc test

* Fixes device_name_utils_test.cc
---
 tensorflow/cc/tutorials/example_trainer.cc    |  2 +-
 .../kernel_tests/cudnn_rnn_ops_benchmark.py   |  6 +-
 .../python/kernel_tests/mixture_test.py       |  2 +-
 .../framework/python/ops/variables_test.py    |  8 +-
 .../contrib/nccl/python/ops/nccl_ops_test.py  |  6 +-
 .../python/kernel_tests/core_rnn_cell_test.py |  7 +-
 .../rnn/python/kernel_tests/core_rnn_test.py  | 19 ++--
 .../rnn/python/kernel_tests/gru_ops_test.py   |  6 +-
 .../kernel_tests/beam_search_ops_test.py      |  2 +-
 tensorflow/core/common_runtime/device.h       |  2 +-
 .../common_runtime/direct_session_test.cc     |  4 +-
 ...direct_session_with_tracking_alloc_test.cc |  4 +-
 .../core/common_runtime/gpu/gpu_device.cc     |  4 +-
 .../core/common_runtime/gpu/gpu_device.h      |  2 +-
 .../gpu/gpu_stream_util_test.cc               |  4 +-
 .../core/common_runtime/memory_types_test.cc  |  2 +-
 tensorflow/core/debug/debug_gateway.cc        |  2 +-
 tensorflow/core/debug/debug_gateway_test.cc   |  8 +-
 tensorflow/core/debug/debug_io_utils_test.cc  |  6 +-
 .../core/distributed_runtime/executor_test.cc |  2 +-
 .../rpc/grpc_channel_test.cc                  |  4 +-
 tensorflow/core/framework/node_def.proto      |  4 +-
 tensorflow/core/framework/rendezvous_test.cc  | 10 +--
 tensorflow/core/graph/graph_partition_test.cc |  2 +-
 tensorflow/core/graph/mkl_layout_pass_test.cc |  2 +-
 .../core/grappler/clusters/single_machine.cc  |  2 +-
 .../costs/analytical_cost_estimator_test.cc   |  2 +-
 .../grappler/costs/virtual_placer_test.cc     | 24 ++--
 .../grappler/optimizers/model_pruner_test.cc  | 10 +--
 .../core/platform/default/gpu_tracer.cc       |  4 +-
 tensorflow/core/platform/gpu_tracer_test.cc   |  6 +-
 tensorflow/core/profiler/README.md            |  8 +-
 tensorflow/core/profiler/g3doc/advise.md      |  8 +-
 .../core/profiler/g3doc/profile_time.md       |  2 +-
 .../internal/advisor/tfprof_advisor_test.cc   |  6 +-
 .../core/profiler/internal/tfprof_node.cc     |  4 +-
 tensorflow/core/protobuf/config.proto         |  2 +-
 .../core/util/device_name_utils_test.cc       | 88 +++++++++----------
 .../api_guides/python/contrib.seq2seq.md      |  6 +-
 .../docs_src/programmers_guide/variables.md   |  2 +-
 tensorflow/docs_src/tutorials/deep_cnn.md     |  2 +-
 tensorflow/docs_src/tutorials/using_gpu.md    | 46 +++++-----
 tensorflow/examples/learn/multiple_gpu.py     |  4 +-
 .../client/session_clusterspec_prop_test.py   |  4 +-
 tensorflow/python/client/session_test.py      |  6 +-
 tensorflow/python/client/timeline_test.py     |  4 +-
tensorflow/python/debug/lib/debug_data.py | 3 +- .../python/debug/lib/debug_data_test.py | 20 ++--- .../python/debug/lib/session_debug_testlib.py | 2 +- .../debug/wrappers/local_cli_wrapper_test.py | 2 +- tensorflow/python/framework/device_test.py | 16 ++-- tensorflow/python/framework/function_test.py | 2 +- .../python/framework/graph_util_test.py | 4 +- tensorflow/python/framework/importer_test.py | 2 +- .../python/framework/meta_graph_test.py | 2 +- tensorflow/python/framework/ops.py | 4 +- tensorflow/python/framework/ops_test.py | 18 ++-- tensorflow/python/framework/test_util.py | 12 +-- .../python/kernel_tests/basic_gpu_test.py | 2 +- .../python/kernel_tests/cholesky_op_test.py | 8 +- .../kernel_tests/control_flow_ops_py_test.py | 15 ++-- .../sparse_tensor_dense_matmul_op_test.py | 4 +- .../kernel_tests/variable_scope_test.py | 2 +- tensorflow/python/ops/gradients_test.py | 10 +-- tensorflow/python/ops/matmul_benchmark.py | 4 +- .../python/ops/matmul_benchmark_test.py | 54 ++++++------ .../profiler/internal/run_metadata_test.py | 21 ++--- tensorflow/python/profiler/option_builder.py | 2 +- .../graph_transforms/remove_device_test.cc | 2 +- 69 files changed, 286 insertions(+), 285 deletions(-) diff --git a/tensorflow/cc/tutorials/example_trainer.cc b/tensorflow/cc/tutorials/example_trainer.cc index 49d3cca3a4e..3675d72ee35 100644 --- a/tensorflow/cc/tutorials/example_trainer.cc +++ b/tensorflow/cc/tutorials/example_trainer.cc @@ -101,7 +101,7 @@ void ConcurrentSteps(const Options* opts, int session_index) { std::unique_ptr session(NewSession(options)); GraphDef def = CreateGraphDef(); if (options.target.empty()) { - graph::SetDefaultDevice(opts->use_gpu ? "/gpu:0" : "/cpu:0", &def); + graph::SetDefaultDevice(opts->use_gpu ? "/device:GPU:0" : "/cpu:0", &def); } TF_CHECK_OK(session->Create(def)); diff --git a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py index 6ca38c2e479..ff409ac7182 100644 --- a/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py +++ b/tensorflow/contrib/cudnn_rnn/python/kernel_tests/cudnn_rnn_ops_benchmark.py @@ -93,7 +93,7 @@ class CudnnRNNBenchmark(test.Benchmark): batch_size = config["batch_size"] seq_length = config["seq_length"] - with ops.Graph().as_default(), ops.device("/gpu:0"): + with ops.Graph().as_default(), ops.device("/device:GPU:0"): model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units) params_size_t = model.params_size() input_data = variables.Variable( @@ -125,7 +125,7 @@ class CudnnRNNBenchmark(test.Benchmark): batch_size = config["batch_size"] seq_length = config["seq_length"] - with ops.Graph().as_default(), ops.device("/gpu:0"): + with ops.Graph().as_default(), ops.device("/device:GPU:0"): inputs = seq_length * [ array_ops.zeros([batch_size, num_units], dtypes.float32) ] @@ -153,7 +153,7 @@ class CudnnRNNBenchmark(test.Benchmark): batch_size = config["batch_size"] seq_length = config["seq_length"] - with ops.Graph().as_default(), ops.device("/gpu:0"): + with ops.Graph().as_default(), ops.device("/device:GPU:0"): inputs = seq_length * [ array_ops.zeros([batch_size, num_units], dtypes.float32) ] diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py index aa523a95118..2705b96f271 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py +++ 
b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py @@ -634,7 +634,7 @@ class MixtureBenchmark(test.Benchmark): np.random.seed(127) with session.Session(config=config, graph=ops.Graph()) as sess: random_seed.set_random_seed(0) - with ops.device("/gpu:0" if use_gpu else "/cpu:0"): + with ops.device("/device:GPU:0" if use_gpu else "/cpu:0"): mixture = create_distribution( num_components=num_components, batch_size=batch_size, diff --git a/tensorflow/contrib/framework/python/ops/variables_test.py b/tensorflow/contrib/framework/python/ops/variables_test.py index cb278707202..6a74e4e8666 100644 --- a/tensorflow/contrib/framework/python/ops/variables_test.py +++ b/tensorflow/contrib/framework/python/ops/variables_test.py @@ -443,19 +443,19 @@ class VariablesTest(test.TestCase): e = variables_lib2.variable('e', initializer=e_init) # The values below highlight how the VariableDeviceChooser puts initial # values on the same device as the variable job. - self.assertDeviceEqual(a.device, '/gpu:0') + self.assertDeviceEqual(a.device, '/device:GPU:0') self.assertEqual(a.initial_value.op.colocation_groups(), a.op.colocation_groups()) - self.assertDeviceEqual(b.device, '/gpu:0') + self.assertDeviceEqual(b.device, '/device:GPU:0') self.assertEqual(b.initial_value.op.colocation_groups(), b.op.colocation_groups()) self.assertDeviceEqual(c.device, '/cpu:12') self.assertEqual(c.initial_value.op.colocation_groups(), c.op.colocation_groups()) - self.assertDeviceEqual(d.device, '/gpu:0') + self.assertDeviceEqual(d.device, '/device:GPU:0') self.assertEqual(d.initial_value.op.colocation_groups(), d.op.colocation_groups()) - self.assertDeviceEqual(e.device, '/gpu:0') + self.assertDeviceEqual(e.device, '/device:GPU:0') self.assertDeviceEqual(e.initial_value.device, '/cpu:99') diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py index 130cb4ca12c..ae658e73227 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops_test.py @@ -43,7 +43,7 @@ class AllReduceTest(test.TestCase): self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum) def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn): - for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]: + for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: shape = (3, 4) np_ans = None tensors = [] @@ -84,7 +84,7 @@ class BroadcastTest(test.TestCase): # Create session inside outer loop to test use of # same communicator across multiple sessions. with self.test_session(use_gpu=True) as sess: - for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]: + for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: shape = (3, 4) sender = np.random.randint(0, len(devices) - 1) with ops.device(devices[sender]): @@ -115,7 +115,7 @@ class CombinedTest(test.TestCase): # Create session inside outer loop to test use of # same communicator across multiple sessions. 
with self.test_session(use_gpu=True) as sess: - for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]: + for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]: shape = (3, 4) # all-reduce diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index faab0992498..a77097e0c3a 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -446,12 +446,12 @@ class RNNCellTest(test.TestCase): # Can't perform this test w/o a GPU return + gpu_dev = test.gpu_device_name() with self.test_session(use_gpu=True) as sess: with variable_scope.variable_scope( "root", initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros([1, 1, 3]) - cell = rnn_cell_impl.DeviceWrapper( - rnn_cell_impl.GRUCell(3), test_util.gpu_device_name()) + cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev) with ops.device("/cpu:0"): outputs, _ = rnn.dynamic_rnn( cell=cell, inputs=x, dtype=dtypes.float32) @@ -463,8 +463,7 @@ class RNNCellTest(test.TestCase): _ = sess.run(outputs, options=opts, run_metadata=run_metadata) step_stats = run_metadata.step_stats - ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or - ("sycl" in step_stats.dev_stats[0].device)) else 1 + ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1 gpu_stats = step_stats.dev_stats[ix].node_stats cpu_stats = step_stats.dev_stats[1 - ix].node_stats self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name]) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index 701590a8feb..40a3fb2fb0b 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -42,7 +42,6 @@ from tensorflow.python.ops import variables as variables_lib from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging from tensorflow.python.util import nest -from tensorflow.python.framework import test_util class Plus1RNNCell(rnn_lib.RNNCell): """RNN Cell generating (output, new_state) = (input + 1, state + 1).""" @@ -2208,11 +2207,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): if not test.is_gpu_available(): return # Test requires access to a GPU + gpu_dev = test.gpu_device_name() run_metadata = self._execute_rnn_on( - rnn_device="/cpu:0", cell_device=test_util.gpu_device_name()) + rnn_device="/cpu:0", cell_device=gpu_dev) step_stats = run_metadata.step_stats - ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or - ("sycl" in step_stats.dev_stats[0].device)) else 1 + ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1 gpu_stats = step_stats.dev_stats[ix].node_stats cpu_stats = step_stats.dev_stats[1 - ix].node_stats @@ -2233,12 +2232,12 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): if not test.is_gpu_available(): return # Test requires access to a GPU + gpu_dev = test.gpu_device_name() run_metadata = self._execute_rnn_on( rnn_device="/cpu:0", cell_device="/cpu:0", - input_device=test_util.gpu_device_name()) + input_device=gpu_dev) step_stats = run_metadata.step_stats - ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or - ("sycl" in step_stats.dev_stats[0].device)) else 1 + ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1 gpu_stats = step_stats.dev_stats[ix].node_stats 
cpu_stats = step_stats.dev_stats[1 - ix].node_stats @@ -2253,11 +2252,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase): if not test.is_gpu_available(): return # Test requires access to a GPU + gpu_dev = test.gpu_device_name() run_metadata = self._execute_rnn_on( - input_device=test_util.gpu_device_name()) + input_device=gpu_dev) step_stats = run_metadata.step_stats - ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or - ("sycl" in step_stats.dev_stats[0].device)) else 1 + ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1 gpu_stats = step_stats.dev_stats[ix].node_stats cpu_stats = step_stats.dev_stats[1 - ix].node_stats diff --git a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py index baf17431f35..4239e32ab93 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/gru_ops_test.py @@ -357,7 +357,7 @@ def training_gru_block_vs_gru_cell(batch_size, ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: # Specify the device which is been used. - with ops.device("/cpu:0" if not use_gpu else "/gpu:0"): + with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): # Random initializers. seed = 1994 @@ -429,7 +429,7 @@ def inference_gru_block_vs_gru_cell(batch_size, """Benchmark inference speed between GRUBlockCell vs GRUCell.""" ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: - with ops.device("/cpu:0" if not use_gpu else "/gpu:0"): + with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): # Random initializers. seed = 1994 @@ -484,7 +484,7 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size, """Benchmark single bprop step speed between GRUBlockCell vs GRUCell.""" ops.reset_default_graph() with session.Session(graph=ops.Graph()) as sess: - with ops.device("/cpu:0" if not use_gpu else "/gpu:0"): + with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"): initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989) # Inputs x = vs.get_variable("x", [batch_size, input_size]) diff --git a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py index 3496b355b4b..50cccf392fd 100644 --- a/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py +++ b/tensorflow/contrib/seq2seq/python/kernel_tests/beam_search_ops_test.py @@ -78,7 +78,7 @@ class GatherTreeTest(test.TestCase): sequence_length = [[3, 3, 3]] expected_result = _transpose_batch_time( [[[2, -1, 2], [6, 5, 6], [7, 8, 9], [-1, -1, -1]]]) - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): beams = beam_search_ops.gather_tree( step_ids=step_ids, parent_ids=parent_ids, sequence_length=sequence_length) diff --git a/tensorflow/core/common_runtime/device.h b/tensorflow/core/common_runtime/device.h index ded7e383d17..1d450aad7ff 100644 --- a/tensorflow/core/common_runtime/device.h +++ b/tensorflow/core/common_runtime/device.h @@ -22,7 +22,7 @@ limitations under the License. // Device names // * Every Device should have a unique name with the format: // /job:___/replica:___/task:___/(gpu|cpu):___ -// An example name would be "/job:train/replica:0/task:3/gpu:2". +// An example name would be "/job:train/replica:0/task:3/device:GPU:2". // * Task numbers are within the specified replica, so there are as // many "task zeros" as replicas. 
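The round trip described in the commit message (device name -> path -> device name) can be sketched in a few lines of Python. This is an illustration only; `name_to_path` and the two `path_to_name_*` helpers are hypothetical, not TensorFlow APIs:

```python
# Hypothetical helpers illustrating the commit message; not TensorFlow APIs.

def name_to_path(name):
    # A device path is the device name with every colon replaced by an
    # underscore: 'device:SYCL:0' -> 'device_SYCL_0'.
    return name.replace(":", "_")

def path_to_name_buggy(path):
    # The behaviour the commit message describes: only the first underscore
    # is restored, so 'device_SYCL_0' comes back as 'device:SYCL_0'.
    return path.replace("_", ":", 1)

def path_to_name_alternative(path):
    # The alternative fix mentioned in the commit message: restore every
    # underscore. Correct only if no device name itself contains '_'.
    return path.replace("_", ":")

assert path_to_name_buggy(name_to_path("device:SYCL:0")) == "device:SYCL_0"
assert path_to_name_alternative(name_to_path("device:SYCL:0")) == "device:SYCL:0"
```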
diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index 097dab8406f..05f683f6082 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -476,7 +476,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) { vx.scalar()() = 1.0; Node* x = test::graph::Constant(&g, vx); Node* y = test::graph::Unary(&g, "Darth", x); - y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); GraphDef def; test::graph::ToGraphDef(&g, &def); @@ -494,7 +494,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) { vx.scalar()() = 1.0; Node* x = test::graph::Constant(&g, vx); Node* y = test::graph::Unary(&g, "Darth", x); - y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); GraphDef def; test::graph::ToGraphDef(&g, &def); diff --git a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc index da76ac83db7..459c20ef20b 100644 --- a/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc +++ b/tensorflow/core/common_runtime/direct_session_with_tracking_alloc_test.cc @@ -154,14 +154,14 @@ static void TestHWAccelerator(bool enableHWTrace) { Tensor x_tensor(DT_FLOAT, TensorShape({2, 1})); test::FillValues(&x_tensor, {1, 1}); Node* x = test::graph::Constant(&graph, x_tensor); - x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); #ifdef TENSORFLOW_USE_SYCL x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0"); #endif // TENSORFLOW_USE_SYCL // y = A * x Node* y = test::graph::Matmul(&graph, a, x, false, false); - y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); #ifdef TENSORFLOW_USE_SYCL y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0"); #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index fbc2be18ccd..63956afce25 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -588,7 +588,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options, for (int i = 0; i < n; i++) { BaseGPUDevice* gpu_device; TF_RETURN_IF_ERROR(CreateGPUDevice(options, - strings::StrCat(name_prefix, "/gpu:", i), + strings::StrCat(name_prefix, "/device:GPU:", i), valid_gpu_ids[i], &gpu_device)); TF_RETURN_IF_ERROR(gpu_device->Init(options)); devices->push_back(gpu_device); @@ -1049,7 +1049,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds( size_t new_id = ids->size(); ids->push_back(visible_gpu_id); - LOG(INFO) << "Creating TensorFlow device (/gpu:" << new_id << ") -> " + LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> " << "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")"; } diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 08c58867eed..a7e078e97cc 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -141,7 +141,7 @@ class 
BaseGPUDeviceFactory : public DeviceFactory { Allocator* cpu_allocator) = 0; // Returns into 'ids' the list of valid GPU ids, in the order that - // they should map to logical gpu ids "/gpu:0", "/gpu:1", etc, based + // they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based // upon 'visible_device_list', a comma-separated list of 'visible // gpu ids'. Status GetValidDeviceIds(const string& visible_device_list, diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc index a8bad5b94dc..003e416bbe6 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc @@ -106,9 +106,9 @@ TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) { TEST_F(GpuStreamUtilTest, StreamOverrides) { auto root = Scope::NewRootScope().ExitOnError(); ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0, - "/gpu:0"); + "/device:GPU:0"); Output n = ops::MatMul(root, {}, {}); - ops::_Send(root.WithOpName("output"), n, "output", "/gpu:0", 0, "/cpu:0"); + ops::_Send(root.WithOpName("output"), n, "output", "/device:GPU:0", 0, "/cpu:0"); Graph g(OpRegistry::Global()); TF_ASSERT_OK(root.ToGraph(&g)); diff --git a/tensorflow/core/common_runtime/memory_types_test.cc b/tensorflow/core/common_runtime/memory_types_test.cc index b3a43d35046..2a834ddca42 100644 --- a/tensorflow/core/common_runtime/memory_types_test.cc +++ b/tensorflow/core/common_runtime/memory_types_test.cc @@ -53,7 +53,7 @@ TEST(MemoryTypeChecker, Int32NotOk) { EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_GPU, g))); // But we can insert _HostSend/_HostRecv to ensure the invariant. - TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/gpu:0", g)); + TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g)); TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g)); #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/debug/debug_gateway.cc b/tensorflow/core/debug/debug_gateway.cc index 2aaed9563a6..616ced3d0f3 100644 --- a/tensorflow/core/debug/debug_gateway.cc +++ b/tensorflow/core/debug/debug_gateway.cc @@ -86,7 +86,7 @@ void DebugGateway::CopyTensor(const string& node_name, const int output_slot, // Determine if the tensor is on device (GPU) or host (CPU). // The second part of the check is necessary because even an OpKernel on // may have output tensors allocated on CPU. - if ((device->name().find("gpu:") != string::npos || device->name().find("SYCL:") != string::npos) && + if ((device->name().find("GPU:") != string::npos || device->name().find("SYCL:") != string::npos) && !ctx->output_alloc_attr(output_slot).on_host()) { // GPU tensors: Copy it to host (CPU). 
DeviceContext* device_ctxt = ctx->op_device_context(); diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc index f25d91a3c27..9a74a4bb4cf 100644 --- a/tensorflow/core/debug/debug_gateway_test.cc +++ b/tensorflow/core/debug/debug_gateway_test.cc @@ -47,7 +47,7 @@ class SessionDebugMinusAXTest : public ::testing::Test { Graph graph(OpRegistry::Global()); #if GOOGLE_CUDA - const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0"; #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else @@ -505,7 +505,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test { Graph graph(OpRegistry::Global()); #if GOOGLE_CUDA - const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0"; #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else @@ -607,7 +607,7 @@ class SessionDebugVariableTest : public ::testing::Test { Graph graph(OpRegistry::Global()); #if GOOGLE_CUDA - const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0"; #elif defined(TENSORFLOW_USE_SYCL) const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #else @@ -879,7 +879,7 @@ class SessionDebugGPUSwitchTest : public ::testing::Test { Graph graph(OpRegistry::Global()); #ifdef GOOGLE_CUDA - const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0"; + const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0"; #elif TENSORFLOW_USE_SYCL const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0"; #endif diff --git a/tensorflow/core/debug/debug_io_utils_test.cc b/tensorflow/core/debug/debug_io_utils_test.cc index df6fb1d2fe1..0aef15e4853 100644 --- a/tensorflow/core/debug/debug_io_utils_test.cc +++ b/tensorflow/core/debug/debug_io_utils_test.cc @@ -51,14 +51,14 @@ class DebugIOUtilsTest : public ::testing::Test { }; TEST_F(DebugIOUtilsTest, ConstructDebugNodeKey) { - DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/gpu:2", + DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/device:GPU:2", "hidden_1/MatMul", 0, "DebugIdentity"); - EXPECT_EQ("/job:worker/replica:1/task:0/gpu:2", debug_node_key.device_name); + EXPECT_EQ("/job:worker/replica:1/task:0/device:GPU:2", debug_node_key.device_name); EXPECT_EQ("hidden_1/MatMul", debug_node_key.node_name); EXPECT_EQ(0, debug_node_key.output_slot); EXPECT_EQ("DebugIdentity", debug_node_key.debug_op); EXPECT_EQ("hidden_1/MatMul:0:DebugIdentity", debug_node_key.debug_node_name); - EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,gpu_2", + EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,device_GPU_2", debug_node_key.device_path); } diff --git a/tensorflow/core/distributed_runtime/executor_test.cc b/tensorflow/core/distributed_runtime/executor_test.cc index 1a4980a61b2..5b115f9a4d4 100644 --- a/tensorflow/core/distributed_runtime/executor_test.cc +++ b/tensorflow/core/distributed_runtime/executor_test.cc @@ -140,7 +140,7 @@ Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation, } #define ALICE "/job:j/replica:0/task:0/cpu:0" -#define BOB "/job:j/replica:0/task:0/gpu:0" +#define BOB "/job:j/replica:0/task:0/device:GPU:0" TEST_F(ExecutorTest, SimpleAdd) { // c = a + b 
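The substring checks above (for example the debug_gateway.cc change from "gpu:" to "GPU:") depend on device-name matching being case-sensitive once names use the canonical `/device:GPU:N` spelling. A minimal sketch, using device-name strings taken from hunks in this patch:

```python
# Device names taken from the hunks in this patch; matching is case-sensitive.
old_style = "/job:localhost/replica:0/task:0/gpu:0"
new_style = "/job:localhost/replica:0/task:0/device:GPU:0"

assert "gpu:" in old_style       # the old lower-case check matched here
assert "gpu:" not in new_style   # ...but misses the canonical spelling
assert "GPU:" in new_style       # hence the switch to the upper-case check
```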
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_channel_test.cc b/tensorflow/core/distributed_runtime/rpc/grpc_channel_test.cc index c975563a21f..a17acc85b38 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_channel_test.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_channel_test.cc @@ -31,9 +31,9 @@ TEST(GrpcChannelTest, IsSameAddressSpace) { EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0", "/job:mnist/replica:10/task:10/cpu:1")); EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0", - "/job:mnist/replica:10/task:10/gpu:2")); + "/job:mnist/replica:10/task:10/device:GPU:2")); EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10", - "/job:mnist/replica:10/task:10/gpu:2")); + "/job:mnist/replica:10/task:10/device:GPU:2")); EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:1", "/job:mnist/replica:10/task:10")); diff --git a/tensorflow/core/framework/node_def.proto b/tensorflow/core/framework/node_def.proto index d145fac8c14..53aa03108ab 100644 --- a/tensorflow/core/framework/node_def.proto +++ b/tensorflow/core/framework/node_def.proto @@ -38,8 +38,8 @@ message NodeDef { // | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") ) // // Valid values for this string include: - // * "/job:worker/replica:0/task:1/gpu:3" (full specification) - // * "/job:worker/gpu:3" (partial specification) + // * "/job:worker/replica:0/task:1/device:GPU:3" (full specification) + // * "/job:worker/device:GPU:3" (partial specification) // * "" (no specification) // // If the constraints do not resolve to a single device (or if this diff --git a/tensorflow/core/framework/rendezvous_test.cc b/tensorflow/core/framework/rendezvous_test.cc index fe37b16bb6c..32b8ad784d5 100644 --- a/tensorflow/core/framework/rendezvous_test.cc +++ b/tensorflow/core/framework/rendezvous_test.cc @@ -39,11 +39,11 @@ namespace { TEST(RendezvousTest, Key) { const string key = Rendezvous::CreateKey( "/job:mnist/replica:1/task:2/CPU:0", 7890, - "/job:mnist/replica:1/task:2/GPU:0", "var0", FrameAndIter(0, 0)); + "/job:mnist/replica:1/task:2/device:GPU:0", "var0", FrameAndIter(0, 0)); EXPECT_EQ(key, "/job:mnist/replica:1/task:2/CPU:0;" "0000000000001ed2;" // 7890 = 0x1ed2 - "/job:mnist/replica:1/task:2/GPU:0;" + "/job:mnist/replica:1/task:2/device:GPU:0;" "var0;" "0:0"); Rendezvous::ParsedKey parsed; @@ -51,12 +51,12 @@ TEST(RendezvousTest, Key) { EXPECT_EQ(parsed.src_device, "/job:mnist/replica:1/task:2/CPU:0"); EXPECT_EQ(parsed.src_incarnation, 7890); EXPECT_EQ(parsed.src.type, "CPU"); - EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/GPU:0"); + EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/device:GPU:0"); EXPECT_EQ(parsed.dst.type, "GPU"); EXPECT_FALSE(Rendezvous::ParseKey("foo;bar;baz", &parsed).ok()); EXPECT_FALSE(Rendezvous::ParseKey("/job:mnist/replica:1/task:2/CPU:0;" - "/job:mnist/replica:1/task:2/GPU:0;", + "/job:mnist/replica:1/task:2/device:GPU:0;", &parsed) .ok()); EXPECT_FALSE( @@ -99,7 +99,7 @@ string V(const Tensor& tensor) { Rendezvous::ParsedKey MakeKey(const string& name) { string s = Rendezvous::CreateKey("/job:mnist/replica:1/task:2/CPU:0", 7890, - "/job:mnist/replica:1/task:2/GPU:0", name, + "/job:mnist/replica:1/task:2/device:GPU:0", name, FrameAndIter(0, 0)); Rendezvous::ParsedKey k; TF_EXPECT_OK(Rendezvous::ParseKey(s, &k)); diff --git a/tensorflow/core/graph/graph_partition_test.cc b/tensorflow/core/graph/graph_partition_test.cc index 3c12ed2689e..d84c62d4546 100644 --- a/tensorflow/core/graph/graph_partition_test.cc +++ 
b/tensorflow/core/graph/graph_partition_test.cc @@ -50,7 +50,7 @@ extern Status TopologicalSortNodesWithTimePriority( namespace { -const char gpu_device[] = "/job:a/replica:0/task:0/gpu:0"; +const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0"; string SplitByDevice(const Node* node) { return node->assigned_device_name(); } diff --git a/tensorflow/core/graph/mkl_layout_pass_test.cc b/tensorflow/core/graph/mkl_layout_pass_test.cc index efbe2134e0f..482e339802f 100644 --- a/tensorflow/core/graph/mkl_layout_pass_test.cc +++ b/tensorflow/core/graph/mkl_layout_pass_test.cc @@ -40,7 +40,7 @@ namespace tensorflow { namespace { const char kCPUDevice[] = "/job:a/replica:0/task:0/cpu:0"; -const char kGPUDevice[] = "/job:a/replica:0/task:0/gpu:0"; +const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0"; static void InitGraph(const string& s, Graph* graph, const string& device = kCPUDevice) { diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index 3481b2b158d..1f95a9aa88c 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -89,7 +89,7 @@ Status SingleMachine::Provision() { VLOG(1) << "Number of GPUs: " << num_gpus_; for (int i = 0; i < num_gpus_; ++i) { string device_name = - strings::StrCat("/job:localhost/replica:0/task:0/gpu:", i); + strings::StrCat("/job:localhost/replica:0/task:0/device:GPU:", i); VLOG(1) << "Adding GPU device " << device_name; devices_[device_name] = GetLocalGPUInfo(i); } diff --git a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc index 02156fbf580..d1f3e36aa81 100644 --- a/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc +++ b/tensorflow/core/grappler/costs/analytical_cost_estimator_test.cc @@ -42,7 +42,7 @@ class AnalyticalCostEstimatorTest : public ::testing::Test { gpu_device.set_frequency(1100); gpu_device.set_bandwidth(180 * 1024 * 1024); (*gpu_device.mutable_environment())["architecture"] = "6"; - devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device; + devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device; cluster_.reset(new VirtualCluster(devices)); } diff --git a/tensorflow/core/grappler/costs/virtual_placer_test.cc b/tensorflow/core/grappler/costs/virtual_placer_test.cc index 65a03fb5575..a16455cb703 100644 --- a/tensorflow/core/grappler/costs/virtual_placer_test.cc +++ b/tensorflow/core/grappler/costs/virtual_placer_test.cc @@ -30,14 +30,14 @@ TEST(VirtualPlacerTest, LocalDevices) { devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device; DeviceProperties gpu_device; gpu_device.set_type("GPU"); - devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device; + devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device; VirtualCluster cluster(devices); VirtualPlacer placer(&cluster); NodeDef node; node.set_op("Conv2D"); EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); node.set_device("CPU"); @@ -47,7 +47,7 @@ TEST(VirtualPlacerTest, LocalDevices) { node.set_device("GPU:0"); EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); } @@ -60,7 +60,7 @@ TEST(VirtualPlacerTest, 
EmptyJobBecomesLocalhost) { devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device; DeviceProperties gpu_device; gpu_device.set_type("GPU"); - devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device; + devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device; VirtualCluster cluster(devices); VirtualPlacer placer(&cluster); @@ -70,7 +70,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) { EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0", placer.get_canonical_device_name(node)); node.set_device("/device:GPU:0"); - EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); } @@ -113,7 +113,7 @@ TEST(VirtualPlacerTest, RemoteDevices) { devices["/job:my_job/replica:0/task:0/cpu:0"] = cpu_device; DeviceProperties gpu_device; gpu_device.set_type("GPU"); - devices["/job:my_job/replica:0/task:0/gpu:0"] = gpu_device; + devices["/job:my_job/replica:0/task:0/device:GPU:0"] = gpu_device; VirtualCluster cluster(devices); VirtualPlacer placer(&cluster); @@ -122,7 +122,7 @@ TEST(VirtualPlacerTest, RemoteDevices) { // Device falls back to GPU. EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); node.set_device("/job:my_job/replica:0/task:0/cpu:0"); @@ -130,27 +130,27 @@ TEST(VirtualPlacerTest, RemoteDevices) { EXPECT_EQ("/job:my_job/replica:0/task:0/cpu:0", placer.get_canonical_device_name(node)); - node.set_device("/job:my_job/replica:0/task:0/gpu:0"); + node.set_device("/job:my_job/replica:0/task:0/device:GPU:0"); EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); // There is no local cpu available. Device falls back to GPU. node.set_device("CPU"); EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); node.set_device("GPU:0"); // There is no local GPU available. Fall back to default GPU. EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); // This isn't a valid name. Fall back to GPU. node.set_device("/job:my_job/replica:0/task:0"); EXPECT_EQ("GPU", placer.get_device(node).type()); - EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0", + EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0", placer.get_canonical_device_name(node)); } diff --git a/tensorflow/core/grappler/optimizers/model_pruner_test.cc b/tensorflow/core/grappler/optimizers/model_pruner_test.cc index aea1fcd7c93..ee722f311ed 100644 --- a/tensorflow/core/grappler/optimizers/model_pruner_test.cc +++ b/tensorflow/core/grappler/optimizers/model_pruner_test.cc @@ -320,14 +320,14 @@ TEST_F(ModelPrunerTest, PruningPerservesCrossDeviceIdentity) { Output c = ops::Const(s.WithOpName("c").WithDevice("/cpu:0"), 0.0f, {10, 10}); // Node i1 should be preserved. 
- Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/gpu:0"), c); - Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/gpu:0"), {i1}); - Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/gpu:0"), {i1}); + Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/device:GPU:0"), c); + Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/device:GPU:0"), {i1}); + Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/device:GPU:0"), {i1}); // Node i2 should be pruned since it resides on the sender's device. Output i2 = ops::Identity(s.WithOpName("i2").WithDevice("/cpu:0"), c); - Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/gpu:0"), {i2}); - Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/gpu:0"), {i2}); + Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/device:GPU:0"), {i2}); + Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/device:GPU:0"), {i2}); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); diff --git a/tensorflow/core/platform/default/gpu_tracer.cc b/tensorflow/core/platform/default/gpu_tracer.cc index 50c27b3cf6b..3f855461276 100644 --- a/tensorflow/core/platform/default/gpu_tracer.cc +++ b/tensorflow/core/platform/default/gpu_tracer.cc @@ -579,8 +579,8 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) { // TODO(pbar) Handle device IDs and prefix properly. const string prefix = ""; const int id = 0; - const string stream_device = strings::StrCat(prefix, "/gpu:", id, "/stream:"); - const string memcpy_device = strings::StrCat(prefix, "/gpu:", id, "/memcpy"); + const string stream_device = strings::StrCat(prefix, "/device:GPU:", id, "/stream:"); + const string memcpy_device = strings::StrCat(prefix, "/device:GPU:", id, "/memcpy"); mutex_lock l2(trace_mu_); for (const auto &rec : kernel_records_) { diff --git a/tensorflow/core/platform/gpu_tracer_test.cc b/tensorflow/core/platform/gpu_tracer_test.cc index 713282c1fd8..f6c2c6cb379 100644 --- a/tensorflow/core/platform/gpu_tracer_test.cc +++ b/tensorflow/core/platform/gpu_tracer_test.cc @@ -63,12 +63,12 @@ class GPUTracerTest : public ::testing::Test { Tensor x_tensor(DT_FLOAT, TensorShape({2, 1})); test::FillValues(&x_tensor, {1, 1}); Node* x = test::graph::Constant(&graph, x_tensor); - x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); x_ = x->name(); // y = A * x Node* y = test::graph::Matmul(&graph, a, x, false, false); - y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); y_ = y->name(); // Use an Identity op to force a memcpy to CPU and back to GPU. @@ -77,7 +77,7 @@ class GPUTracerTest : public ::testing::Test { Node* y_neg = test::graph::Unary(&graph, "Neg", i); y_neg_ = y_neg->name(); - y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0"); + y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0"); test::graph::ToGraphDef(&graph, &def_); } diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 6db38a59aef..06118e6eb21 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -127,10 +127,10 @@ tfprof> advise Not running under xxxx. Skip JobChecker. 
AcceleratorUtilizationChecker: -device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03 -device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08 -device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04 -device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21 +device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03 +device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08 +device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04 +device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21 OperationChecker: Found operation using NHWC data_format on GPU. Maybe NCHW is faster. diff --git a/tensorflow/core/profiler/g3doc/advise.md b/tensorflow/core/profiler/g3doc/advise.md index cc16c8fdffd..d87b0d8603d 100644 --- a/tensorflow/core/profiler/g3doc/advise.md +++ b/tensorflow/core/profiler/g3doc/advise.md @@ -31,10 +31,10 @@ tfprof --graph_path=graph.pbtxt \ tfprof> advise AcceleratorUtilizationChecker: -device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03 -device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08 -device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04 -device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21 +device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03 +device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08 +device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04 +device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21 OperationChecker: Found operation using NHWC data_format on GPU. Maybe NCHW is faster. diff --git a/tensorflow/core/profiler/g3doc/profile_time.md b/tensorflow/core/profiler/g3doc/profile_time.md index db555b36174..e11a75553b2 100644 --- a/tensorflow/core/profiler/g3doc/profile_time.md +++ b/tensorflow/core/profiler/g3doc/profile_time.md @@ -134,7 +134,7 @@ AddN 50.10ms (17.33%, 1.34%), 5481 tfprof> op -select micros,device -order_by micros node name | execution time | assigned devices SoftmaxCrossEntropyWithLogits 1.37sec (100.00%, 36.44%), /job:worker/replica:0/task:0/cpu:0 -MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/gpu:0|/job:worker/replica:0/task:0/gpu:1|/job:worker/replica:0/task:0/gpu:2|/job:worker/replica:0/task:0/gpu:3 +MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/device:GPU:0|/job:worker/replica:0/task:0/device:GPU:1|/job:worker/replica:0/task:0/device:GPU:2|/job:worker/replica:0/task:0/device:GPU:3 ``` diff --git a/tensorflow/core/profiler/internal/advisor/tfprof_advisor_test.cc b/tensorflow/core/profiler/internal/advisor/tfprof_advisor_test.cc index 096c1d915ca..23ed287f7bb 100644 --- a/tensorflow/core/profiler/internal/advisor/tfprof_advisor_test.cc +++ b/tensorflow/core/profiler/internal/advisor/tfprof_advisor_test.cc @@ -53,10 +53,10 @@ class TFProfAdvisorTest : public ::testing::Test { NodeExecStats node_stat; node_stat.set_all_start_micros(start_miros); node_stat.set_op_end_rel_micros(end_rel_micros); - node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0", node_stat); - node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:all", + node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0", node_stat); + node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:all", node_stat); - node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:0", + node->AddStepStat(step, 
"/job:localhost/replica:0/task:0/device:GPU:0:stream:0", node_stat); return node; } diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc index 70b91c37e4b..d4a784ffaa6 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.cc +++ b/tensorflow/core/profiler/internal/tfprof_node.cc @@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) { } bool CountAsCPUTime(const string& device) { - return RE2::FullMatch(device, ".*/(gpu|cpu|device:sycl):\\d+"); + return RE2::FullMatch(device, ".*/(device:gpu|gpu|cpu|device:sycl):\\d+"); } bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); } @@ -143,7 +143,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device, // TODO(xpan): Make this more robust? // See run_metadata_test.py - // It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0. + // It can be /job:0/replica:0/xxxx/device:GPU:0, or simply /device:GPU:0. // It can has some ad-hoc suffix, such as /stream:xx or /memcpy:xx. if (IsCanonicalDevice(dev)) { if (!canonical_device_.empty()) { diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 69311e3a7f3..56bb709e119 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -42,7 +42,7 @@ message GPUOptions { // A comma-separated list of GPU ids that determines the 'visible' // to 'virtual' mapping of GPU devices. For example, if TensorFlow // can see 8 GPU devices in the process, and one wanted to map - // visible GPU devices 5 and 3 as "/gpu:0", and "/gpu:1", then one + // visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one // would specify this field as "5,3". This field is similar in // spirit to the CUDA_VISIBLE_DEVICES environment variable, except // it applies to the visible GPU devices in the process. diff --git a/tensorflow/core/util/device_name_utils_test.cc b/tensorflow/core/util/device_name_utils_test.cc index 008100aa446..9a3f8849a65 100644 --- a/tensorflow/core/util/device_name_utils_test.cc +++ b/tensorflow/core/util/device_name_utils_test.cc @@ -76,21 +76,21 @@ TEST(DeviceNameUtilsTest, Basic) { DeviceNameUtils::ParsedName p; EXPECT_FALSE(DeviceNameUtils::ParseFullName("foobar", &p)); EXPECT_FALSE( - DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:3", &p)); + DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/device:GPU:3", &p)); EXPECT_FALSE( DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:", &p)); EXPECT_FALSE(DeviceNameUtils::ParseFullName( "/job:123/replica:1/task:2/device:gpu:", &p)); EXPECT_FALSE( - DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/gpu:3", &p)); + DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/device:GPU:3", &p)); EXPECT_FALSE( - DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/gpu:3", &p)); + DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/device:GPU:3", &p)); EXPECT_FALSE( DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/bar:3", &p)); EXPECT_FALSE(DeviceNameUtils::ParseFullName( - "/job:foo/replica:1/task:2/gpu:3/extra", &p)); + "/job:foo/replica:1/task:2/device:GPU:3/extra", &p)); EXPECT_TRUE( - DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/gpu:3", &p)); + DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/device:GPU:3", &p)); EXPECT_TRUE(p.has_job); EXPECT_TRUE(p.has_replica); EXPECT_TRUE(p.has_task); @@ -106,7 +106,7 @@ TEST(DeviceNameUtilsTest, Basic) { // Allow _ in job names. 
DeviceNameUtils::ParsedName p; EXPECT_TRUE(DeviceNameUtils::ParseFullName( - "/job:foo_bar/replica:1/task:2/gpu:3", &p)); + "/job:foo_bar/replica:1/task:2/device:GPU:3", &p)); EXPECT_TRUE(p.has_job); EXPECT_TRUE(p.has_replica); EXPECT_TRUE(p.has_task); @@ -193,7 +193,7 @@ TEST(DeviceNameUtilsTest, Basic) { } { DeviceNameUtils::ParsedName p; - EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/gpu:5", &p)); + EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/device:GPU:5", &p)); EXPECT_FALSE(p.has_job); EXPECT_TRUE(p.has_replica); EXPECT_FALSE(p.has_task); @@ -216,13 +216,13 @@ TEST(DeviceNameUtilsTest, Basic) { } EXPECT_TRUE(DeviceNameUtils::IsSameAddressSpace( - "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/gpu:4")); + "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/device:GPU:4")); EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace( - "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/gpu:4")); + "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/device:GPU:4")); EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace( - "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/gpu:4")); + "/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/device:GPU:4")); EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace( - "/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/gpu:4")); + "/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/device:GPU:4")); EXPECT_EQ(DeviceNameUtils::LocalName("CPU", 1), "CPU:1"); EXPECT_EQ(DeviceNameUtils::LocalName("GPU", 2), "GPU:2"); @@ -284,17 +284,17 @@ static bool IsCSHelper(StringPiece pattern, StringPiece actual) { } TEST(DeviceNameUtilsTest, IsCompleteSpecification) { - EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/gpu:3")); + EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE( - IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3")); - EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/gpu:3")); + IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3")); + EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE(IsCSHelper("/job:*/replica:*/task:*", - "/job:work/replica:1/task:2/gpu:3")); + "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE( - IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/gpu:3")); - EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3")); - EXPECT_FALSE(IsCSHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1")); - EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3")); + IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/device:GPU:3")); + EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3")); + EXPECT_FALSE(IsCSHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1")); + EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3")); } static bool IsSpecHelper(StringPiece pattern, StringPiece actual) { @@ -305,36 +305,36 @@ static bool IsSpecHelper(StringPiece pattern, StringPiece actual) { } TEST(DeviceNameUtilsTest, IsSpecification) { - EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/gpu:3")); - EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/gpu:3")); + EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3")); + EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/device:GPU:3")); EXPECT_TRUE(IsSpecHelper("/job:*", 
"/job:work/replica:1")); EXPECT_TRUE(IsSpecHelper("/job:*", "/replica:1")); EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work")); EXPECT_TRUE( - IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3")); + IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:*", - "/job:work/replica:1/task:2/gpu:3")); - EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:3", - "/job:work/replica:1/task:2/gpu:3")); + "/job:work/replica:1/task:2/device:GPU:3")); + EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/device:GPU:3", + "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/task:2", - "/job:work/replica:1/task:2/gpu:3")); + "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_TRUE(IsSpecHelper("/job:work/replica:*/task:2", - "/job:work/replica:1/task:2/gpu:3")); - EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/gpu:3")); - EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/gpu:3")); + "/job:work/replica:1/task:2/device:GPU:3")); + EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/device:GPU:3")); + EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/device:GPU:3")); EXPECT_TRUE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/cpu:1")); EXPECT_TRUE(IsSpecHelper("/cpu:0", "/cpu:0")); - EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3")); + EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3")); - EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/gpu:3", "/gpu:*")); + EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/device:GPU:3", "/gpu:*")); EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2")); - EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/gpu:1")); - EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3")); - EXPECT_FALSE(IsSpecHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1")); + EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/device:GPU:1")); + EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3")); + EXPECT_FALSE(IsSpecHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1")); EXPECT_FALSE(IsSpecHelper("/job:work/replica:*/task:0", - "/job:work/replica:1/task:2/gpu:3")); + "/job:work/replica:1/task:2/device:GPU:3")); EXPECT_FALSE(IsSpecHelper("/job:work/replica:0/task:2", - "/job:work/replica:*/task:2/gpu:3")); + "/job:work/replica:*/task:2/device:GPU:3")); } TEST(DeviceNameUtilsTest, SplitDeviceName) { @@ -348,7 +348,7 @@ TEST(DeviceNameUtilsTest, SplitDeviceName) { "/job:foo/cpu:1/task:2/replica:1", &task, &device)); EXPECT_EQ("/job:foo/replica:1/task:2", task); EXPECT_EQ("CPU:1", device); - EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/gpu:3", &task, &device)); + EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/device:GPU:3", &task, &device)); EXPECT_EQ("", task); EXPECT_EQ("GPU:3", device); EXPECT_FALSE(DeviceNameUtils::SplitDeviceName("gpu:3", &task, &device)); @@ -413,11 +413,11 @@ TEST(DeviceNameUtilsTest, MergeDevNames) { MergeDevNamesHelper("", "/job:foo", "/job:foo"); MergeDevNamesHelper("", "/replica:2", "/replica:2"); MergeDevNamesHelper("", "/task:7", "/task:7"); - // MergeDevNamesHelper("", "/gpu:1", "/gpu:1"); + // MergeDevNamesHelper("", "/device:GPU:1", "/device:GPU:1"); // Combining disjoint names. 
MergeDevNamesHelper("/job:foo", "/task:7", "/job:foo/task:7"); - MergeDevNamesHelper("/job:foo", "/gpu:1", "/job:foo/gpu:1"); + MergeDevNamesHelper("/job:foo", "/device:GPU:1", "/job:foo/device:GPU:1"); // Combining overlapping names. MergeDevNamesHelper("/job:foo/replica:0", "/replica:0/task:1", @@ -426,25 +426,25 @@ TEST(DeviceNameUtilsTest, MergeDevNames) { // Wildcard tests. MergeDevNamesHelper("", "/gpu:*", "/gpu:*"); MergeDevNamesHelper("/gpu:*", "/gpu:*", "/gpu:*"); - MergeDevNamesHelper("/gpu:1", "/gpu:*", "/gpu:1"); + MergeDevNamesHelper("/device:GPU:1", "/gpu:*", "/device:GPU:1"); // Incompatible components. MergeDevNamesError("/job:foo", "/job:bar", "incompatible jobs"); MergeDevNamesError("/replica:0", "/replica:1", "incompatible replicas"); MergeDevNamesError("/task:0", "/task:1", "incompatible tasks"); MergeDevNamesError("/gpu:*", "/cpu:*", "incompatible types"); - MergeDevNamesError("/gpu:0", "/gpu:1", "incompatible ids"); + MergeDevNamesError("/device:GPU:0", "/device:GPU:1", "incompatible ids"); } TEST(DeviceNameUtilsTest, MergeDevNamesAllowSoftPlacement) { // Incompatible components with allow_soft_placement. MergeDevNamesHelperAllowSoftPlacement("/gpu:*", "/cpu:1", ""); - MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/gpu:1", ""); - MergeDevNamesHelperAllowSoftPlacement("/gpu:1", "/gpu:2", "/gpu:*"); + MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/device:GPU:1", ""); + MergeDevNamesHelperAllowSoftPlacement("/device:GPU:1", "/device:GPU:2", "/device:GPU:*"); } TEST(DeviceNameUtilsTest, GetNamesForDeviceMappings) { - DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/gpu:1"); + DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/device:GPU:1"); EXPECT_EQ(str_util::Join(DeviceNameUtils::GetNamesForDeviceMappings(p), ","), "/job:foo/replica:10/task:0/device:GPU:1," "/job:foo/replica:10/task:0/gpu:1"); diff --git a/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md b/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md index b56a4884b4c..496d43dfd7e 100644 --- a/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md +++ b/tensorflow/docs_src/api_guides/python/contrib.seq2seq.md @@ -73,12 +73,12 @@ other wrappers and the dynamic decoder described below. For example, one can write: ```python -cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:0") +cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:0") attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs) attn_cell = tf.contrib.seq2seq.AttentionWrapper( cell, attention_mechanism, attention_size=256) -attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/gpu:1") -top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:1") +attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/device:GPU:1") +top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:1") multi_cell = MultiRNNCell([attn_cell, top_cell]) ``` diff --git a/tensorflow/docs_src/programmers_guide/variables.md b/tensorflow/docs_src/programmers_guide/variables.md index dd18760e1dd..b265dbbe3e1 100644 --- a/tensorflow/docs_src/programmers_guide/variables.md +++ b/tensorflow/docs_src/programmers_guide/variables.md @@ -110,7 +110,7 @@ devices. 
For example, the following snippet creates a variable named `v` and places it on the second GPU device: ``` python -with tf.device("/gpu:1"): +with tf.device("/device:GPU:1"): v = tf.get_variable("v", [1]) ``` diff --git a/tensorflow/docs_src/tutorials/deep_cnn.md b/tensorflow/docs_src/tutorials/deep_cnn.md index a9e9dda12b9..de3f8338d30 100644 --- a/tensorflow/docs_src/tutorials/deep_cnn.md +++ b/tensorflow/docs_src/tutorials/deep_cnn.md @@ -411,7 +411,7 @@ the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`. * A preferred hardware device to run the operation within a tower. @{tf.device} specifies this. For -instance, all operations in the first tower reside within `device('/gpu:0')` +instance, all operations in the first tower reside within `device('/device:GPU:0')` scope indicating that they should be run on the first GPU. All variables are pinned to the CPU and accessed via diff --git a/tensorflow/docs_src/tutorials/using_gpu.md b/tensorflow/docs_src/tutorials/using_gpu.md index dcec62d2749..b6edbe33451 100644 --- a/tensorflow/docs_src/tutorials/using_gpu.md +++ b/tensorflow/docs_src/tutorials/using_gpu.md @@ -7,8 +7,8 @@ supported device types are `CPU` and `GPU`. They are represented as `strings`. For example: * `"/cpu:0"`: The CPU of your machine. -* `"/gpu:0"`: The GPU of your machine, if you have one. -* `"/gpu:1"`: The second GPU of your machine, etc. +* `"/device:GPU:0"`: The GPU of your machine, if you have one. +* `"/device:GPU:1"`: The second GPU of your machine, etc. If a TensorFlow operation has both CPU and GPU implementations, the GPU devices will be given priority when the operation is assigned to a device. For example, @@ -35,11 +35,11 @@ You should see the following output: ``` Device mapping: -/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus id: 0000:05:00.0 -b: /job:localhost/replica:0/task:0/gpu:0 -a: /job:localhost/replica:0/task:0/gpu:0 -MatMul: /job:localhost/replica:0/task:0/gpu:0 +b: /job:localhost/replica:0/task:0/device:GPU:0 +a: /job:localhost/replica:0/task:0/device:GPU:0 +MatMul: /job:localhost/replica:0/task:0/device:GPU:0 [[ 22. 28.] [ 49. 64.]] @@ -71,11 +71,11 @@ example) and automatically copy tensors between devices if required. ``` Device mapping: -/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus id: 0000:05:00.0 b: /job:localhost/replica:0/task:0/cpu:0 a: /job:localhost/replica:0/task:0/cpu:0 -MatMul: /job:localhost/replica:0/task:0/gpu:0 +MatMul: /job:localhost/replica:0/task:0/device:GPU:0 [[ 22. 28.] [ 49. 64.]] ``` @@ -127,7 +127,7 @@ to specify the preference explicitly: ```python # Creates a graph. 
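# GPU ids are zero-based, so '/device:GPU:2' names the machine's third GPU;
# the placement below is only satisfiable on a machine with at least three GPUs.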
-with tf.device('/gpu:2'): +with tf.device('/device:GPU:2'): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') c = tf.matmul(a, b) @@ -142,9 +142,9 @@ If the device you have specified does not exist, you will get ``` InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b': -Could not satisfy explicit device specification '/gpu:2' +Could not satisfy explicit device specification '/device:GPU:2' [[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2] - values: 1 2 3...>, _device="/gpu:2"]()]] + values: 1 2 3...>, _device="/device:GPU:2"]()]] ``` If you would like TensorFlow to automatically choose an existing and supported @@ -154,7 +154,7 @@ the session. ```python # Creates a graph. -with tf.device('/gpu:2'): +with tf.device('/device:GPU:2'): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a') b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b') c = tf.matmul(a, b) @@ -175,7 +175,7 @@ For example: ``` # Creates a graph. c = [] -for d in ['/gpu:2', '/gpu:3']: +for d in ['/device:GPU:2', '/device:GPU:3']: with tf.device(d): a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3]) b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2]) @@ -192,20 +192,20 @@ You will see the following output. ``` Device mapping: -/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K20m, pci bus +/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus id: 0000:02:00.0 -/job:localhost/replica:0/task:0/gpu:1 -> device: 1, name: Tesla K20m, pci bus +/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus id: 0000:03:00.0 -/job:localhost/replica:0/task:0/gpu:2 -> device: 2, name: Tesla K20m, pci bus +/job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus id: 0000:83:00.0 -/job:localhost/replica:0/task:0/gpu:3 -> device: 3, name: Tesla K20m, pci bus +/job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus id: 0000:84:00.0 -Const_3: /job:localhost/replica:0/task:0/gpu:3 -Const_2: /job:localhost/replica:0/task:0/gpu:3 -MatMul_1: /job:localhost/replica:0/task:0/gpu:3 -Const_1: /job:localhost/replica:0/task:0/gpu:2 -Const: /job:localhost/replica:0/task:0/gpu:2 -MatMul: /job:localhost/replica:0/task:0/gpu:2 +Const_3: /job:localhost/replica:0/task:0/device:GPU:3 +Const_2: /job:localhost/replica:0/task:0/device:GPU:3 +MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3 +Const_1: /job:localhost/replica:0/task:0/device:GPU:2 +Const: /job:localhost/replica:0/task:0/device:GPU:2 +MatMul: /job:localhost/replica:0/task:0/device:GPU:2 AddN: /job:localhost/replica:0/task:0/cpu:0 [[ 44. 56.] [ 98. 128.]] diff --git a/tensorflow/examples/learn/multiple_gpu.py b/tensorflow/examples/learn/multiple_gpu.py index c7364d1f720..a294950a386 100644 --- a/tensorflow/examples/learn/multiple_gpu.py +++ b/tensorflow/examples/learn/multiple_gpu.py @@ -47,12 +47,12 @@ def my_model(features, labels, mode): # Create three fully connected layers respectively of size 10, 20, and 10 with # each layer having a dropout probability of 0.1. net = features[X_FEATURE] - with tf.device('/gpu:1'): + with tf.device('/device:GPU:1'): for units in [10, 20, 10]: net = tf.layers.dense(net, units=units, activation=tf.nn.relu) net = tf.layers.dropout(net, rate=0.1) - with tf.device('/gpu:2'): + with tf.device('/device:GPU:2'): # Compute logits (1 per class).
logits = tf.layers.dense(net, 3, activation=None) diff --git a/tensorflow/python/client/session_clusterspec_prop_test.py b/tensorflow/python/client/session_clusterspec_prop_test.py index 6a89755bbda..b77912b4f74 100644 --- a/tensorflow/python/client/session_clusterspec_prop_test.py +++ b/tensorflow/python/client/session_clusterspec_prop_test.py @@ -173,7 +173,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): # # W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device: # /job:worker/replica:0/task:0/device:CPU:0 all devices: - # /job:local/replica:0/task:0/gpu:0, + # /job:local/replica:0/task:0/device:GPU:0, # /job:local/replica:0/task:0/device:GPU:0, # /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0, # /job:local/replica:0/task:0/device:CPU:1, @@ -198,7 +198,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase): sum1 = input1 + input2 if test.is_gpu_available(): - device_str = '/job:worker/task:0/gpu:0' + device_str = '/job:worker/task:0/device:GPU:0' else: device_str = '/job:worker/task:0/cpu:1' with ops.device(device_str): diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 15e7ae18bb0..b4f0fd6f404 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -1124,7 +1124,7 @@ class SessionTest(test_util.TensorFlowTestCase): # which is why placing this is invalid. If at some point # GPU kernels are added to this test, some other different # op / device combo should be chosen. - with ops.device('/gpu:0'): + with ops.device('/device:GPU:0'): a = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(1.0, shape=[1, 2]) @@ -1145,7 +1145,7 @@ class SessionTest(test_util.TensorFlowTestCase): # which is why placing this is invalid. If at some point # GPU kernels are added to this test, some other different # op / device combo should be chosen. 
- with ops.device('/gpu:0'): + with ops.device('/device:GPU:0'): _ = constant_op.constant(1.0, shape=[1, 2]) b = constant_op.constant(1.0, shape=[1, 2]) @@ -1494,7 +1494,7 @@ class SessionTest(test_util.TensorFlowTestCase): allow_soft_placement=True, graph_options=config_pb2.GraphOptions(build_cost_model=100)) with session.Session(config=config) as sess: - with ops.device('/gpu:0'): + with ops.device('/device:GPU:0'): a = array_ops.placeholder(dtypes.float32, shape=[]) b = math_ops.add(a, a) c = array_ops.identity(b) diff --git a/tensorflow/python/client/timeline_test.py b/tensorflow/python/client/timeline_test.py index e8797712e91..8396df5f400 100644 --- a/tensorflow/python/client/timeline_test.py +++ b/tensorflow/python/client/timeline_test.py @@ -100,8 +100,8 @@ class TimelineTest(test.TestCase): self.assertTrue(run_metadata.HasField('step_stats')) step_stats = run_metadata.step_stats devices = [d.device for d in step_stats.dev_stats] - self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in devices) - self.assertTrue('/gpu:0/stream:all' in devices) + self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices) + self.assertTrue('/device:GPU:0/stream:all' in devices) tl = timeline.Timeline(step_stats) ctf = tl.generate_chrome_trace_format() self._validateTrace(ctf) diff --git a/tensorflow/python/debug/lib/debug_data.py b/tensorflow/python/debug/lib/debug_data.py index 044a91a7ce6..b2b3ec5d470 100644 --- a/tensorflow/python/debug/lib/debug_data.py +++ b/tensorflow/python/debug/lib/debug_data.py @@ -380,7 +380,8 @@ def device_path_to_device_name(device_dir): path_items = os.path.basename(device_dir)[ len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",") return "/".join([ - path_item.replace("_", ":", 1) for path_item in path_items]) + path_item.replace("device_", "device:").replace("_", ":", 1) + for path_item in path_items]) class DebugTensorDatum(object): diff --git a/tensorflow/python/debug/lib/debug_data_test.py b/tensorflow/python/debug/lib/debug_data_test.py index eff70b662bd..694010a23cd 100644 --- a/tensorflow/python/debug/lib/debug_data_test.py +++ b/tensorflow/python/debug/lib/debug_data_test.py @@ -237,11 +237,11 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase): gpu_0_dir = os.path.join( self._dump_root, debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG + - ",job_localhost,replica_0,task_0,gpu_0") + ",job_localhost,replica_0,task_0,device_GPU_0") gpu_1_dir = os.path.join( self._dump_root, debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG + - ",job_localhost,replica_0,task_0,gpu_1") + ",job_localhost,replica_0,task_0,device_GPU_1") os.makedirs(cpu_0_dir) os.makedirs(gpu_0_dir) os.makedirs(gpu_1_dir) @@ -281,12 +281,12 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase): node = graph_gpu_0.node.add() node.name = "node_foo_1" node.op = "FooOp" - node.device = "/job:localhost/replica:0/task:0/gpu:0" + node.device = "/job:localhost/replica:0/task:0/device:GPU:0" graph_gpu_1 = graph_pb2.GraphDef() node = graph_gpu_1.node.add() node.name = "node_foo_1" node.op = "FooOp" - node.device = "/job:localhost/replica:0/task:0/gpu:1" + node.device = "/job:localhost/replica:0/task:0/device:GPU:1" dump_dir = debug_data.DebugDumpDir( self._dump_root, @@ -294,14 +294,14 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase): self.assertItemsEqual( ["/job:localhost/replica:0/task:0/cpu:0", - "/job:localhost/replica:0/task:0/gpu:0", - "/job:localhost/replica:0/task:0/gpu:1"], dump_dir.devices()) + "/job:localhost/replica:0/task:0/device:GPU:0", + 
"/job:localhost/replica:0/task:0/device:GPU:1"], dump_dir.devices()) self.assertEqual(1472563253536385, dump_dir.t0) self.assertEqual(3, dump_dir.size) with self.assertRaisesRegexp( ValueError, r"Invalid device name: "): - dump_dir.nodes("/job:localhost/replica:0/task:0/gpu:2") + dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2") self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"], dump_dir.nodes()) self.assertItemsEqual( @@ -319,16 +319,16 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase): node = graph_gpu_0.node.add() node.name = "node_foo_1" node.op = "FooOp" - node.device = "/job:localhost/replica:0/task:0/gpu:0" + node.device = "/job:localhost/replica:0/task:0/device:GPU:0" graph_gpu_1 = graph_pb2.GraphDef() node = graph_gpu_1.node.add() node.name = "node_foo_1" node.op = "FooOp" - node.device = "/job:localhost/replica:0/task:0/gpu:1" + node.device = "/job:localhost/replica:0/task:0/device:GPU:1" node = graph_gpu_1.node.add() # Here is the duplicate. node.name = "node_foo_1" node.op = "FooOp" - node.device = "/job:localhost/replica:0/task:0/gpu:1" + node.device = "/job:localhost/replica:0/task:0/device:GPU:1" with self.assertRaisesRegexp( ValueError, r"Duplicate node name on device "): diff --git a/tensorflow/python/debug/lib/session_debug_testlib.py b/tensorflow/python/debug/lib/session_debug_testlib.py index e54590adfea..08b3e75e7c8 100644 --- a/tensorflow/python/debug/lib/session_debug_testlib.py +++ b/tensorflow/python/debug/lib/session_debug_testlib.py @@ -711,7 +711,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase): # Test node name list lookup of the DebugDumpDir object. if test_util.gpu_device_name(): node_names = dump.nodes( - device_name="/job:localhost/replica:0/task:0/gpu:0") + device_name="/job:localhost/replica:0/task:0/device:GPU:0") else: node_names = dump.nodes() self.assertTrue(u_name in node_names) diff --git a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py index 575d74bbf09..3d18d7727ab 100644 --- a/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py +++ b/tensorflow/python/debug/wrappers/local_cli_wrapper_test.py @@ -402,7 +402,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase): def testRuntimeErrorBeforeGraphExecutionIsRaised(self): # Use an impossible device name to cause an error before graph execution. 
- with ops.device("/gpu:1337"): + with ops.device("/device:GPU:1337"): w = variables.Variable([1.0] * 10, name="w") wrapped_sess = LocalCLIDebuggerWrapperSessionForTest( diff --git a/tensorflow/python/framework/device_test.py b/tensorflow/python/framework/device_test.py index e6dc3c80637..0859e956ffd 100644 --- a/tensorflow/python/framework/device_test.py +++ b/tensorflow/python/framework/device_test.py @@ -79,17 +79,17 @@ class DeviceTest(test_util.TensorFlowTestCase): self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string()) d.parse_from_string("/replica:1/task:0/device:CPU:0") self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string()) - d.parse_from_string("/job:muu/gpu:2") + d.parse_from_string("/job:muu/device:GPU:2") self.assertEquals("/job:muu/device:GPU:2", d.to_string()) with self.assertRaises(Exception) as e: - d.parse_from_string("/job:muu/gpu:2/cpu:0") + d.parse_from_string("/job:muu/device:GPU:2/cpu:0") self.assertTrue("Cannot specify multiple device" in str(e.exception)) def testFromString(self): d = device.DeviceSpec.from_string("/job:foo/replica:0") self.assertEquals("/job:foo/replica:0", d.to_string()) with self.assertRaises(Exception) as e: - d = device.DeviceSpec.from_string("/job:muu/gpu:2/cpu:0") + d = device.DeviceSpec.from_string("/job:muu/device:GPU:2/cpu:0") self.assertTrue("Cannot specify multiple device" in str(e.exception)) d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*") @@ -102,13 +102,13 @@ class DeviceTest(test_util.TensorFlowTestCase): def testMerge(self): d = device.DeviceSpec.from_string("/job:foo/replica:0") self.assertEquals("/job:foo/replica:0", d.to_string()) - d.merge_from(device.DeviceSpec.from_string("/task:1/gpu:2")) + d.merge_from(device.DeviceSpec.from_string("/task:1/device:GPU:2")) self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string()) d = device.DeviceSpec() d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0")) self.assertEquals("/task:1/device:CPU:0", d.to_string()) - d.merge_from(device.DeviceSpec.from_string("/job:boo/gpu:0")) + d.merge_from(device.DeviceSpec.from_string("/job:boo/device:GPU:0")) self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string()) d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2")) self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string()) @@ -134,10 +134,10 @@ class DeviceTest(test_util.TensorFlowTestCase): self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0", device.canonical_name( - "/job:foo/replica:0/task:0/gpu:0")) + "/job:foo/replica:0/task:0/device:GPU:0")) self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0", device.canonical_name( - "/gpu:0/task:0/replica:0/job:foo")) + "/device:GPU:0/task:0/replica:0/job:foo")) def testCheckValid(self): device.check_valid("/job:foo/replica:0") @@ -155,7 +155,7 @@ class DeviceTest(test_util.TensorFlowTestCase): self.assertTrue("Unknown attribute: 'bar'" in str(e.exception)) with self.assertRaises(Exception) as e: - device.check_valid("/cpu:0/gpu:2") + device.check_valid("/cpu:0/device:GPU:2") self.assertTrue("Cannot specify multiple device" in str(e.exception)) diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index c94e05c4ee9..589db9ef4dc 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -505,7 +505,7 @@ class FunctionTest(test.TestCase): _ = PlusOne(1, name="p1") with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"): - _ = PlusOne(1, 
device="/gpu:0") + _ = PlusOne(1, device="/device:GPU:0") def testFunctionDecorator(self): diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index f6e9bc9dad3..647ed1583a0 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -106,9 +106,9 @@ class DeviceFunctionsTest(test.TestCase): var_0 = variables.Variable(0) with ops.device(test_device_func_pin_variable_to_cpu): var_1 = variables.Variable(1) - with ops.device(lambda op: "/gpu:0"): + with ops.device(lambda op: "/device:GPU:0"): var_2 = variables.Variable(2) - with ops.device("/gpu:0"): # Implicit merging device function. + with ops.device("/device:GPU:0"): # Implicit merging device function. var_3 = variables.Variable(3) self.assertDeviceEqual(var_0.device, None) diff --git a/tensorflow/python/framework/importer_test.py b/tensorflow/python/framework/importer_test.py index cfba6af5232..8ce8e76629d 100644 --- a/tensorflow/python/framework/importer_test.py +++ b/tensorflow/python/framework/importer_test.py @@ -878,7 +878,7 @@ class ImportGraphDefTest(test.TestCase): self.assertEqual(c.device, c4.device) # worker overrides ps. with ops.Graph().as_default(): - with ops.device(device.merge_device("/gpu:0")): + with ops.device(device.merge_device("/device:GPU:0")): a5, b5, c5 = importer.import_graph_def( gdef, return_elements=["a", "b", "c"]) self.assertEqual("/device:GPU:0", a5.device) diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 13a92c3c7ec..65abb695991 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -550,7 +550,7 @@ class ScopedMetaGraphTest(test.TestCase): a = variables.Variable( constant_op.constant( 1.0, shape=[2, 2]), name="a") - with ops.device("/job:ps/replica:0/task:0/gpu:0"): + with ops.device("/job:ps/replica:0/task:0/device:GPU:0"): b = variables.Variable( constant_op.constant( 2.0, shape=[2, 2]), name="b") diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index ee7f77d5527..9a7f76cb588 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3342,7 +3342,7 @@ class Graph(object): For example: ```python - with g.device('/gpu:0'): + with g.device('/device:GPU:0'): # All operations constructed in this context will be placed # on GPU 0. with g.device(None): @@ -3352,7 +3352,7 @@ class Graph(object): # Defines a function from `Operation` to device string. def matmul_on_gpu(n): if n.type == "MatMul": - return "/gpu:0" + return "/device:GPU:0" else: return "/cpu:0" diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 4a8e30f4cbc..5507585a663 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1555,26 +1555,26 @@ class ColocationGroupTest(test_util.TensorFlowTestCase): def testColocationDeviceInteraction(self): with ops.device("/cpu:0"): - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): a = constant_op.constant([2.0], name="a") with ops.colocate_with(a.op): # 'b' is created in the scope of /cpu:0, but it is - # colocated with 'a', which is on '/gpu:0'. colocate_with + # colocated with 'a', which is on '/device:GPU:0'. colocate_with # overrides devices because it is a stronger constraint. 
b = constant_op.constant(3.0) self.assertEqual([b"loc:@a"], b.op.colocation_groups()) self.assertEqual(a.op.device, b.op.device) def testColocationCanonicalization(self): - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): _ = constant_op.constant(2.0) - with ops.device(lambda op: "/gpu:0"): + with ops.device(lambda op: "/device:GPU:0"): b = constant_op.constant(3.0) with ops.get_default_graph().colocate_with(b): - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): c = constant_op.constant(4.0) - # A's device will be /gpu:0 + # A's device will be /device:GPU:0 # B's device will be /device:GPU:0 # C's device will be /device:GPU:0 because it # inherits B's device name, after canonicalizing the names. @@ -1582,10 +1582,10 @@ def testLocationOverrides(self): with ops.device("/cpu:0"): - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): a = constant_op.constant([2.0], name="a") # Note that this colocation is "redundant", since we are - # within the scope of "/gpu:0". However, we would like to + # within the scope of "/device:GPU:0". However, we would like to # preserve in the GraphDef that these two ops should be # colocated in a portable way. with ops.colocate_with(a.op): @@ -1652,7 +1652,7 @@ self.assertEqual([b"loc:@a"], b.op.colocation_groups()) def testInconsistentDeviceWithinColocate(self): - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): a = constant_op.constant([2.0], name="a") with ops.colocate_with(a.op): # This is allowed due to legacy but clearly wrong, since we diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index d9e507d23ce..e159cfa44bd 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -405,7 +405,7 @@ class TensorFlowTestCase(googletest.TestCase): trigger the creation of a new session. Use the `use_gpu` and `force_gpu` options to control where ops are run. If - `force_gpu` is True, all ops are pinned to `/gpu:0`. Otherwise, if `use_gpu` + `force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as possible. If both `force_gpu` and `use_gpu` are False, all ops are pinned to the CPU. @@ -427,7 +427,7 @@ config: An optional config_pb2.ConfigProto to use to configure the session. use_gpu: If True, attempt to run as many ops as possible on GPU. - force_gpu: If True, pin all ops to `/gpu:0`. + force_gpu: If True, pin all ops to `/device:GPU:0`.
Returns: A Session object that should be used as a context manager to surround @@ -466,11 +466,11 @@ class TensorFlowTestCase(googletest.TestCase): sess = self._cached_session with sess.graph.as_default(), sess.as_default(): if force_gpu: - # Use the name of an actual device if one is detected, or '/gpu:0' + # Use the name of an actual device if one is detected, or '/device:GPU:0' # otherwise gpu_name = gpu_device_name() if not gpu_name: - gpu_name = "/gpu:0" + gpu_name = "/device:GPU:0" with sess.graph.device(gpu_name): yield sess elif use_gpu: @@ -481,11 +481,11 @@ class TensorFlowTestCase(googletest.TestCase): else: with session.Session(graph=graph, config=prepare_config(config)) as sess: if force_gpu: - # Use the name of an actual device if one is detected, or '/gpu:0' + # Use the name of an actual device if one is detected, or '/device:GPU:0' # otherwise gpu_name = gpu_device_name() if not gpu_name: - gpu_name = "/gpu:0" + gpu_name = "/device:GPU:0" with sess.graph.device(gpu_name): yield sess elif use_gpu: diff --git a/tensorflow/python/kernel_tests/basic_gpu_test.py b/tensorflow/python/kernel_tests/basic_gpu_test.py index 155aad8bd9a..405651e8ae9 100644 --- a/tensorflow/python/kernel_tests/basic_gpu_test.py +++ b/tensorflow/python/kernel_tests/basic_gpu_test.py @@ -238,7 +238,7 @@ class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase): n_iterations = 500 with session as s: data = variables.Variable(1.0) - with ops.device('/gpu:0'): + with ops.device('/device:GPU:0'): random_seed.set_random_seed(1) matrix1 = variables.Variable( random_ops.truncated_normal([1024, 1]), name='matrix1') diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py index 5369d2d5c49..d783522e820 100644 --- a/tensorflow/python/kernel_tests/cholesky_op_test.py +++ b/tensorflow/python/kernel_tests/cholesky_op_test.py @@ -311,7 +311,7 @@ class CholeskyBenchmark(test.Benchmark): if test.is_gpu_available(True): with ops.Graph().as_default(), \ session.Session() as sess, \ - ops.device("/gpu:0"): + ops.device("/device:GPU:0"): l = linalg_ops.cholesky(data) self.run_op_benchmark( sess, @@ -338,11 +338,11 @@ class CholeskyBenchmark(test.Benchmark): if test.is_gpu_available(True): _BenchmarkGrad( - MatrixInverseCompositeGrad, "composite_matrix_inverse", "/gpu:0") + MatrixInverseCompositeGrad, "composite_matrix_inverse", "/device:GPU:0") _BenchmarkGrad( - TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/gpu:0") + TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/device:GPU:0") _BenchmarkGrad( - TriAngSolveCompositeGrad, "composite_triangular_solve", "/gpu:0") + TriAngSolveCompositeGrad, "composite_triangular_solve", "/device:GPU:0") _BenchmarkGrad( MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0") diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 91694cd0b25..e3aac5019c1 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -1423,9 +1423,8 @@ class ControlFlowTest(test.TestCase): self.assertEqual(45, rx.eval()) def _testWhileGrad_ColocateGradients(self, colocate): - gpu_dev_name = test.gpu_device_name().lower() if test.is_gpu_available( - ) else "/gpu:0" - gpu_short_name = gpu_dev_name.split("/")[-1] + gpu_dev_name = test.gpu_device_name() if test.is_gpu_available( + ) else "/device:GPU:0" with self.test_session(graph=ops.Graph()) as sess: v = 
constant_op.constant(2.0, name="v") @@ -1439,19 +1438,19 @@ class ControlFlowTest(test.TestCase): r = gradients_impl.gradients( loop, v, colocate_gradients_with_ops=colocate)[0] r_ops = r.graph.get_operations() - r_devices = [(op.name, op.device.lower()) for op in r_ops] + r_devices = [(op.name, op.device) for op in r_ops] self.assertTrue(any("Square" in op.name for op in r_ops)) for (name, dev) in r_devices: if not colocate and name.endswith("Square"): # Only forward graph contain gpu in Square device - self.assertTrue(gpu_short_name in dev) + self.assertTrue(gpu_dev_name in dev) elif colocate and "Square" in name: # Forward and backward graphs contain gpu in Square/Square_grad devices - self.assertTrue(gpu_short_name in dev) + self.assertTrue(gpu_dev_name in dev) else: - self.assertFalse(gpu_short_name in dev) + self.assertFalse(gpu_dev_name in dev) self.assertAllClose(1024.0, sess.run(r)) def testWhileGrad_ColocateGradients(self): @@ -2426,7 +2425,7 @@ class ControlFlowTest(test.TestCase): # device set on tensor, default device on graph => default device on dep. vdef = variables.Variable([0.0], name="vdef") - with ops.device("/job:worker/gpu:1"): + with ops.device("/job:worker/device:GPU:1"): with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer], vdef) # The device is empty, but the colocation constraint is set. diff --git a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py index a0bd178e247..e20c6992525 100644 --- a/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_tensor_dense_matmul_op_test.py @@ -347,7 +347,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh, ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( x_t, y_t, adjoint_a, adjoint_b) else: - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): x_t = constant_op.constant(x) y_t = constant_op.constant(y) ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense( @@ -365,7 +365,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh, ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse( x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b) else: - with ops.device("/gpu:0"): + with ops.device("/device:GPU:0"): x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T) x_val = constant_op.constant(x[np.where(x)]) x_shape = constant_op.constant(np.array(x.shape).astype(np.int64)) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index c8dafa4c3dd..b1a9458f7a2 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -722,7 +722,7 @@ class VariableScopeTest(test.TestCase): def device_func(op): if op.type in ["Variable", "VariableV2", "VarHandleOp"]: varname_type.append((op.name, op.get_attr("dtype"))) - return "/gpu:0" + return "/device:GPU:0" with g.as_default(): with ops.device(device_func): diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index aefed34d744..11c204b5b7f 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -163,20 +163,20 @@ class GradientsTest(test_util.TensorFlowTestCase): with ops.Graph().as_default() as g: w = constant(1.0, shape=[1, 1]) x = constant(1.0, shape=[1, 2]) - with g.device("/gpu:0"): + with g.device("/device:GPU:0"): wx = 
math_ops.matmul(w, x) gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0] self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups()) def testColocateGradientsWithAggregation(self): with ops.Graph().as_default() as g: - with g.device("/gpu:1"): + with g.device("/device:GPU:1"): w = constant(1.0, shape=[1, 1]) x = constant(1.0, shape=[1, 2]) y = constant(1.0, shape=[1, 2]) wx = math_ops.matmul(w, x) wy = math_ops.matmul(w, y) - with g.device("/gpu:0"): + with g.device("/device:GPU:0"): z = wx + wy gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0] @@ -187,7 +187,7 @@ class GradientsTest(test_util.TensorFlowTestCase): def testColocateGradientsWithAggregationInMultipleDevices(self): with ops.Graph().as_default() as g: - with g.device("/gpu:1"): + with g.device("/device:GPU:1"): w = constant(1.0, shape=[1, 1]) x = constant(1.0, shape=[1, 2]) y = constant(1.0, shape=[1, 2]) @@ -195,7 +195,7 @@ class GradientsTest(test_util.TensorFlowTestCase): wx = math_ops.matmul(w, x) with g.device("/task:2"): wy = math_ops.matmul(w, y) - with g.device("/gpu:0"): + with g.device("/device:GPU:0"): z = wx + wy gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0] diff --git a/tensorflow/python/ops/matmul_benchmark.py b/tensorflow/python/ops/matmul_benchmark.py index b777ace9d0c..f95cf08de1a 100644 --- a/tensorflow/python/ops/matmul_benchmark.py +++ b/tensorflow/python/ops/matmul_benchmark.py @@ -47,7 +47,7 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype): Returns: A matmul operation to run() """ - with ops.device('/%s:0' % device): + with ops.device('%s' % device): if not transpose_a: x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype)) else: @@ -112,7 +112,7 @@ class MatmulBenchmark(test.Benchmark): return duration def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters): - self.run_graph('gpu', n, m, k, transpose_a, transpose_b, num_iters, dtype) + self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b, num_iters, dtype) def test_round(self, num_iters): dtypes = [np.float32, np.float64] diff --git a/tensorflow/python/ops/matmul_benchmark_test.py b/tensorflow/python/ops/matmul_benchmark_test.py index a7914dba787..5a9c0a7a495 100644 --- a/tensorflow/python/ops/matmul_benchmark_test.py +++ b/tensorflow/python/ops/matmul_benchmark_test.py @@ -71,37 +71,39 @@ class MatmulBenchmarkTest(googletest.TestCase): def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype): graph = ops.Graph() with graph.as_default(): - matmul_benchmark.build_graph("gpu", n, m, k, transpose_a, transpose_b, + matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b, dtype) gd = graph.as_graph_def() - self.assertProtoEquals(""" - node { name: "random_uniform/shape" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform/min" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform/max" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: "/device:GPU:0" } - node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: "/device:GPU:0" } - node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: "/device:GPU:0" } - node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: "/device:GPU:0" } - node 
{ name: "Variable" op: "VariableV2" device: "/device:GPU:0" } - node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: "/device:GPU:0" } - node { name: "Variable/read" op: "Identity" input: "Variable" device: "/device:GPU:0" } - node { name: "random_uniform_1/shape" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform_1/min" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform_1/max" op: "Const" device: "/device:GPU:0" } - node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: "/device:GPU:0" } - node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: "/device:GPU:0" } - node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: "/device:GPU:0" } - node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: "/device:GPU:0" } - node { name: "Variable_1" op: "VariableV2" device: "/device:GPU:0" } - node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: "/device:GPU:0" } - node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: "/device:GPU:0" } - node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: "/device:GPU:0" } - node { name: "group_deps" op: "NoOp" input: "^MatMul" device: "/device:GPU:0" } - """, self._StripGraph(gd)) + dev=googletest.gpu_device_name() + proto_expected = """ + node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" } + node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" } + node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" } + node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" } + node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" } + node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" } + node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" } + node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" } + node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" } + node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" } + node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: 
"random_uniform_1" device: \""""+ dev +"""\" } + node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" } + node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" } + node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" } + """ + self.assertProtoEquals(str(proto_expected), self._StripGraph(gd)) def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype): benchmark_instance = matmul_benchmark.MatmulBenchmark() - duration = benchmark_instance.run_graph("gpu", n, m, k, transpose_a, + duration = benchmark_instance.run_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b, 1, dtype) self.assertTrue(duration > 1e-6) diff --git a/tensorflow/python/profiler/internal/run_metadata_test.py b/tensorflow/python/profiler/internal/run_metadata_test.py index 62b2314aea0..b758edf87ef 100644 --- a/tensorflow/python/profiler/internal/run_metadata_test.py +++ b/tensorflow/python/profiler/internal/run_metadata_test.py @@ -97,21 +97,22 @@ class RunMetadataTest(test.TestCase): if not test.is_gpu_available(cuda_only=True): return + gpu_dev = test.gpu_device_name() ops.reset_default_graph() - with ops.device('/gpu:0'): + with ops.device(gpu_dev): tfprof_node, run_meta = _run_model() self.assertEqual(tfprof_node.children[0].name, 'MatMul') self.assertGreater(tfprof_node.children[0].exec_micros, 10) ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul']) self.assertEqual(len(ret), 3) - self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in ret) - del ret['/job:localhost/replica:0/task:0/gpu:0'] + self.assertTrue('/job:localhost/replica:0/task:0' + gpu_dev in ret) + del ret['/job:localhost/replica:0/task:0' + gpu_dev] has_all_stream = False for k, _ in six.iteritems(ret): - self.assertTrue('gpu:0/stream' in k) - if 'gpu:0/stream:all' in k: + self.assertTrue(gpu_dev + '/stream' in k) + if gpu_dev + '/stream:all' in k: has_all_stream = True self.assertTrue(has_all_stream) @@ -159,24 +160,24 @@ class RunMetadataTest(test.TestCase): return ops.reset_default_graph() - with ops.device('/gpu:0'): + with ops.device('/device:GPU:0'): tfprof_node, run_meta = _run_loop_model() # The while-loop caused a node to appear 4 times in scheduling. ret = _extract_node(run_meta, 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul') - self.assertEqual(len(ret['/job:localhost/replica:0/task:0/gpu:0']), 4) + self.assertEqual(len(ret['/job:localhost/replica:0/task:0/device:GPU:0']), 4) total_cpu_execs = 0 - for node in ret['/job:localhost/replica:0/task:0/gpu:0']: + for node in ret['/job:localhost/replica:0/task:0/device:GPU:0']: total_cpu_execs += node.op_end_rel_micros ret = _extract_node( run_meta, 'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul') - self.assertGreaterEqual(len(ret['/gpu:0/stream:all']), 4) + self.assertGreaterEqual(len(ret['/device:GPU:0/stream:all']), 4) total_accelerator_execs = 0 - for node in ret['/gpu:0/stream:all']: + for node in ret['/device:GPU:0/stream:all']: total_accelerator_execs += node.op_end_rel_micros mm_node = lib.SearchTFProfNode( diff --git a/tensorflow/python/profiler/option_builder.py b/tensorflow/python/profiler/option_builder.py index e2e022425dd..502fc49bb62 100644 --- a/tensorflow/python/profiler/option_builder.py +++ b/tensorflow/python/profiler/option_builder.py @@ -315,7 +315,7 @@ class ProfileOptionBuilder(object): """Selectively counting statistics based on node types. Here, 'types' means the profiler nodes' properties. 
Profiler by default - consider device name (e.g. /job:xx/.../gpu:0) and operation type + considers device name (e.g. /job:xx/.../device:GPU:0) and operation type (e.g. MatMul) as profiler nodes' properties. User can also associate customized 'types' to profiler nodes through OpLogProto proto. diff --git a/tensorflow/tools/graph_transforms/remove_device_test.cc b/tensorflow/tools/graph_transforms/remove_device_test.cc index 554c5e35952..17a87cd2366 100644 --- a/tensorflow/tools/graph_transforms/remove_device_test.cc +++ b/tensorflow/tools/graph_transforms/remove_device_test.cc @@ -50,7 +50,7 @@ class RemoveDeviceTest : public ::testing::Test { add_node2->set_op("Add"); add_node2->add_input("const_node1"); add_node2->add_input("const_node2"); - add_node2->set_device("//gpu:1"); + add_node2->set_device("//device:GPU:1"); NodeDef* add_node3 = graph_def.add_node(); add_node3->set_name("add_node3");
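For reference, the dump-directory naming scheme that `device_path_to_device_name` in `tensorflow/python/debug/lib/debug_data.py` (patched above) has to invert replaces every colon in a device name with an underscore, so a two-colon component such as `device:GPU:1` becomes `device_GPU_1`. Restoring only the first underscore would yield the wrong name `device:GPU_1`, which is why the `device:` prefix is put back before the remaining underscore is converted. A minimal sketch of the round trip (helper names are illustrative, not part of the patch):

```python
def device_name_to_path_item(device_name):
    # Dump-time renaming: every colon becomes an underscore,
    # e.g. "device:GPU:1" -> "device_GPU_1".
    return device_name.replace(":", "_")


def path_item_to_device_name(path_item):
    # Load-time inverse, mirroring device_path_to_device_name above:
    # restore the "device:" prefix first so two-colon components survive,
    # then restore the single remaining colon.
    return path_item.replace("device_", "device:").replace("_", ":", 1)


# Round trip for a two-colon and a one-colon component.
assert path_item_to_device_name(device_name_to_path_item("device:GPU:1")) == "device:GPU:1"
assert path_item_to_device_name(device_name_to_path_item("job:localhost")) == "job:localhost"
```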