[OpenCL] Extends matmul_benchmark.py to cover SYCL (#11697)

* [OpenCL] Extends matmul_benchmark.py to cover SYCL

* Fixed typo

* /gpu:0 -> /device:GPU:0

* Fixes control_flow_ops_py_test

* /gpu: -> /device:GPU:

* Fixes //tensorflow/python/profiler/internal:run_metadata_test

* gpu: -> GPU:

* Fixes tfprof_node

* [OpenCL] Fixes device-path-to-name conversion for device names with multiple colons (#123)

The device path is constructed from a device name by replacing all
colons with underscores. Some device names contain more than one colon,
for example 'device:SYCL:0', which gives the path 'device_SYCL_0'. The
previous code did not convert this back to the original device name;
instead it produced 'device:SYCL_0'.

An alternative fix would be to convert all underscores in the device
name back to colons (i.e. remove the count limit in
`replace("_", ":", 1)`); however, I'm not sure whether any device names
contain underscores.
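
For reference, a minimal sketch of the round trip described above. The helper
names here are illustrative only, not from the codebase; the fixed variant
mirrors the `replace("device_", "device:").replace("_", ":", 1)` change to
`device_path_to_device_name` further down in this diff:

```python
# Hypothetical helpers illustrating the conversion; names are not from the codebase.
def device_name_to_path_item(device_name):
    # Forward direction used when building dump directories:
    # "device:SYCL:0" -> "device_SYCL_0".
    return device_name.replace(":", "_")

def path_item_to_device_name_old(path_item):
    # Old behaviour: only the first underscore is restored,
    # so "device_SYCL_0" -> "device:SYCL_0" (not the original name).
    return path_item.replace("_", ":", 1)

def path_item_to_device_name_fixed(path_item):
    # Fixed behaviour, mirroring the device_path_to_device_name change below:
    # "device_SYCL_0" -> "device:SYCL:0".
    return path_item.replace("device_", "device:").replace("_", ":", 1)

assert path_item_to_device_name_old(device_name_to_path_item("device:SYCL:0")) == "device:SYCL_0"
assert path_item_to_device_name_fixed(device_name_to_path_item("device:SYCL:0")) == "device:SYCL:0"
```

With the fix, names such as 'device:GPU:0' and 'device:SYCL:0' round-trip
correctly, while single-colon path items like 'job_worker' are unaffected.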

* If no GPU device is available, fake one

* gpu: -> device:GPU

* Fixes profiler test

* /gpu:x -> /device:GPU:x

* Fixes debug_io_utils_test.cc test

* Fixes device_name_utils_test.cc

Luke Iwanski 2017-08-11 01:35:21 +01:00 committed by Rasmus Munk Larsen
parent 35e7a36658
commit ab96f41fb4
69 changed files with 286 additions and 285 deletions

View File

@ -101,7 +101,7 @@ void ConcurrentSteps(const Options* opts, int session_index) {
std::unique_ptr<Session> session(NewSession(options));
GraphDef def = CreateGraphDef();
if (options.target.empty()) {
graph::SetDefaultDevice(opts->use_gpu ? "/gpu:0" : "/cpu:0", &def);
graph::SetDefaultDevice(opts->use_gpu ? "/device:GPU:0" : "/cpu:0", &def);
}
TF_CHECK_OK(session->Create(def));

View File

@ -93,7 +93,7 @@ class CudnnRNNBenchmark(test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with ops.Graph().as_default(), ops.device("/gpu:0"):
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
params_size_t = model.params_size()
input_data = variables.Variable(
@ -125,7 +125,7 @@ class CudnnRNNBenchmark(test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with ops.Graph().as_default(), ops.device("/gpu:0"):
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
inputs = seq_length * [
array_ops.zeros([batch_size, num_units], dtypes.float32)
]
@ -153,7 +153,7 @@ class CudnnRNNBenchmark(test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with ops.Graph().as_default(), ops.device("/gpu:0"):
with ops.Graph().as_default(), ops.device("/device:GPU:0"):
inputs = seq_length * [
array_ops.zeros([batch_size, num_units], dtypes.float32)
]

View File

@ -634,7 +634,7 @@ class MixtureBenchmark(test.Benchmark):
np.random.seed(127)
with session.Session(config=config, graph=ops.Graph()) as sess:
random_seed.set_random_seed(0)
with ops.device("/gpu:0" if use_gpu else "/cpu:0"):
with ops.device("/device:GPU:0" if use_gpu else "/cpu:0"):
mixture = create_distribution(
num_components=num_components,
batch_size=batch_size,

View File

@ -443,19 +443,19 @@ class VariablesTest(test.TestCase):
e = variables_lib2.variable('e', initializer=e_init)
# The values below highlight how the VariableDeviceChooser puts initial
# values on the same device as the variable job.
self.assertDeviceEqual(a.device, '/gpu:0')
self.assertDeviceEqual(a.device, '/device:GPU:0')
self.assertEqual(a.initial_value.op.colocation_groups(),
a.op.colocation_groups())
self.assertDeviceEqual(b.device, '/gpu:0')
self.assertDeviceEqual(b.device, '/device:GPU:0')
self.assertEqual(b.initial_value.op.colocation_groups(),
b.op.colocation_groups())
self.assertDeviceEqual(c.device, '/cpu:12')
self.assertEqual(c.initial_value.op.colocation_groups(),
c.op.colocation_groups())
self.assertDeviceEqual(d.device, '/gpu:0')
self.assertDeviceEqual(d.device, '/device:GPU:0')
self.assertEqual(d.initial_value.op.colocation_groups(),
d.op.colocation_groups())
self.assertDeviceEqual(e.device, '/gpu:0')
self.assertDeviceEqual(e.device, '/device:GPU:0')
self.assertDeviceEqual(e.initial_value.device, '/cpu:99')

View File

@ -43,7 +43,7 @@ class AllReduceTest(test.TestCase):
self._testSingleAllReduce(sess, dtype, nccl.all_max, np.maximum)
def _testSingleAllReduce(self, sess, np_type, nccl_fn, numpy_accumulation_fn):
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
shape = (3, 4)
np_ans = None
tensors = []
@ -84,7 +84,7 @@ class BroadcastTest(test.TestCase):
# Create session inside outer loop to test use of
# same communicator across multiple sessions.
with self.test_session(use_gpu=True) as sess:
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
shape = (3, 4)
sender = np.random.randint(0, len(devices) - 1)
with ops.device(devices[sender]):
@ -115,7 +115,7 @@ class CombinedTest(test.TestCase):
# Create session inside outer loop to test use of
# same communicator across multiple sessions.
with self.test_session(use_gpu=True) as sess:
for devices in [['/gpu:0', '/gpu:0', '/gpu:0'], ['/gpu:0', '/gpu:0']]:
for devices in [['/device:GPU:0', '/device:GPU:0', '/device:GPU:0'], ['/device:GPU:0', '/device:GPU:0']]:
shape = (3, 4)
# all-reduce

View File

@ -446,12 +446,12 @@ class RNNCellTest(test.TestCase):
# Can't perform this test w/o a GPU
return
gpu_dev = test.gpu_device_name()
with self.test_session(use_gpu=True) as sess:
with variable_scope.variable_scope(
"root", initializer=init_ops.constant_initializer(0.5)):
x = array_ops.zeros([1, 1, 3])
cell = rnn_cell_impl.DeviceWrapper(
rnn_cell_impl.GRUCell(3), test_util.gpu_device_name())
cell = rnn_cell_impl.DeviceWrapper(rnn_cell_impl.GRUCell(3), gpu_dev)
with ops.device("/cpu:0"):
outputs, _ = rnn.dynamic_rnn(
cell=cell, inputs=x, dtype=dtypes.float32)
@ -463,8 +463,7 @@ class RNNCellTest(test.TestCase):
_ = sess.run(outputs, options=opts, run_metadata=run_metadata)
step_stats = run_metadata.step_stats
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
("sycl" in step_stats.dev_stats[0].device)) else 1
ix = 0 if gpu_dev in step_stats.dev_stats[0].device else 1
gpu_stats = step_stats.dev_stats[ix].node_stats
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
self.assertFalse([s for s in cpu_stats if "gru_cell" in s.node_name])

View File

@ -42,7 +42,6 @@ from tensorflow.python.ops import variables as variables_lib
from tensorflow.python.platform import test
from tensorflow.python.platform import tf_logging
from tensorflow.python.util import nest
from tensorflow.python.framework import test_util
class Plus1RNNCell(rnn_lib.RNNCell):
"""RNN Cell generating (output, new_state) = (input + 1, state + 1)."""
@ -2208,11 +2207,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
if not test.is_gpu_available():
return # Test requires access to a GPU
gpu_dev = test.gpu_device_name()
run_metadata = self._execute_rnn_on(
rnn_device="/cpu:0", cell_device=test_util.gpu_device_name())
rnn_device="/cpu:0", cell_device=gpu_dev)
step_stats = run_metadata.step_stats
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
("sycl" in step_stats.dev_stats[0].device)) else 1
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
gpu_stats = step_stats.dev_stats[ix].node_stats
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
@ -2233,12 +2232,12 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
if not test.is_gpu_available():
return # Test requires access to a GPU
gpu_dev = test.gpu_device_name()
run_metadata = self._execute_rnn_on(
rnn_device="/cpu:0", cell_device="/cpu:0",
input_device=test_util.gpu_device_name())
input_device=gpu_dev)
step_stats = run_metadata.step_stats
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
("sycl" in step_stats.dev_stats[0].device)) else 1
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
gpu_stats = step_stats.dev_stats[ix].node_stats
cpu_stats = step_stats.dev_stats[1 - ix].node_stats
@ -2253,11 +2252,11 @@ class TensorArrayOnCorrectDeviceTest(test.TestCase):
if not test.is_gpu_available():
return # Test requires access to a GPU
gpu_dev = test.gpu_device_name()
run_metadata = self._execute_rnn_on(
input_device=test_util.gpu_device_name())
input_device=gpu_dev)
step_stats = run_metadata.step_stats
ix = 0 if (("gpu" in step_stats.dev_stats[0].device) or
("sycl" in step_stats.dev_stats[0].device)) else 1
ix = 0 if (gpu_dev in step_stats.dev_stats[0].device) else 1
gpu_stats = step_stats.dev_stats[ix].node_stats
cpu_stats = step_stats.dev_stats[1 - ix].node_stats

View File

@ -357,7 +357,7 @@ def training_gru_block_vs_gru_cell(batch_size,
ops.reset_default_graph()
with session.Session(graph=ops.Graph()) as sess:
# Specify the device which is been used.
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
# Random initializers.
seed = 1994
@ -429,7 +429,7 @@ def inference_gru_block_vs_gru_cell(batch_size,
"""Benchmark inference speed between GRUBlockCell vs GRUCell."""
ops.reset_default_graph()
with session.Session(graph=ops.Graph()) as sess:
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
# Random initializers.
seed = 1994
@ -484,7 +484,7 @@ def single_bprop_step_gru_block_vs_gru_cell(batch_size,
"""Benchmark single bprop step speed between GRUBlockCell vs GRUCell."""
ops.reset_default_graph()
with session.Session(graph=ops.Graph()) as sess:
with ops.device("/cpu:0" if not use_gpu else "/gpu:0"):
with ops.device("/cpu:0" if not use_gpu else "/device:GPU:0"):
initializer = init_ops.random_uniform_initializer(-1, 1, seed=1989)
# Inputs
x = vs.get_variable("x", [batch_size, input_size])

View File

@ -78,7 +78,7 @@ class GatherTreeTest(test.TestCase):
sequence_length = [[3, 3, 3]]
expected_result = _transpose_batch_time(
[[[2, -1, 2], [6, 5, 6], [7, 8, 9], [-1, -1, -1]]])
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
beams = beam_search_ops.gather_tree(
step_ids=step_ids, parent_ids=parent_ids,
sequence_length=sequence_length)

View File

@ -22,7 +22,7 @@ limitations under the License.
// Device names
// * Every Device should have a unique name with the format:
// /job:___/replica:___/task:___/(gpu|cpu):___
// An example name would be "/job:train/replica:0/task:3/gpu:2".
// An example name would be "/job:train/replica:0/task:3/device:GPU:2".
// * Task numbers are within the specified replica, so there are as
// many "task zeros" as replicas.

View File

@ -476,7 +476,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
vx.scalar<float>()() = 1.0;
Node* x = test::graph::Constant(&g, vx);
Node* y = test::graph::Unary(&g, "Darth", x);
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
GraphDef def;
test::graph::ToGraphDef(&g, &def);
@ -494,7 +494,7 @@ TEST(DirectSessionTest, PlacePrunedGraph) {
vx.scalar<float>()() = 1.0;
Node* x = test::graph::Constant(&g, vx);
Node* y = test::graph::Unary(&g, "Darth", x);
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
GraphDef def;
test::graph::ToGraphDef(&g, &def);

View File

@ -154,14 +154,14 @@ static void TestHWAccelerator(bool enableHWTrace) {
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
test::FillValues<float>(&x_tensor, {1, 1});
Node* x = test::graph::Constant(&graph, x_tensor);
x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
#ifdef TENSORFLOW_USE_SYCL
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
#endif // TENSORFLOW_USE_SYCL
// y = A * x
Node* y = test::graph::Matmul(&graph, a, x, false, false);
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
#ifdef TENSORFLOW_USE_SYCL
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:SYCL:0");
#endif // TENSORFLOW_USE_SYCL

View File

@ -588,7 +588,7 @@ Status BaseGPUDeviceFactory::CreateDevices(const SessionOptions& options,
for (int i = 0; i < n; i++) {
BaseGPUDevice* gpu_device;
TF_RETURN_IF_ERROR(CreateGPUDevice(options,
strings::StrCat(name_prefix, "/gpu:", i),
strings::StrCat(name_prefix, "/device:GPU:", i),
valid_gpu_ids[i], &gpu_device));
TF_RETURN_IF_ERROR(gpu_device->Init(options));
devices->push_back(gpu_device);
@ -1049,7 +1049,7 @@ Status BaseGPUDeviceFactory::GetValidDeviceIds(
size_t new_id = ids->size();
ids->push_back(visible_gpu_id);
LOG(INFO) << "Creating TensorFlow device (/gpu:" << new_id << ") -> "
LOG(INFO) << "Creating TensorFlow device (/device:GPU:" << new_id << ") -> "
<< "(" << GetShortDeviceDescription(visible_gpu_id, desc) << ")";
}

View File

@ -141,7 +141,7 @@ class BaseGPUDeviceFactory : public DeviceFactory {
Allocator* cpu_allocator) = 0;
// Returns into 'ids' the list of valid GPU ids, in the order that
// they should map to logical gpu ids "/gpu:0", "/gpu:1", etc, based
// they should map to logical gpu ids "/device:GPU:0", "/device:GPU:1", etc, based
// upon 'visible_device_list', a comma-separated list of 'visible
// gpu ids'.
Status GetValidDeviceIds(const string& visible_device_list,

View File

@ -106,9 +106,9 @@ TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) {
TEST_F(GpuStreamUtilTest, StreamOverrides) {
auto root = Scope::NewRootScope().ExitOnError();
ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
"/gpu:0");
"/device:GPU:0");
Output n = ops::MatMul(root, {}, {});
ops::_Send(root.WithOpName("output"), n, "output", "/gpu:0", 0, "/cpu:0");
ops::_Send(root.WithOpName("output"), n, "output", "/device:GPU:0", 0, "/cpu:0");
Graph g(OpRegistry::Global());
TF_ASSERT_OK(root.ToGraph(&g));

View File

@ -53,7 +53,7 @@ TEST(MemoryTypeChecker, Int32NotOk) {
EXPECT_TRUE(errors::IsInternal(ValidateMemoryTypes(DEVICE_GPU, g)));
// But we can insert _HostSend/_HostRecv to ensure the invariant.
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/gpu:0", g));
TF_EXPECT_OK(EnsureMemoryTypes(DEVICE_GPU, "/device:GPU:0", g));
TF_EXPECT_OK(ValidateMemoryTypes(DEVICE_GPU, g));
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL

View File

@ -86,7 +86,7 @@ void DebugGateway::CopyTensor(const string& node_name, const int output_slot,
// Determine if the tensor is on device (GPU) or host (CPU).
// The second part of the check is necessary because even an OpKernel on
// may have output tensors allocated on CPU.
if ((device->name().find("gpu:") != string::npos || device->name().find("SYCL:") != string::npos) &&
if ((device->name().find("GPU:") != string::npos || device->name().find("SYCL:") != string::npos) &&
!ctx->output_alloc_attr(output_slot).on_host()) {
// GPU tensors: Copy it to host (CPU).
DeviceContext* device_ctxt = ctx->op_device_context();

View File

@ -47,7 +47,7 @@ class SessionDebugMinusAXTest : public ::testing::Test {
Graph graph(OpRegistry::Global());
#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
@ -505,7 +505,7 @@ class SessionDebugOutputSlotWithoutOngoingEdgeTest : public ::testing::Test {
Graph graph(OpRegistry::Global());
#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
@ -607,7 +607,7 @@ class SessionDebugVariableTest : public ::testing::Test {
Graph graph(OpRegistry::Global());
#if GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
#elif defined(TENSORFLOW_USE_SYCL)
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#else
@ -879,7 +879,7 @@ class SessionDebugGPUSwitchTest : public ::testing::Test {
Graph graph(OpRegistry::Global());
#ifdef GOOGLE_CUDA
const string kDeviceName = "/job:localhost/replica:0/task:0/gpu:0";
const string kDeviceName = "/job:localhost/replica:0/task:0/device:GPU:0";
#elif TENSORFLOW_USE_SYCL
const string kDeviceName = "/job:localhost/replica:0/task:0/device:SYCL:0";
#endif

View File

@ -51,14 +51,14 @@ class DebugIOUtilsTest : public ::testing::Test {
};
TEST_F(DebugIOUtilsTest, ConstructDebugNodeKey) {
DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/gpu:2",
DebugNodeKey debug_node_key("/job:worker/replica:1/task:0/device:GPU:2",
"hidden_1/MatMul", 0, "DebugIdentity");
EXPECT_EQ("/job:worker/replica:1/task:0/gpu:2", debug_node_key.device_name);
EXPECT_EQ("/job:worker/replica:1/task:0/device:GPU:2", debug_node_key.device_name);
EXPECT_EQ("hidden_1/MatMul", debug_node_key.node_name);
EXPECT_EQ(0, debug_node_key.output_slot);
EXPECT_EQ("DebugIdentity", debug_node_key.debug_op);
EXPECT_EQ("hidden_1/MatMul:0:DebugIdentity", debug_node_key.debug_node_name);
EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,gpu_2",
EXPECT_EQ("_tfdbg_device_,job_worker,replica_1,task_0,device_GPU_2",
debug_node_key.device_path);
}

View File

@ -140,7 +140,7 @@ Rendezvous::ParsedKey Key(const string& sender, const uint64 incarnation,
}
#define ALICE "/job:j/replica:0/task:0/cpu:0"
#define BOB "/job:j/replica:0/task:0/gpu:0"
#define BOB "/job:j/replica:0/task:0/device:GPU:0"
TEST_F(ExecutorTest, SimpleAdd) {
// c = a + b

View File

@ -31,9 +31,9 @@ TEST(GrpcChannelTest, IsSameAddressSpace) {
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
"/job:mnist/replica:10/task:10/cpu:1"));
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:0",
"/job:mnist/replica:10/task:10/gpu:2"));
"/job:mnist/replica:10/task:10/device:GPU:2"));
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10",
"/job:mnist/replica:10/task:10/gpu:2"));
"/job:mnist/replica:10/task:10/device:GPU:2"));
EXPECT_TRUE(IsSameAddrSp("/job:mnist/replica:10/task:10/cpu:1",
"/job:mnist/replica:10/task:10"));

View File

@ -38,8 +38,8 @@ message NodeDef {
// | ( ("gpu" | "cpu") ":" ([1-9][0-9]* | "*") )
//
// Valid values for this string include:
// * "/job:worker/replica:0/task:1/gpu:3" (full specification)
// * "/job:worker/gpu:3" (partial specification)
// * "/job:worker/replica:0/task:1/device:GPU:3" (full specification)
// * "/job:worker/device:GPU:3" (partial specification)
// * "" (no specification)
//
// If the constraints do not resolve to a single device (or if this

View File

@ -39,11 +39,11 @@ namespace {
TEST(RendezvousTest, Key) {
const string key = Rendezvous::CreateKey(
"/job:mnist/replica:1/task:2/CPU:0", 7890,
"/job:mnist/replica:1/task:2/GPU:0", "var0", FrameAndIter(0, 0));
"/job:mnist/replica:1/task:2/device:GPU:0", "var0", FrameAndIter(0, 0));
EXPECT_EQ(key,
"/job:mnist/replica:1/task:2/CPU:0;"
"0000000000001ed2;" // 7890 = 0x1ed2
"/job:mnist/replica:1/task:2/GPU:0;"
"/job:mnist/replica:1/task:2/device:GPU:0;"
"var0;"
"0:0");
Rendezvous::ParsedKey parsed;
@ -51,12 +51,12 @@ TEST(RendezvousTest, Key) {
EXPECT_EQ(parsed.src_device, "/job:mnist/replica:1/task:2/CPU:0");
EXPECT_EQ(parsed.src_incarnation, 7890);
EXPECT_EQ(parsed.src.type, "CPU");
EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/GPU:0");
EXPECT_EQ(parsed.dst_device, "/job:mnist/replica:1/task:2/device:GPU:0");
EXPECT_EQ(parsed.dst.type, "GPU");
EXPECT_FALSE(Rendezvous::ParseKey("foo;bar;baz", &parsed).ok());
EXPECT_FALSE(Rendezvous::ParseKey("/job:mnist/replica:1/task:2/CPU:0;"
"/job:mnist/replica:1/task:2/GPU:0;",
"/job:mnist/replica:1/task:2/device:GPU:0;",
&parsed)
.ok());
EXPECT_FALSE(
@ -99,7 +99,7 @@ string V(const Tensor& tensor) {
Rendezvous::ParsedKey MakeKey(const string& name) {
string s = Rendezvous::CreateKey("/job:mnist/replica:1/task:2/CPU:0", 7890,
"/job:mnist/replica:1/task:2/GPU:0", name,
"/job:mnist/replica:1/task:2/device:GPU:0", name,
FrameAndIter(0, 0));
Rendezvous::ParsedKey k;
TF_EXPECT_OK(Rendezvous::ParseKey(s, &k));

View File

@ -50,7 +50,7 @@ extern Status TopologicalSortNodesWithTimePriority(
namespace {
const char gpu_device[] = "/job:a/replica:0/task:0/gpu:0";
const char gpu_device[] = "/job:a/replica:0/task:0/device:GPU:0";
string SplitByDevice(const Node* node) { return node->assigned_device_name(); }

View File

@ -40,7 +40,7 @@ namespace tensorflow {
namespace {
const char kCPUDevice[] = "/job:a/replica:0/task:0/cpu:0";
const char kGPUDevice[] = "/job:a/replica:0/task:0/gpu:0";
const char kGPUDevice[] = "/job:a/replica:0/task:0/device:GPU:0";
static void InitGraph(const string& s, Graph* graph,
const string& device = kCPUDevice) {

View File

@ -89,7 +89,7 @@ Status SingleMachine::Provision() {
VLOG(1) << "Number of GPUs: " << num_gpus_;
for (int i = 0; i < num_gpus_; ++i) {
string device_name =
strings::StrCat("/job:localhost/replica:0/task:0/gpu:", i);
strings::StrCat("/job:localhost/replica:0/task:0/device:GPU:", i);
VLOG(1) << "Adding GPU device " << device_name;
devices_[device_name] = GetLocalGPUInfo(i);
}

View File

@ -42,7 +42,7 @@ class AnalyticalCostEstimatorTest : public ::testing::Test {
gpu_device.set_frequency(1100);
gpu_device.set_bandwidth(180 * 1024 * 1024);
(*gpu_device.mutable_environment())["architecture"] = "6";
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
cluster_.reset(new VirtualCluster(devices));
}

View File

@ -30,14 +30,14 @@ TEST(VirtualPlacerTest, LocalDevices) {
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
DeviceProperties gpu_device;
gpu_device.set_type("GPU");
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
VirtualCluster cluster(devices);
VirtualPlacer placer(&cluster);
NodeDef node;
node.set_op("Conv2D");
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
node.set_device("CPU");
@ -47,7 +47,7 @@ TEST(VirtualPlacerTest, LocalDevices) {
node.set_device("GPU:0");
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
}
@ -60,7 +60,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
devices["/job:localhost/replica:0/task:0/cpu:0"] = cpu_device;
DeviceProperties gpu_device;
gpu_device.set_type("GPU");
devices["/job:localhost/replica:0/task:0/gpu:0"] = gpu_device;
devices["/job:localhost/replica:0/task:0/device:GPU:0"] = gpu_device;
VirtualCluster cluster(devices);
VirtualPlacer placer(&cluster);
@ -70,7 +70,7 @@ TEST(VirtualPlacerTest, EmptyJobBecomesLocalhost) {
EXPECT_EQ("/job:localhost/replica:0/task:0/cpu:0",
placer.get_canonical_device_name(node));
node.set_device("/device:GPU:0");
EXPECT_EQ("/job:localhost/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:localhost/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
}
@ -113,7 +113,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
devices["/job:my_job/replica:0/task:0/cpu:0"] = cpu_device;
DeviceProperties gpu_device;
gpu_device.set_type("GPU");
devices["/job:my_job/replica:0/task:0/gpu:0"] = gpu_device;
devices["/job:my_job/replica:0/task:0/device:GPU:0"] = gpu_device;
VirtualCluster cluster(devices);
VirtualPlacer placer(&cluster);
@ -122,7 +122,7 @@ TEST(VirtualPlacerTest, RemoteDevices) {
// Device falls back to GPU.
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
node.set_device("/job:my_job/replica:0/task:0/cpu:0");
@ -130,27 +130,27 @@ TEST(VirtualPlacerTest, RemoteDevices) {
EXPECT_EQ("/job:my_job/replica:0/task:0/cpu:0",
placer.get_canonical_device_name(node));
node.set_device("/job:my_job/replica:0/task:0/gpu:0");
node.set_device("/job:my_job/replica:0/task:0/device:GPU:0");
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
// There is no local cpu available. Device falls back to GPU.
node.set_device("CPU");
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
node.set_device("GPU:0");
// There is no local GPU available. Fall back to default GPU.
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
// This isn't a valid name. Fall back to GPU.
node.set_device("/job:my_job/replica:0/task:0");
EXPECT_EQ("GPU", placer.get_device(node).type());
EXPECT_EQ("/job:my_job/replica:0/task:0/gpu:0",
EXPECT_EQ("/job:my_job/replica:0/task:0/device:GPU:0",
placer.get_canonical_device_name(node));
}

View File

@ -320,14 +320,14 @@ TEST_F(ModelPrunerTest, PruningPerservesCrossDeviceIdentity) {
Output c = ops::Const(s.WithOpName("c").WithDevice("/cpu:0"), 0.0f, {10, 10});
// Node i1 should be preserved.
Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/gpu:0"), c);
Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/gpu:0"), {i1});
Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/gpu:0"), {i1});
Output i1 = ops::Identity(s.WithOpName("i1").WithDevice("/device:GPU:0"), c);
Output a1 = ops::Sqrt(s.WithOpName("a1").WithDevice("/device:GPU:0"), {i1});
Output a2 = ops::Sqrt(s.WithOpName("a2").WithDevice("/device:GPU:0"), {i1});
// Node i2 should be pruned since it resides on the sender's device.
Output i2 = ops::Identity(s.WithOpName("i2").WithDevice("/cpu:0"), c);
Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/gpu:0"), {i2});
Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/gpu:0"), {i2});
Output a3 = ops::Sqrt(s.WithOpName("a3").WithDevice("/device:GPU:0"), {i2});
Output a4 = ops::Sqrt(s.WithOpName("a4").WithDevice("/device:GPU:0"), {i2});
GrapplerItem item;
TF_CHECK_OK(s.ToGraphDef(&item.graph));

View File

@ -579,8 +579,8 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) {
// TODO(pbar) Handle device IDs and prefix properly.
const string prefix = "";
const int id = 0;
const string stream_device = strings::StrCat(prefix, "/gpu:", id, "/stream:");
const string memcpy_device = strings::StrCat(prefix, "/gpu:", id, "/memcpy");
const string stream_device = strings::StrCat(prefix, "/device:GPU:", id, "/stream:");
const string memcpy_device = strings::StrCat(prefix, "/device:GPU:", id, "/memcpy");
mutex_lock l2(trace_mu_);
for (const auto &rec : kernel_records_) {

View File

@ -63,12 +63,12 @@ class GPUTracerTest : public ::testing::Test {
Tensor x_tensor(DT_FLOAT, TensorShape({2, 1}));
test::FillValues<float>(&x_tensor, {1, 1});
Node* x = test::graph::Constant(&graph, x_tensor);
x->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
x->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
x_ = x->name();
// y = A * x
Node* y = test::graph::Matmul(&graph, a, x, false, false);
y->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
y->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
y_ = y->name();
// Use an Identity op to force a memcpy to CPU and back to GPU.
@ -77,7 +77,7 @@ class GPUTracerTest : public ::testing::Test {
Node* y_neg = test::graph::Unary(&graph, "Neg", i);
y_neg_ = y_neg->name();
y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/gpu:0");
y_neg->set_assigned_device_name("/job:localhost/replica:0/task:0/device:GPU:0");
test::graph::ToGraphDef(&graph, &def_);
}

View File

@ -127,10 +127,10 @@ tfprof> advise
Not running under xxxx. Skip JobChecker.
AcceleratorUtilizationChecker:
device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
OperationChecker:
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.

View File

@ -31,10 +31,10 @@ tfprof --graph_path=graph.pbtxt \
tfprof> advise
AcceleratorUtilizationChecker:
device: /job:worker/replica:0/task:0/gpu:0 low utilization: 0.03
device: /job:worker/replica:0/task:0/gpu:1 low utilization: 0.08
device: /job:worker/replica:0/task:0/gpu:2 low utilization: 0.04
device: /job:worker/replica:0/task:0/gpu:3 low utilization: 0.21
device: /job:worker/replica:0/task:0/device:GPU:0 low utilization: 0.03
device: /job:worker/replica:0/task:0/device:GPU:1 low utilization: 0.08
device: /job:worker/replica:0/task:0/device:GPU:2 low utilization: 0.04
device: /job:worker/replica:0/task:0/device:GPU:3 low utilization: 0.21
OperationChecker:
Found operation using NHWC data_format on GPU. Maybe NCHW is faster.

View File

@ -134,7 +134,7 @@ AddN 50.10ms (17.33%, 1.34%), 5481
tfprof> op -select micros,device -order_by micros
node name | execution time | assigned devices
SoftmaxCrossEntropyWithLogits 1.37sec (100.00%, 36.44%), /job:worker/replica:0/task:0/cpu:0
MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/gpu:0|/job:worker/replica:0/task:0/gpu:1|/job:worker/replica:0/task:0/gpu:2|/job:worker/replica:0/task:0/gpu:3
MatMul 618.97ms (63.56%, 16.51%), |/job:worker/replica:0/task:0/cpu:0|/job:worker/replica:0/task:0/device:GPU:0|/job:worker/replica:0/task:0/device:GPU:1|/job:worker/replica:0/task:0/device:GPU:2|/job:worker/replica:0/task:0/device:GPU:3
```

View File

@ -53,10 +53,10 @@ class TFProfAdvisorTest : public ::testing::Test {
NodeExecStats node_stat;
node_stat.set_all_start_micros(start_miros);
node_stat.set_op_end_rel_micros(end_rel_micros);
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0", node_stat);
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:all",
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0", node_stat);
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:all",
node_stat);
node->AddStepStat(step, "/job:localhost/replica:0/task:0/gpu:0:stream:0",
node->AddStepStat(step, "/job:localhost/replica:0/task:0/device:GPU:0:stream:0",
node_stat);
return node;
}

View File

@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) {
}
bool CountAsCPUTime(const string& device) {
return RE2::FullMatch(device, ".*/(gpu|cpu|device:sycl):\\d+");
return RE2::FullMatch(device, ".*/(device:gpu|gpu|cpu|device:sycl):\\d+");
}
bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
@ -143,7 +143,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
// TODO(xpan): Make this more robust?
// See run_metadata_test.py
// It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0.
// It can be /job:0/replica:0/xxxx/device:GPU:0, or simply /device:GPU:0.
// It can has some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
if (IsCanonicalDevice(dev)) {
if (!canonical_device_.empty()) {

View File

@ -42,7 +42,7 @@ message GPUOptions {
// A comma-separated list of GPU ids that determines the 'visible'
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
// can see 8 GPU devices in the process, and one wanted to map
// visible GPU devices 5 and 3 as "/gpu:0", and "/gpu:1", then one
// visible GPU devices 5 and 3 as "/device:GPU:0", and "/device:GPU:1", then one
// would specify this field as "5,3". This field is similar in
// spirit to the CUDA_VISIBLE_DEVICES environment variable, except
// it applies to the visible GPU devices in the process.

View File

@ -76,21 +76,21 @@ TEST(DeviceNameUtilsTest, Basic) {
DeviceNameUtils::ParsedName p;
EXPECT_FALSE(DeviceNameUtils::ParseFullName("foobar", &p));
EXPECT_FALSE(
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:3", &p));
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/device:GPU:3", &p));
EXPECT_FALSE(
DeviceNameUtils::ParseFullName("/job:123/replica:1/task:2/gpu:", &p));
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
"/job:123/replica:1/task:2/device:gpu:", &p));
EXPECT_FALSE(
DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/gpu:3", &p));
DeviceNameUtils::ParseFullName("/job:foo/replica:-1/task:2/device:GPU:3", &p));
EXPECT_FALSE(
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/gpu:3", &p));
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:-2/device:GPU:3", &p));
EXPECT_FALSE(
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/bar:3", &p));
EXPECT_FALSE(DeviceNameUtils::ParseFullName(
"/job:foo/replica:1/task:2/gpu:3/extra", &p));
"/job:foo/replica:1/task:2/device:GPU:3/extra", &p));
EXPECT_TRUE(
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/gpu:3", &p));
DeviceNameUtils::ParseFullName("/job:foo/replica:1/task:2/device:GPU:3", &p));
EXPECT_TRUE(p.has_job);
EXPECT_TRUE(p.has_replica);
EXPECT_TRUE(p.has_task);
@ -106,7 +106,7 @@ TEST(DeviceNameUtilsTest, Basic) {
// Allow _ in job names.
DeviceNameUtils::ParsedName p;
EXPECT_TRUE(DeviceNameUtils::ParseFullName(
"/job:foo_bar/replica:1/task:2/gpu:3", &p));
"/job:foo_bar/replica:1/task:2/device:GPU:3", &p));
EXPECT_TRUE(p.has_job);
EXPECT_TRUE(p.has_replica);
EXPECT_TRUE(p.has_task);
@ -193,7 +193,7 @@ TEST(DeviceNameUtilsTest, Basic) {
}
{
DeviceNameUtils::ParsedName p;
EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/gpu:5", &p));
EXPECT_TRUE(DeviceNameUtils::ParseFullName("/job:*/replica:4/device:GPU:5", &p));
EXPECT_FALSE(p.has_job);
EXPECT_TRUE(p.has_replica);
EXPECT_FALSE(p.has_task);
@ -216,13 +216,13 @@ TEST(DeviceNameUtilsTest, Basic) {
}
EXPECT_TRUE(DeviceNameUtils::IsSameAddressSpace(
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/gpu:4"));
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:2/device:GPU:4"));
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/gpu:4"));
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:1/task:3/device:GPU:4"));
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/gpu:4"));
"/job:foo/replica:1/task:2/cpu:3", "/job:foo/replica:10/task:2/device:GPU:4"));
EXPECT_FALSE(DeviceNameUtils::IsSameAddressSpace(
"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/gpu:4"));
"/job:foo/replica:1/task:2/cpu:3", "/job:bar/replica:1/task:2/device:GPU:4"));
EXPECT_EQ(DeviceNameUtils::LocalName("CPU", 1), "CPU:1");
EXPECT_EQ(DeviceNameUtils::LocalName("GPU", 2), "GPU:2");
@ -284,17 +284,17 @@ static bool IsCSHelper(StringPiece pattern, StringPiece actual) {
}
TEST(DeviceNameUtilsTest, IsCompleteSpecification) {
EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsCSHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(
IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/gpu:3"));
IsCSHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsCSHelper("/job:*/task:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsCSHelper("/job:*/replica:*/task:*",
"/job:work/replica:1/task:2/gpu:3"));
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(
IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/gpu:3"));
EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
EXPECT_FALSE(IsCSHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
IsCSHelper("/job:*/replica:*/gpu:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_FALSE(IsCSHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
EXPECT_FALSE(IsCSHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
EXPECT_TRUE(IsCSHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
}
static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
@ -305,36 +305,36 @@ static bool IsSpecHelper(StringPiece pattern, StringPiece actual) {
}
TEST(DeviceNameUtilsTest, IsSpecification) {
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work/replica:1"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/replica:1"));
EXPECT_TRUE(IsSpecHelper("/job:*", "/job:work"));
EXPECT_TRUE(
IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/gpu:3"));
IsSpecHelper("/job:*/replica:*", "/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:*",
"/job:work/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/gpu:3",
"/job:work/replica:1/task:2/gpu:3"));
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/device:GPU:3",
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:work/replica:1/task:2",
"/job:work/replica:1/task:2/gpu:3"));
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/job:work/replica:*/task:2",
"/job:work/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/gpu:3"));
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/task:*", "/job:*/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/task:2", "/job:*/replica:1/task:2/device:GPU:3"));
EXPECT_TRUE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/cpu:1"));
EXPECT_TRUE(IsSpecHelper("/cpu:0", "/cpu:0"));
EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/gpu:3"));
EXPECT_TRUE(IsSpecHelper("/gpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/gpu:3", "/gpu:*"));
EXPECT_FALSE(IsSpecHelper("/job:worker/replica:1/task:2/device:GPU:3", "/gpu:*"));
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2"));
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/gpu:1"));
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/gpu:3"));
EXPECT_FALSE(IsSpecHelper("/gpu:2", "/job:worker/replica:1/task:2/gpu:1"));
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:*/replica:1/task:2/device:GPU:1"));
EXPECT_FALSE(IsSpecHelper("/cpu:*", "/job:worker/replica:1/task:2/device:GPU:3"));
EXPECT_FALSE(IsSpecHelper("/device:GPU:2", "/job:worker/replica:1/task:2/device:GPU:1"));
EXPECT_FALSE(IsSpecHelper("/job:work/replica:*/task:0",
"/job:work/replica:1/task:2/gpu:3"));
"/job:work/replica:1/task:2/device:GPU:3"));
EXPECT_FALSE(IsSpecHelper("/job:work/replica:0/task:2",
"/job:work/replica:*/task:2/gpu:3"));
"/job:work/replica:*/task:2/device:GPU:3"));
}
TEST(DeviceNameUtilsTest, SplitDeviceName) {
@ -348,7 +348,7 @@ TEST(DeviceNameUtilsTest, SplitDeviceName) {
"/job:foo/cpu:1/task:2/replica:1", &task, &device));
EXPECT_EQ("/job:foo/replica:1/task:2", task);
EXPECT_EQ("CPU:1", device);
EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/gpu:3", &task, &device));
EXPECT_TRUE(DeviceNameUtils::SplitDeviceName("/device:GPU:3", &task, &device));
EXPECT_EQ("", task);
EXPECT_EQ("GPU:3", device);
EXPECT_FALSE(DeviceNameUtils::SplitDeviceName("gpu:3", &task, &device));
@ -413,11 +413,11 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
MergeDevNamesHelper("", "/job:foo", "/job:foo");
MergeDevNamesHelper("", "/replica:2", "/replica:2");
MergeDevNamesHelper("", "/task:7", "/task:7");
// MergeDevNamesHelper("", "/gpu:1", "/gpu:1");
// MergeDevNamesHelper("", "/device:GPU:1", "/device:GPU:1");
// Combining disjoint names.
MergeDevNamesHelper("/job:foo", "/task:7", "/job:foo/task:7");
MergeDevNamesHelper("/job:foo", "/gpu:1", "/job:foo/gpu:1");
MergeDevNamesHelper("/job:foo", "/device:GPU:1", "/job:foo/device:GPU:1");
// Combining overlapping names.
MergeDevNamesHelper("/job:foo/replica:0", "/replica:0/task:1",
@ -426,25 +426,25 @@ TEST(DeviceNameUtilsTest, MergeDevNames) {
// Wildcard tests.
MergeDevNamesHelper("", "/gpu:*", "/gpu:*");
MergeDevNamesHelper("/gpu:*", "/gpu:*", "/gpu:*");
MergeDevNamesHelper("/gpu:1", "/gpu:*", "/gpu:1");
MergeDevNamesHelper("/device:GPU:1", "/gpu:*", "/device:GPU:1");
// Incompatible components.
MergeDevNamesError("/job:foo", "/job:bar", "incompatible jobs");
MergeDevNamesError("/replica:0", "/replica:1", "incompatible replicas");
MergeDevNamesError("/task:0", "/task:1", "incompatible tasks");
MergeDevNamesError("/gpu:*", "/cpu:*", "incompatible types");
MergeDevNamesError("/gpu:0", "/gpu:1", "incompatible ids");
MergeDevNamesError("/device:GPU:0", "/device:GPU:1", "incompatible ids");
}
TEST(DeviceNameUtilsTest, MergeDevNamesAllowSoftPlacement) {
// Incompatible components with allow_soft_placement.
MergeDevNamesHelperAllowSoftPlacement("/gpu:*", "/cpu:1", "");
MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/gpu:1", "");
MergeDevNamesHelperAllowSoftPlacement("/gpu:1", "/gpu:2", "/gpu:*");
MergeDevNamesHelperAllowSoftPlacement("/cpu:*", "/device:GPU:1", "");
MergeDevNamesHelperAllowSoftPlacement("/device:GPU:1", "/device:GPU:2", "/device:GPU:*");
}
TEST(DeviceNameUtilsTest, GetNamesForDeviceMappings) {
DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/gpu:1");
DeviceNameUtils::ParsedName p = Name("/job:foo/replica:10/task:0/device:GPU:1");
EXPECT_EQ(str_util::Join(DeviceNameUtils::GetNamesForDeviceMappings(p), ","),
"/job:foo/replica:10/task:0/device:GPU:1,"
"/job:foo/replica:10/task:0/gpu:1");

View File

@ -73,12 +73,12 @@ other wrappers and the dynamic decoder described below. For example, one can
write:
```python
cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:0")
cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:0")
attention_mechanism = tf.contrib.seq2seq.LuongAttention(512, encoder_outputs)
attn_cell = tf.contrib.seq2seq.AttentionWrapper(
cell, attention_mechanism, attention_size=256)
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/gpu:1")
top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/gpu:1")
attn_cell = tf.contrib.rnn.DeviceWrapper(attn_cell, "/device:GPU:1")
top_cell = tf.contrib.rnn.DeviceWrapper(LSTMCell(512), "/device:GPU:1")
multi_cell = MultiRNNCell([attn_cell, top_cell])
```

View File

@ -110,7 +110,7 @@ devices. For example, the following snippet creates a variable named `v` and
places it on the second GPU device:
``` python
with tf.device("/gpu:1"):
with tf.device("/device:GPU:1"):
v = tf.get_variable("v", [1])
```

View File

@ -411,7 +411,7 @@ the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`.
* A preferred hardware device to run the operation within a tower.
@{tf.device} specifies this. For
instance, all operations in the first tower reside within `device('/gpu:0')`
instance, all operations in the first tower reside within `device('/device:GPU:0')`
scope indicating that they should be run on the first GPU.
All variables are pinned to the CPU and accessed via

View File

@ -7,8 +7,8 @@ supported device types are `CPU` and `GPU`. They are represented as `strings`.
For example:
* `"/cpu:0"`: The CPU of your machine.
* `"/gpu:0"`: The GPU of your machine, if you have one.
* `"/gpu:1"`: The second GPU of your machine, etc.
* `"/device:GPU:0"`: The GPU of your machine, if you have one.
* `"/device:GPU:1"`: The second GPU of your machine, etc.
If a TensorFlow operation has both CPU and GPU implementations, the GPU devices
will be given priority when the operation is assigned to a device. For example,
@ -35,11 +35,11 @@ You should see the following output:
```
Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
id: 0000:05:00.0
b: /job:localhost/replica:0/task:0/gpu:0
a: /job:localhost/replica:0/task:0/gpu:0
MatMul: /job:localhost/replica:0/task:0/gpu:0
b: /job:localhost/replica:0/task:0/device:GPU:0
a: /job:localhost/replica:0/task:0/device:GPU:0
MatMul: /job:localhost/replica:0/task:0/device:GPU:0
[[ 22. 28.]
[ 49. 64.]]
@ -71,11 +71,11 @@ example) and automatically copy tensors between devices if required.
```
Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K40c, pci bus
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K40c, pci bus
id: 0000:05:00.0
b: /job:localhost/replica:0/task:0/cpu:0
a: /job:localhost/replica:0/task:0/cpu:0
MatMul: /job:localhost/replica:0/task:0/gpu:0
MatMul: /job:localhost/replica:0/task:0/device:GPU:0
[[ 22. 28.]
[ 49. 64.]]
```
@ -127,7 +127,7 @@ to specify the preference explicitly:
```python
# Creates a graph.
with tf.device('/gpu:2'):
with tf.device('/device:GPU:2'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
@ -142,9 +142,9 @@ If the device you have specified does not exist, you will get
```
InvalidArgumentError: Invalid argument: Cannot assign a device to node 'b':
Could not satisfy explicit device specification '/gpu:2'
Could not satisfy explicit device specification '/device:GPU:2'
[[Node: b = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3,2]
values: 1 2 3...>, _device="/gpu:2"]()]]
values: 1 2 3...>, _device="/device:GPU:2"]()]]
```
If you would like TensorFlow to automatically choose an existing and supported
@ -154,7 +154,7 @@ the session.
```python
# Creates a graph.
with tf.device('/gpu:2'):
with tf.device('/device:GPU:2'):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3], name='a')
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2], name='b')
c = tf.matmul(a, b)
@ -175,7 +175,7 @@ For example:
```
# Creates a graph.
c = []
for d in ['/gpu:2', '/gpu:3']:
for d in ['/device:GPU:2', '/device:GPU:3']:
with tf.device(d):
a = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[2, 3])
b = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], shape=[3, 2])
@ -192,20 +192,20 @@ You will see the following output.
```
Device mapping:
/job:localhost/replica:0/task:0/gpu:0 -> device: 0, name: Tesla K20m, pci bus
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla K20m, pci bus
id: 0000:02:00.0
/job:localhost/replica:0/task:0/gpu:1 -> device: 1, name: Tesla K20m, pci bus
/job:localhost/replica:0/task:0/device:GPU:1 -> device: 1, name: Tesla K20m, pci bus
id: 0000:03:00.0
/job:localhost/replica:0/task:0/gpu:2 -> device: 2, name: Tesla K20m, pci bus
/job:localhost/replica:0/task:0/device:GPU:2 -> device: 2, name: Tesla K20m, pci bus
id: 0000:83:00.0
/job:localhost/replica:0/task:0/gpu:3 -> device: 3, name: Tesla K20m, pci bus
/job:localhost/replica:0/task:0/device:GPU:3 -> device: 3, name: Tesla K20m, pci bus
id: 0000:84:00.0
Const_3: /job:localhost/replica:0/task:0/gpu:3
Const_2: /job:localhost/replica:0/task:0/gpu:3
MatMul_1: /job:localhost/replica:0/task:0/gpu:3
Const_1: /job:localhost/replica:0/task:0/gpu:2
Const: /job:localhost/replica:0/task:0/gpu:2
MatMul: /job:localhost/replica:0/task:0/gpu:2
Const_3: /job:localhost/replica:0/task:0/device:GPU:3
Const_2: /job:localhost/replica:0/task:0/device:GPU:3
MatMul_1: /job:localhost/replica:0/task:0/device:GPU:3
Const_1: /job:localhost/replica:0/task:0/device:GPU:2
Const: /job:localhost/replica:0/task:0/device:GPU:2
MatMul: /job:localhost/replica:0/task:0/device:GPU:2
AddN: /job:localhost/replica:0/task:0/cpu:0
[[ 44. 56.]
[ 98. 128.]]

View File

@ -47,12 +47,12 @@ def my_model(features, labels, mode):
# Create three fully connected layers respectively of size 10, 20, and 10 with
# each layer having a dropout probability of 0.1.
net = features[X_FEATURE]
with tf.device('/gpu:1'):
with tf.device('/device:GPU:1'):
for units in [10, 20, 10]:
net = tf.layers.dense(net, units=units, activation=tf.nn.relu)
net = tf.layers.dropout(net, rate=0.1)
with tf.device('/gpu:2'):
with tf.device('/device:GPU:2'):
# Compute logits (1 per class).
logits = tf.layers.dense(net, 3, activation=None)

View File

@ -173,7 +173,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
#
# W0718 17:14:41.521534 190121 device_mgr.cc:107] Unknown device:
# /job:worker/replica:0/task:0/device:CPU:0 all devices:
# /job:local/replica:0/task:0/gpu:0,
# /job:local/replica:0/task:0/device:GPU:0,
# /job:local/replica:0/task:0/device:GPU:0,
# /job:local/replica:0/task:0/cpu:1, CPU:0, GPU:0,
# /job:local/replica:0/task:0/device:CPU:1,
@ -198,7 +198,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
sum1 = input1 + input2
if test.is_gpu_available():
device_str = '/job:worker/task:0/gpu:0'
device_str = '/job:worker/task:0/device:GPU:0'
else:
device_str = '/job:worker/task:0/cpu:1'
with ops.device(device_str):

View File

@ -1124,7 +1124,7 @@ class SessionTest(test_util.TensorFlowTestCase):
# which is why placing this is invalid. If at some point
# GPU kernels are added to this test, some other different
# op / device combo should be chosen.
with ops.device('/gpu:0'):
with ops.device('/device:GPU:0'):
a = constant_op.constant(1.0, shape=[1, 2])
b = constant_op.constant(1.0, shape=[1, 2])
@ -1145,7 +1145,7 @@ class SessionTest(test_util.TensorFlowTestCase):
# which is why placing this is invalid. If at some point
# GPU kernels are added to this test, some other different
# op / device combo should be chosen.
with ops.device('/gpu:0'):
with ops.device('/device:GPU:0'):
_ = constant_op.constant(1.0, shape=[1, 2])
b = constant_op.constant(1.0, shape=[1, 2])
@ -1494,7 +1494,7 @@ class SessionTest(test_util.TensorFlowTestCase):
allow_soft_placement=True,
graph_options=config_pb2.GraphOptions(build_cost_model=100))
with session.Session(config=config) as sess:
with ops.device('/gpu:0'):
with ops.device('/device:GPU:0'):
a = array_ops.placeholder(dtypes.float32, shape=[])
b = math_ops.add(a, a)
c = array_ops.identity(b)

View File

@ -100,8 +100,8 @@ class TimelineTest(test.TestCase):
self.assertTrue(run_metadata.HasField('step_stats'))
step_stats = run_metadata.step_stats
devices = [d.device for d in step_stats.dev_stats]
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in devices)
self.assertTrue('/gpu:0/stream:all' in devices)
self.assertTrue('/job:localhost/replica:0/task:0/device:GPU:0' in devices)
self.assertTrue('/device:GPU:0/stream:all' in devices)
tl = timeline.Timeline(step_stats)
ctf = tl.generate_chrome_trace_format()
self._validateTrace(ctf)

View File

@ -380,7 +380,8 @@ def device_path_to_device_name(device_dir):
path_items = os.path.basename(device_dir)[
len(METADATA_FILE_PREFIX) + len(DEVICE_TAG):].split(",")
return "/".join([
path_item.replace("_", ":", 1) for path_item in path_items])
path_item.replace("device_", "device:").replace("_", ":", 1)
for path_item in path_items])
class DebugTensorDatum(object):

View File

@ -237,11 +237,11 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
gpu_0_dir = os.path.join(
self._dump_root,
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
",job_localhost,replica_0,task_0,gpu_0")
",job_localhost,replica_0,task_0,device_GPU_0")
gpu_1_dir = os.path.join(
self._dump_root,
debug_data.METADATA_FILE_PREFIX + debug_data.DEVICE_TAG +
",job_localhost,replica_0,task_0,gpu_1")
",job_localhost,replica_0,task_0,device_GPU_1")
os.makedirs(cpu_0_dir)
os.makedirs(gpu_0_dir)
os.makedirs(gpu_1_dir)
@ -281,12 +281,12 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
node = graph_gpu_0.node.add()
node.name = "node_foo_1"
node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:0"
node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
graph_gpu_1 = graph_pb2.GraphDef()
node = graph_gpu_1.node.add()
node.name = "node_foo_1"
node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1"
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
dump_dir = debug_data.DebugDumpDir(
self._dump_root,
@ -294,14 +294,14 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
self.assertItemsEqual(
["/job:localhost/replica:0/task:0/cpu:0",
"/job:localhost/replica:0/task:0/gpu:0",
"/job:localhost/replica:0/task:0/gpu:1"], dump_dir.devices())
"/job:localhost/replica:0/task:0/device:GPU:0",
"/job:localhost/replica:0/task:0/device:GPU:1"], dump_dir.devices())
self.assertEqual(1472563253536385, dump_dir.t0)
self.assertEqual(3, dump_dir.size)
with self.assertRaisesRegexp(
ValueError, r"Invalid device name: "):
dump_dir.nodes("/job:localhost/replica:0/task:0/gpu:2")
dump_dir.nodes("/job:localhost/replica:0/task:0/device:GPU:2")
self.assertItemsEqual(["node_foo_1", "node_foo_1", "node_foo_1"],
dump_dir.nodes())
self.assertItemsEqual(
@ -319,16 +319,16 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
node = graph_gpu_0.node.add()
node.name = "node_foo_1"
node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:0"
node.device = "/job:localhost/replica:0/task:0/device:GPU:0"
graph_gpu_1 = graph_pb2.GraphDef()
node = graph_gpu_1.node.add()
node.name = "node_foo_1"
node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1"
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
node = graph_gpu_1.node.add() # Here is the duplicate.
node.name = "node_foo_1"
node.op = "FooOp"
node.device = "/job:localhost/replica:0/task:0/gpu:1"
node.device = "/job:localhost/replica:0/task:0/device:GPU:1"
with self.assertRaisesRegexp(
ValueError, r"Duplicate node name on device "):

View File

@ -711,7 +711,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
# Test node name list lookup of the DebugDumpDir object.
if test_util.gpu_device_name():
node_names = dump.nodes(
device_name="/job:localhost/replica:0/task:0/gpu:0")
device_name="/job:localhost/replica:0/task:0/device:GPU:0")
else:
node_names = dump.nodes()
self.assertTrue(u_name in node_names)

View File

@ -402,7 +402,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
def testRuntimeErrorBeforeGraphExecutionIsRaised(self):
# Use an impossible device name to cause an error before graph execution.
with ops.device("/gpu:1337"):
with ops.device("/device:GPU:1337"):
w = variables.Variable([1.0] * 10, name="w")
wrapped_sess = LocalCLIDebuggerWrapperSessionForTest(

View File

@ -79,17 +79,17 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
d.parse_from_string("/replica:1/task:0/device:CPU:0")
self.assertEquals("/replica:1/task:0/device:CPU:0", d.to_string())
d.parse_from_string("/job:muu/gpu:2")
d.parse_from_string("/job:muu/device:GPU:2")
self.assertEquals("/job:muu/device:GPU:2", d.to_string())
with self.assertRaises(Exception) as e:
d.parse_from_string("/job:muu/gpu:2/cpu:0")
d.parse_from_string("/job:muu/device:GPU:2/cpu:0")
self.assertTrue("Cannot specify multiple device" in str(e.exception))
def testFromString(self):
d = device.DeviceSpec.from_string("/job:foo/replica:0")
self.assertEquals("/job:foo/replica:0", d.to_string())
with self.assertRaises(Exception) as e:
d = device.DeviceSpec.from_string("/job:muu/gpu:2/cpu:0")
d = device.DeviceSpec.from_string("/job:muu/device:GPU:2/cpu:0")
self.assertTrue("Cannot specify multiple device" in str(e.exception))
d = device.DeviceSpec.from_string("/job:foo/replica:0/task:3/cpu:*")
@ -102,13 +102,13 @@ class DeviceTest(test_util.TensorFlowTestCase):
def testMerge(self):
d = device.DeviceSpec.from_string("/job:foo/replica:0")
self.assertEquals("/job:foo/replica:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/task:1/gpu:2"))
d.merge_from(device.DeviceSpec.from_string("/task:1/device:GPU:2"))
self.assertEquals("/job:foo/replica:0/task:1/device:GPU:2", d.to_string())
d = device.DeviceSpec()
d.merge_from(device.DeviceSpec.from_string("/task:1/cpu:0"))
self.assertEquals("/task:1/device:CPU:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/job:boo/gpu:0"))
d.merge_from(device.DeviceSpec.from_string("/job:boo/device:GPU:0"))
self.assertEquals("/job:boo/task:1/device:GPU:0", d.to_string())
d.merge_from(device.DeviceSpec.from_string("/job:muu/cpu:2"))
self.assertEquals("/job:muu/task:1/device:CPU:2", d.to_string())
@ -134,10 +134,10 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
device.canonical_name(
"/job:foo/replica:0/task:0/gpu:0"))
"/job:foo/replica:0/task:0/device:GPU:0"))
self.assertEqual("/job:foo/replica:0/task:0/device:GPU:0",
device.canonical_name(
"/gpu:0/task:0/replica:0/job:foo"))
"/device:GPU:0/task:0/replica:0/job:foo"))
def testCheckValid(self):
device.check_valid("/job:foo/replica:0")
@ -155,7 +155,7 @@ class DeviceTest(test_util.TensorFlowTestCase):
self.assertTrue("Unknown attribute: 'bar'" in str(e.exception))
with self.assertRaises(Exception) as e:
device.check_valid("/cpu:0/gpu:2")
device.check_valid("/cpu:0/device:GPU:2")
self.assertTrue("Cannot specify multiple device" in str(e.exception))

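The DeviceTest hunks above pin down the naming rule the rest of this change standardizes on: legacy short forms such as "/gpu:0" still parse, but canonical_name() and to_string() always report the fully qualified "/device:GPU:N" form. A minimal sketch (not part of this commit), using only the tensorflow.python.framework.device module exercised in these tests:

```python
from tensorflow.python.framework import device

# Legacy short names still parse, but canonicalize to the fully qualified
# "/device:GPU:N" form used throughout this change.
assert device.canonical_name("/gpu:0") == "/device:GPU:0"
assert device.canonical_name("/job:muu/gpu:2") == "/job:muu/device:GPU:2"

# DeviceSpec merging reports the canonical form as well.
spec = device.DeviceSpec.from_string("/job:foo/replica:0")
spec.merge_from(device.DeviceSpec.from_string("/task:1/device:GPU:2"))
assert spec.to_string() == "/job:foo/replica:0/task:1/device:GPU:2"
```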

@ -505,7 +505,7 @@ class FunctionTest(test.TestCase):
_ = PlusOne(1, name="p1")
with self.assertRaisesRegexp(ValueError, "Unknown keyword arguments"):
_ = PlusOne(1, device="/gpu:0")
_ = PlusOne(1, device="/device:GPU:0")
def testFunctionDecorator(self):


@ -106,9 +106,9 @@ class DeviceFunctionsTest(test.TestCase):
var_0 = variables.Variable(0)
with ops.device(test_device_func_pin_variable_to_cpu):
var_1 = variables.Variable(1)
with ops.device(lambda op: "/gpu:0"):
with ops.device(lambda op: "/device:GPU:0"):
var_2 = variables.Variable(2)
with ops.device("/gpu:0"): # Implicit merging device function.
with ops.device("/device:GPU:0"): # Implicit merging device function.
var_3 = variables.Variable(3)
self.assertDeviceEqual(var_0.device, None)


@ -878,7 +878,7 @@ class ImportGraphDefTest(test.TestCase):
self.assertEqual(c.device, c4.device) # worker overrides ps.
with ops.Graph().as_default():
with ops.device(device.merge_device("/gpu:0")):
with ops.device(device.merge_device("/device:GPU:0")):
a5, b5, c5 = importer.import_graph_def(
gdef, return_elements=["a", "b", "c"])
self.assertEqual("/device:GPU:0", a5.device)


@ -550,7 +550,7 @@ class ScopedMetaGraphTest(test.TestCase):
a = variables.Variable(
constant_op.constant(
1.0, shape=[2, 2]), name="a")
with ops.device("/job:ps/replica:0/task:0/gpu:0"):
with ops.device("/job:ps/replica:0/task:0/device:GPU:0"):
b = variables.Variable(
constant_op.constant(
2.0, shape=[2, 2]), name="b")


@ -3342,7 +3342,7 @@ class Graph(object):
For example:
```python
with g.device('/gpu:0'):
with g.device('/device:GPU:0'):
# All operations constructed in this context will be placed
# on GPU 0.
with g.device(None):
@ -3352,7 +3352,7 @@ class Graph(object):
# Defines a function from `Operation` to device string.
def matmul_on_gpu(n):
if n.type == "MatMul":
return "/gpu:0"
return "/device:GPU:0"
else:
return "/cpu:0"


@ -1555,26 +1555,26 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
def testColocationDeviceInteraction(self):
with ops.device("/cpu:0"):
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a")
with ops.colocate_with(a.op):
# 'b' is created in the scope of /cpu:0, but it is
# colocated with 'a', which is on '/gpu:0'. colocate_with
# colocated with 'a', which is on '/device:GPU:0'. colocate_with
# overrides devices because it is a stronger constraint.
b = constant_op.constant(3.0)
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
self.assertEqual(a.op.device, b.op.device)
def testColocationCanonicalization(self):
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
_ = constant_op.constant(2.0)
with ops.device(lambda op: "/gpu:0"):
with ops.device(lambda op: "/device:GPU:0"):
b = constant_op.constant(3.0)
with ops.get_default_graph().colocate_with(b):
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
c = constant_op.constant(4.0)
# A's device will be /gpu:0
# A's device will be /device:GPU:0
# B's device will be /device:GPU:0
# C's device will be /device:GPU:0 because it
# inherits B's device name, after canonicalizing the names.
@ -1582,10 +1582,10 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
def testLocationOverrides(self):
with ops.device("/cpu:0"):
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a")
# Note that this colocation is "redundant", since we are
# within the scope of "/gpu:0". However, we would like to
# within the scope of "/device:GPU:0". However, we would like to
# preserve in the GraphDef that these two ops should be
# colocated in a portable way.
with ops.colocate_with(a.op):
@ -1652,7 +1652,7 @@ class ColocationGroupTest(test_util.TensorFlowTestCase):
self.assertEqual([b"loc:@a"], b.op.colocation_groups())
def testInconsistentDeviceWithinColocate(self):
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
a = constant_op.constant([2.0], name="a")
with ops.colocate_with(a.op):
# This is allowed due to legacy but clearly wrong, since we


@ -405,7 +405,7 @@ class TensorFlowTestCase(googletest.TestCase):
trigger the creation of a new session.
Use the `use_gpu` and `force_gpu` options to control where ops are run. If
`force_gpu` is True, all ops are pinned to `/gpu:0`. Otherwise, if `use_gpu`
`force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if `use_gpu`
is True, TensorFlow tries to run as many ops on the GPU as possible. If both
`force_gpu` and `use_gpu` are False, all ops are pinned to the CPU.
@ -427,7 +427,7 @@ class TensorFlowTestCase(googletest.TestCase):
config: An optional config_pb2.ConfigProto to use to configure the
session.
use_gpu: If True, attempt to run as many ops as possible on GPU.
force_gpu: If True, pin all ops to `/gpu:0`.
force_gpu: If True, pin all ops to `/device:GPU:0`.
Returns:
A Session object that should be used as a context manager to surround
@ -466,11 +466,11 @@ class TensorFlowTestCase(googletest.TestCase):
sess = self._cached_session
with sess.graph.as_default(), sess.as_default():
if force_gpu:
# Use the name of an actual device if one is detected, or '/gpu:0'
# Use the name of an actual device if one is detected, or '/device:GPU:0'
# otherwise
gpu_name = gpu_device_name()
if not gpu_name:
gpu_name = "/gpu:0"
gpu_name = "/device:GPU:0"
with sess.graph.device(gpu_name):
yield sess
elif use_gpu:
@ -481,11 +481,11 @@ class TensorFlowTestCase(googletest.TestCase):
else:
with session.Session(graph=graph, config=prepare_config(config)) as sess:
if force_gpu:
# Use the name of an actual device if one is detected, or '/gpu:0'
# Use the name of an actual device if one is detected, or '/device:GPU:0'
# otherwise
gpu_name = gpu_device_name()
if not gpu_name:
gpu_name = "/gpu:0"
gpu_name = "/device:GPU:0"
with sess.graph.device(gpu_name):
yield sess
elif use_gpu:

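For reference, a hedged usage sketch of the force_gpu path documented above. The test class and the specific ops are illustrative only (not part of this commit), and the case assumes a machine where GPU placement is possible, otherwise it falls back to the bare "/device:GPU:0" name exactly as the hunk shows:

```python
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import googletest


class ForceGpuExampleTest(test_util.TensorFlowTestCase):

  def testMatMulPinnedToGpu(self):
    # force_gpu pins every op to the detected GPU device name, or to
    # "/device:GPU:0" when gpu_device_name() reports nothing.
    with self.test_session(force_gpu=True):
      a = constant_op.constant([[1.0, 2.0]])
      b = constant_op.constant([[3.0], [4.0]])
      self.assertAllClose([[11.0]], math_ops.matmul(a, b).eval())


if __name__ == "__main__":
  googletest.main()
```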

@ -238,7 +238,7 @@ class GpuMultiSessionMemoryTest(test_util.TensorFlowTestCase):
n_iterations = 500
with session as s:
data = variables.Variable(1.0)
with ops.device('/gpu:0'):
with ops.device('/device:GPU:0'):
random_seed.set_random_seed(1)
matrix1 = variables.Variable(
random_ops.truncated_normal([1024, 1]), name='matrix1')


@ -311,7 +311,7 @@ class CholeskyBenchmark(test.Benchmark):
if test.is_gpu_available(True):
with ops.Graph().as_default(), \
session.Session() as sess, \
ops.device("/gpu:0"):
ops.device("/device:GPU:0"):
l = linalg_ops.cholesky(data)
self.run_op_benchmark(
sess,
@ -338,11 +338,11 @@ class CholeskyBenchmark(test.Benchmark):
if test.is_gpu_available(True):
_BenchmarkGrad(
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/gpu:0")
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/device:GPU:0")
_BenchmarkGrad(
TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/gpu:0")
TriAngInvCompositeGrad, "composite_tri_ang_inverse", "/device:GPU:0")
_BenchmarkGrad(
TriAngSolveCompositeGrad, "composite_triangular_solve", "/gpu:0")
TriAngSolveCompositeGrad, "composite_triangular_solve", "/device:GPU:0")
_BenchmarkGrad(
MatrixInverseCompositeGrad, "composite_matrix_inverse", "/cpu:0")


@ -1423,9 +1423,8 @@ class ControlFlowTest(test.TestCase):
self.assertEqual(45, rx.eval())
def _testWhileGrad_ColocateGradients(self, colocate):
gpu_dev_name = test.gpu_device_name().lower() if test.is_gpu_available(
) else "/gpu:0"
gpu_short_name = gpu_dev_name.split("/")[-1]
gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
) else "/device:GPU:0"
with self.test_session(graph=ops.Graph()) as sess:
v = constant_op.constant(2.0, name="v")
@ -1439,19 +1438,19 @@ class ControlFlowTest(test.TestCase):
r = gradients_impl.gradients(
loop, v, colocate_gradients_with_ops=colocate)[0]
r_ops = r.graph.get_operations()
r_devices = [(op.name, op.device.lower()) for op in r_ops]
r_devices = [(op.name, op.device) for op in r_ops]
self.assertTrue(any("Square" in op.name for op in r_ops))
for (name, dev) in r_devices:
if not colocate and name.endswith("Square"):
# Only the forward graph contains gpu in the Square device
self.assertTrue(gpu_short_name in dev)
self.assertTrue(gpu_dev_name in dev)
elif colocate and "Square" in name:
# Forward and backward graphs contain gpu in Square/Square_grad devices
self.assertTrue(gpu_short_name in dev)
self.assertTrue(gpu_dev_name in dev)
else:
self.assertFalse(gpu_short_name in dev)
self.assertFalse(gpu_dev_name in dev)
self.assertAllClose(1024.0, sess.run(r))
def testWhileGrad_ColocateGradients(self):
@ -2426,7 +2425,7 @@ class ControlFlowTest(test.TestCase):
# device set on tensor, default device on graph => default device on dep.
vdef = variables.Variable([0.0], name="vdef")
with ops.device("/job:worker/gpu:1"):
with ops.device("/job:worker/device:GPU:1"):
with_vdef_dep = control_flow_ops.with_dependencies([vdef.initializer],
vdef)
# The device is empty, but the colocation constraint is set.

View File

@ -347,7 +347,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
x_t, y_t, adjoint_a, adjoint_b)
else:
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
x_t = constant_op.constant(x)
y_t = constant_op.constant(y)
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_dense(
@ -365,7 +365,7 @@ def sparse_tensor_dense_vs_dense_matmul_benchmark(thresh,
ops_fn = _sparse_tensor_dense_vs_dense_matmul_benchmark_sparse(
x_ind, x_val, x_shape, y_t, adjoint_a, adjoint_b)
else:
with ops.device("/gpu:0"):
with ops.device("/device:GPU:0"):
x_ind = constant_op.constant(np.vstack(np.where(x)).astype(np.int64).T)
x_val = constant_op.constant(x[np.where(x)])
x_shape = constant_op.constant(np.array(x.shape).astype(np.int64))


@ -722,7 +722,7 @@ class VariableScopeTest(test.TestCase):
def device_func(op):
if op.type in ["Variable", "VariableV2", "VarHandleOp"]:
varname_type.append((op.name, op.get_attr("dtype")))
return "/gpu:0"
return "/device:GPU:0"
with g.as_default():
with ops.device(device_func):


@ -163,20 +163,20 @@ class GradientsTest(test_util.TensorFlowTestCase):
with ops.Graph().as_default() as g:
w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2])
with g.device("/gpu:0"):
with g.device("/device:GPU:0"):
wx = math_ops.matmul(w, x)
gw = gradients.gradients(wx, [w], colocate_gradients_with_ops=True)[0]
self.assertEqual(gw.op.colocation_groups(), wx.op.colocation_groups())
def testColocateGradientsWithAggregation(self):
with ops.Graph().as_default() as g:
with g.device("/gpu:1"):
with g.device("/device:GPU:1"):
w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2])
y = constant(1.0, shape=[1, 2])
wx = math_ops.matmul(w, x)
wy = math_ops.matmul(w, y)
with g.device("/gpu:0"):
with g.device("/device:GPU:0"):
z = wx + wy
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]
@ -187,7 +187,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
def testColocateGradientsWithAggregationInMultipleDevices(self):
with ops.Graph().as_default() as g:
with g.device("/gpu:1"):
with g.device("/device:GPU:1"):
w = constant(1.0, shape=[1, 1])
x = constant(1.0, shape=[1, 2])
y = constant(1.0, shape=[1, 2])
@ -195,7 +195,7 @@ class GradientsTest(test_util.TensorFlowTestCase):
wx = math_ops.matmul(w, x)
with g.device("/task:2"):
wy = math_ops.matmul(w, y)
with g.device("/gpu:0"):
with g.device("/device:GPU:0"):
z = wx + wy
gw1 = gradients.gradients(z, [w], colocate_gradients_with_ops=True)[0]


@ -47,7 +47,7 @@ def build_graph(device, n, m, k, transpose_a, transpose_b, dtype):
Returns:
A matmul operation to run()
"""
with ops.device('/%s:0' % device):
with ops.device('%s' % device):
if not transpose_a:
x = variables.Variable(random_ops.random_uniform([n, m], dtype=dtype))
else:
@ -112,7 +112,7 @@ class MatmulBenchmark(test.Benchmark):
return duration
def run_test_gpu(self, n, m, k, transpose_a, transpose_b, dtype, num_iters):
self.run_graph('gpu', n, m, k, transpose_a, transpose_b, num_iters, dtype)
self.run_graph(test.gpu_device_name(), n, m, k, transpose_a, transpose_b, num_iters, dtype)
def test_round(self, num_iters):
dtypes = [np.float32, np.float64]

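A hedged sketch of driving the reworked benchmark now that build_graph receives a full device string instead of a 'gpu'/'cpu' token; the matmul_benchmark import path is assumed from this file's layout, and '/cpu:0' is only a fallback for machines without a GPU or SYCL device:

```python
import numpy as np

from tensorflow.python.ops import matmul_benchmark  # import path assumed
from tensorflow.python.platform import test

# Use the detected GPU (or SYCL) device name verbatim; fall back to the CPU.
device_name = test.gpu_device_name() if test.is_gpu_available() else "/cpu:0"

# build_graph(device, n, m, k, transpose_a, transpose_b, dtype) now places its
# variables and the MatMul on exactly the device string it is given, so
# "/device:GPU:0", "/device:SYCL:0" and "/cpu:0" all work unchanged.
matmul_op = matmul_benchmark.build_graph(device_name, 1024, 1024, 1024,
                                         False, False, np.float32)
```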

@ -71,37 +71,39 @@ class MatmulBenchmarkTest(googletest.TestCase):
def _VerifyBuildGraph(self, n, m, k, transpose_a, transpose_b, dtype):
graph = ops.Graph()
with graph.as_default():
matmul_benchmark.build_graph("gpu", n, m, k, transpose_a, transpose_b,
matmul_benchmark.build_graph(googletest.gpu_device_name(), n, m, k, transpose_a, transpose_b,
dtype)
gd = graph.as_graph_def()
self.assertProtoEquals("""
node { name: "random_uniform/shape" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform/min" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform/max" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: "/device:GPU:0" }
node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: "/device:GPU:0" }
node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: "/device:GPU:0" }
node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: "/device:GPU:0" }
node { name: "Variable" op: "VariableV2" device: "/device:GPU:0" }
node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: "/device:GPU:0" }
node { name: "Variable/read" op: "Identity" input: "Variable" device: "/device:GPU:0" }
node { name: "random_uniform_1/shape" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform_1/min" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform_1/max" op: "Const" device: "/device:GPU:0" }
node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: "/device:GPU:0" }
node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: "/device:GPU:0" }
node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: "/device:GPU:0" }
node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: "/device:GPU:0" }
node { name: "Variable_1" op: "VariableV2" device: "/device:GPU:0" }
node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: "/device:GPU:0" }
node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: "/device:GPU:0" }
node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: "/device:GPU:0" }
node { name: "group_deps" op: "NoOp" input: "^MatMul" device: "/device:GPU:0" }
""", self._StripGraph(gd))
dev = googletest.gpu_device_name()
proto_expected = """
node { name: "random_uniform/shape" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/min" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/max" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform/RandomUniform" op: "RandomUniform" input: "random_uniform/shape" device: \""""+ dev +"""\" }
node { name: "random_uniform/sub" op: "Sub" input: "random_uniform/max" input: "random_uniform/min" device: \""""+ dev +"""\" }
node { name: "random_uniform/mul" op: "Mul" input: "random_uniform/RandomUniform" input: "random_uniform/sub" device: \""""+ dev +"""\" }
node { name: "random_uniform" op: "Add" input: "random_uniform/mul" input: "random_uniform/min" device: \""""+ dev +"""\" }
node { name: "Variable" op: "VariableV2" device: \""""+ dev +"""\" }
node { name: "Variable/Assign" op: "Assign" input: "Variable" input: "random_uniform" device: \""""+ dev +"""\" }
node { name: "Variable/read" op: "Identity" input: "Variable" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/shape" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/min" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/max" op: "Const" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/RandomUniform" op: "RandomUniform" input: "random_uniform_1/shape" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/sub" op: "Sub" input: "random_uniform_1/max" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
node { name: "random_uniform_1/mul" op: "Mul" input: "random_uniform_1/RandomUniform" input: "random_uniform_1/sub" device: \""""+ dev +"""\" }
node { name: "random_uniform_1" op: "Add" input: "random_uniform_1/mul" input: "random_uniform_1/min" device: \""""+ dev +"""\" }
node { name: "Variable_1" op: "VariableV2" device: \""""+ dev +"""\" }
node { name: "Variable_1/Assign" op: "Assign" input: "Variable_1" input: "random_uniform_1" device: \""""+ dev +"""\" }
node { name: "Variable_1/read" op: "Identity" input: "Variable_1" device: \""""+ dev +"""\" }
node { name: "MatMul" op: "MatMul" input: "Variable/read" input: "Variable_1/read" device: \""""+ dev +"""\" }
node { name: "group_deps" op: "NoOp" input: "^MatMul" device: \""""+ dev +"""\" }
"""
self.assertProtoEquals(str(proto_expected), self._StripGraph(gd))
def _VerifyRunGraph(self, n, m, k, transpose_a, transpose_b, dtype):
benchmark_instance = matmul_benchmark.MatmulBenchmark()
duration = benchmark_instance.run_graph("gpu", n, m, k, transpose_a,
duration = benchmark_instance.run_graph(googletest.gpu_device_name(), n, m, k, transpose_a,
transpose_b, 1, dtype)
self.assertTrue(duration > 1e-6)


@ -97,21 +97,22 @@ class RunMetadataTest(test.TestCase):
if not test.is_gpu_available(cuda_only=True):
return
gpu_dev = test.gpu_device_name()
ops.reset_default_graph()
with ops.device('/gpu:0'):
with ops.device(gpu_dev):
tfprof_node, run_meta = _run_model()
self.assertEqual(tfprof_node.children[0].name, 'MatMul')
self.assertGreater(tfprof_node.children[0].exec_micros, 10)
ret = _extract_node(run_meta, ['MatMul', 'MatMul:MatMul'])
self.assertEqual(len(ret), 3)
self.assertTrue('/job:localhost/replica:0/task:0/gpu:0' in ret)
del ret['/job:localhost/replica:0/task:0/gpu:0']
self.assertTrue('/job:localhost/replica:0/task:0' + gpu_dev in ret)
del ret['/job:localhost/replica:0/task:0' + gpu_dev]
has_all_stream = False
for k, _ in six.iteritems(ret):
self.assertTrue('gpu:0/stream' in k)
if 'gpu:0/stream:all' in k:
self.assertTrue(gpu_dev + '/stream' in k)
if gpu_dev + '/stream:all' in k:
has_all_stream = True
self.assertTrue(has_all_stream)
@ -159,24 +160,24 @@ class RunMetadataTest(test.TestCase):
return
ops.reset_default_graph()
with ops.device('/gpu:0'):
with ops.device('/device:GPU:0'):
tfprof_node, run_meta = _run_loop_model()
# The while-loop caused a node to appear 4 times in scheduling.
ret = _extract_node(run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul')
self.assertEqual(len(ret['/job:localhost/replica:0/task:0/gpu:0']), 4)
self.assertEqual(len(ret['/job:localhost/replica:0/task:0/device:GPU:0']), 4)
total_cpu_execs = 0
for node in ret['/job:localhost/replica:0/task:0/gpu:0']:
for node in ret['/job:localhost/replica:0/task:0/device:GPU:0']:
total_cpu_execs += node.op_end_rel_micros
ret = _extract_node(
run_meta,
'rnn/while/rnn/basic_rnn_cell/basic_rnn_cell/MatMul:MatMul')
self.assertGreaterEqual(len(ret['/gpu:0/stream:all']), 4)
self.assertGreaterEqual(len(ret['/device:GPU:0/stream:all']), 4)
total_accelerator_execs = 0
for node in ret['/gpu:0/stream:all']:
for node in ret['/device:GPU:0/stream:all']:
total_accelerator_execs += node.op_end_rel_micros
mm_node = lib.SearchTFProfNode(


@ -315,7 +315,7 @@ class ProfileOptionBuilder(object):
"""Selectively counting statistics based on node types.
Here, 'types' means the profiler nodes' properties. Profiler by default
considers device name (e.g. /job:xx/.../gpu:0) and operation type
considers device name (e.g. /job:xx/.../device:GPU:0) and operation type
(e.g. MatMul) as profiler nodes' properties. Users can also associate
customized 'types' to profiler nodes through OpLogProto proto.

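The docstring above is where the profiler treats device names as node 'types'; a hedged sketch of filtering on the fully qualified GPU names, assuming the tf.profiler ProfileOptionBuilder methods of this release (time_and_memory, with_accounted_types, with_max_depth):

```python
import tensorflow as tf

# Hedged sketch: account only for profiler nodes whose device "type" matches
# the fully qualified "/device:GPU:N" names adopted in this change. Builder
# method names are assumed from the tf.profiler API of this release.
opts = (tf.profiler.ProfileOptionBuilder(
            tf.profiler.ProfileOptionBuilder.time_and_memory())
        .with_accounted_types([".*device:GPU:.*"])
        .with_max_depth(10)
        .build())

# With run_meta collected via session.run(..., run_metadata=run_meta):
# tf.profiler.profile(tf.get_default_graph(), run_meta=run_meta,
#                     cmd="scope", options=opts)
```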

@ -50,7 +50,7 @@ class RemoveDeviceTest : public ::testing::Test {
add_node2->set_op("Add");
add_node2->add_input("const_node1");
add_node2->add_input("const_node2");
add_node2->set_device("//gpu:1");
add_node2->set_device("//device:GPU:1");
NodeDef* add_node3 = graph_def.add_node();
add_node3->set_name("add_node3");