Removing device-related code that doesn't seem to be used. Usage was removed by the following commits: 223c8bdf89, 04c23099c2 (diff-3780f0ef44936240abc76c4c42541532).
PiperOrigin-RevId: 314178530 Change-Id: I7a2502d691610a6cd44a9752e9f48e4798071f13
This commit is contained in:
parent
bbe22cf4dd
commit
6f22fa9376
@ -121,7 +121,6 @@ filegroup(
|
||||
"gpu_managed_allocator.h",
|
||||
"gpu_mem_allocator.h",
|
||||
"gpu_process_state.h",
|
||||
"gpu_stream_util.h",
|
||||
"gpu_util.h",
|
||||
"//tensorflow/core/common_runtime:gpu_runtime_headers",
|
||||
],
|
||||
@ -137,7 +136,6 @@ tf_cuda_library(
|
||||
"gpu_device_factory.cc",
|
||||
"gpu_managed_allocator.cc",
|
||||
"gpu_process_state.cc",
|
||||
"gpu_stream_util.cc",
|
||||
"gpu_util.cc",
|
||||
"gpu_util_platform_specific.cc",
|
||||
],
|
||||
@ -400,29 +398,3 @@ tf_cc_test_gpu(
|
||||
"//tensorflow/core/kernels:ops_util",
|
||||
],
|
||||
)
|
||||
|
||||
# Test target built from gpu_stream_util_test.cc; depends on :gpu_runtime.
tf_cc_test_gpu(
    name = "gpu_stream_util_test",
    size = "small",
    srcs = ["gpu_stream_util_test.cc"],
    linkstatic = tf_kernel_tests_linkstatic(),
    tags = tf_cuda_tests_tags() + ["nomac"],
    deps = [
        ":gpu_runtime",
        "//tensorflow/cc:cc_ops",
        "//tensorflow/cc:sendrecv_ops",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:lib",
        "//tensorflow/core:lib_internal",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core:test",
        "//tensorflow/core:test_main",
        "//tensorflow/core:testlib",
        "//tensorflow/core/common_runtime:core_cpu",
        "//tensorflow/core/common_runtime:core_cpu_internal",
        "//tensorflow/core/common_runtime:direct_session_internal",
        "//tensorflow/core/kernels:matmul_op",
        "//tensorflow/core/kernels:ops_util",
    ],
)
|
||||
|
@ -42,7 +42,6 @@ limitations under the License.
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h"
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_util.h"
|
||||
#include "tensorflow/core/common_runtime/gpu_device_context.h"
|
||||
#include "tensorflow/core/common_runtime/local_device.h"
|
||||
|
@ -1,112 +0,0 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h"
|
||||
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include "tensorflow/core/graph/algorithm.h"
|
||||
#include "tensorflow/core/lib/core/errors.h"
|
||||
#include "tensorflow/core/lib/strings/strcat.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace gpu_stream_util {
|
||||
|
||||
// Assigns a stream id to every node of `graph`.
//
// Nodes are visited in reverse post-order. Each node first receives a
// candidate id: the id already recorded for the first input whose producer
// has exactly one outgoing edge, or otherwise a fresh id. The candidate is
// then replaced by the per-op-type stream from `opts` when that option is
// non-negative. The result, taken modulo `opts.max_streams`, is stored in
// `*node_to_stream_id` keyed by node id.
//
// Returns InvalidArgument when `graph` or `node_to_stream_id` is null, when
// `opts.max_streams` is below 1, or when any per-op-type stream in `opts` is
// not strictly below `opts.max_streams`.
Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts,
                     std::unordered_map<int, int>* node_to_stream_id) {
  VLOG(1) << "AssignStreams";
  Status status;

  // Sanity check arguments.
  if (graph == nullptr) {
    status.Update(errors::InvalidArgument("Bad graph argument supplied."));
  }
  if (node_to_stream_id == nullptr) {
    status.Update(
        errors::InvalidArgument("Bad node_to_stream_id argument supplied."));
  }
  if ((opts.max_streams < 1) || (opts.send_stream >= opts.max_streams) ||
      (opts.recv_stream >= opts.max_streams) ||
      (opts.const_stream >= opts.max_streams) ||
      (opts.compute_stream >= opts.max_streams)) {
    // Report the options, not the graph, as the offending argument.
    status.Update(errors::InvalidArgument("Bad opts argument supplied."));
  }
  TF_RETURN_IF_ERROR(status);

  // Topologically sort the nodes.
  std::vector<Node*> order;
  GetReversePostOrder(*graph, &order);
  if (VLOG_IS_ON(2)) {
    for (Node* n : order) {
      const int node_id = n->id();
      VLOG(2) << "Node " << node_id << " " << n->type_string() << " "
              << n->name() << " " << n->in_edges().size() << " inputs";
      for (const Edge* e : n->in_edges()) {
        VLOG(2) << "  Edge from " << e->src()->id() << "  " << e->src()->name()
                << " fanout " << e->src()->out_edges().size();
      }
    }
  }

  // We perform stream assignment assuming a large number of stream IDs and
  // then map these down to the required number of streams using a modulo.
  //
  // Stream assignment strategy:
  // 1. Nodes with zero inputs are always executed on a fresh stream.
  // 2. Try to execute a node on the same stream as one of its inputs to
  //    avoid inter-stream dependencies. Only inputs whose producer has a
  //    fanout of exactly one are considered.
  // 3. If every input comes from a producer with a larger fanout, the tensor
  //    is perhaps shared between parallel streams of work, so a fresh stream
  //    is chosen.
  int highest_stream_id = -1;
  for (Node* n : order) {
    VLOG(3) << "Inspecting node " << n->DebugString();
    const int node_id = n->id();
    const string& op = n->type_string();

    // Determine a suitable stream to use.
    int stream_id = highest_stream_id + 1;
    for (const Edge* e : n->in_edges()) {
      const size_t fanout = e->src()->out_edges().size();
      if (fanout == 1) {
        stream_id = (*node_to_stream_id)[e->src()->id()];
        break;
      }
    }

    // Override stream for specific op types.
    if (op == "_Send") {
      if (opts.send_stream >= 0) stream_id = opts.send_stream;
    } else if (op == "_Recv") {
      if (opts.recv_stream >= 0) stream_id = opts.recv_stream;
    } else if (op == "Const") {
      if (opts.const_stream >= 0) stream_id = opts.const_stream;
    } else {
      if (opts.compute_stream >= 0) stream_id = opts.compute_stream;
    }

    (*node_to_stream_id)[node_id] = stream_id % opts.max_streams;
    highest_stream_id = std::max(stream_id, highest_stream_id);
  }

  // Stream ids start at 0, so the number of candidates is the highest id + 1.
  VLOG(1) << "Identified " << (highest_stream_id + 1)
          << " candidate streams for " << order.size() << " nodes.";

  return Status::OK();
}
|
||||
|
||||
} // namespace gpu_stream_util
|
||||
} // namespace tensorflow
|
@ -1,45 +0,0 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_
|
||||
#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "tensorflow/core/graph/graph.h"
|
||||
#include "tensorflow/core/lib/core/status.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace gpu_stream_util {
|
||||
|
||||
struct AssignStreamsOpts {
|
||||
int32 max_streams = 1;
|
||||
// The following options specify a stream to use for specific op
|
||||
// types. The value -1 allows ops to be assigned to any stream.
|
||||
int32 send_stream = -1;
|
||||
int32 recv_stream = -1;
|
||||
int32 const_stream = -1;
|
||||
int32 compute_stream = -1;
|
||||
};
|
||||
|
||||
// Given the input graph, assigns every node in the graph with a
|
||||
// stream_id that should be used.
|
||||
Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts,
|
||||
std::unordered_map<int, int>* node_to_stream_id);
|
||||
|
||||
} // namespace gpu_stream_util
|
||||
} // namespace tensorflow
|
||||
|
||||
#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_
|
@ -1,148 +0,0 @@
|
||||
/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==============================================================================*/
|
||||
|
||||
#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h"
|
||||
|
||||
#include "tensorflow/cc/ops/sendrecv_ops.h"
|
||||
#include "tensorflow/cc/ops/standard_ops.h"
|
||||
#include "tensorflow/core/framework/op.h"
|
||||
#include "tensorflow/core/framework/types.pb.h"
|
||||
#include "tensorflow/core/graph/graph_def_builder.h"
|
||||
#include "tensorflow/core/graph/node_builder.h"
|
||||
#include "tensorflow/core/kernels/ops_testutil.h"
|
||||
#include "tensorflow/core/kernels/ops_util.h"
|
||||
#include "tensorflow/core/lib/core/status_test_util.h"
|
||||
#include "tensorflow/core/platform/test.h"
|
||||
|
||||
namespace tensorflow {
|
||||
namespace {
|
||||
|
||||
// Fixture for the gpu_stream_util tests; adds nothing on top of OpsTestBase.
class GpuStreamUtilTest : public OpsTestBase {};
|
||||
|
||||
// AssignStreams must fail for null arguments and for inconsistent options.
TEST_F(GpuStreamUtilTest, BogusOpts) {
  auto scope = Scope::NewRootScope().ExitOnError();
  Graph graph(OpRegistry::Global());
  TF_ASSERT_OK(scope.ToGraph(&graph));

  std::unordered_map<int, int> assignment;
  gpu_stream_util::AssignStreamsOpts opts;

  // Null graph.
  Status result = gpu_stream_util::AssignStreams(nullptr, opts, &assignment);
  EXPECT_FALSE(result.ok());

  // Null output map.
  result = gpu_stream_util::AssignStreams(&graph, opts, nullptr);
  EXPECT_FALSE(result.ok());

  // Zero streams available.
  opts.max_streams = 0;
  result = gpu_stream_util::AssignStreams(&graph, opts, &assignment);
  EXPECT_FALSE(result.ok());

  // Compute stream outside of the available range.
  opts.max_streams = 1;
  opts.compute_stream = 5;
  result = gpu_stream_util::AssignStreams(&graph, opts, &assignment);
  EXPECT_FALSE(result.ok());
}
|
||||
|
||||
// A graph built from an empty scope still yields two assigned nodes.
TEST_F(GpuStreamUtilTest, EmptyGraph) {
  auto scope = Scope::NewRootScope().ExitOnError();
  Graph graph(OpRegistry::Global());
  TF_ASSERT_OK(scope.ToGraph(&graph));

  std::unordered_map<int, int> assignment;
  gpu_stream_util::AssignStreamsOpts opts;
  TF_ASSERT_OK(gpu_stream_util::AssignStreams(&graph, opts, &assignment));

  // Only _SOURCE and _SINK are present.
  EXPECT_EQ(2, assignment.size());
}
|
||||
|
||||
// With the default options every node lands on stream 0.
TEST_F(GpuStreamUtilTest, SimpleGraphOneStream) {
  auto scope = Scope::DisabledShapeInferenceScope().ExitOnError();
  ops::MatMul(scope, {}, {});
  Graph graph(OpRegistry::Global());
  TF_ASSERT_OK(scope.ToGraph(&graph));

  std::unordered_map<int, int> assignment;
  gpu_stream_util::AssignStreamsOpts opts;
  TF_ASSERT_OK(gpu_stream_util::AssignStreams(&graph, opts, &assignment));

  // Five nodes are expected to receive an assignment.
  EXPECT_EQ(5, assignment.size());

  // Each of them must be on stream 0.
  for (const auto& entry : assignment) {
    EXPECT_EQ(0, entry.second);
  }
}
|
||||
|
||||
// With several streams available every assignment stays inside the range.
TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) {
  auto scope = Scope::DisabledShapeInferenceScope().ExitOnError();
  ops::MatMul(scope, {}, {});
  Graph graph(OpRegistry::Global());
  TF_ASSERT_OK(scope.ToGraph(&graph));

  std::unordered_map<int, int> assignment;
  gpu_stream_util::AssignStreamsOpts opts;
  opts.max_streams = 3;
  TF_ASSERT_OK(gpu_stream_util::AssignStreams(&graph, opts, &assignment));

  // Five nodes are expected to receive an assignment.
  EXPECT_EQ(5, assignment.size());

  // Every stream id must fall in [0, max_streams).
  for (const auto& entry : assignment) {
    EXPECT_GE(entry.second, 0);
    EXPECT_LT(entry.second, opts.max_streams);
  }
}
|
||||
|
||||
// Per-op-type stream options must take precedence over the default strategy.
TEST_F(GpuStreamUtilTest, StreamOverrides) {
  auto scope = Scope::DisabledShapeInferenceScope().ExitOnError();
  ops::_Recv(scope.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0,
             "/device:GPU:0");
  Output product = ops::MatMul(scope, {}, {});
  ops::_Send(scope.WithOpName("output"), product, "output", "/device:GPU:0", 0,
             "/cpu:0");
  Graph graph(OpRegistry::Global());
  TF_ASSERT_OK(scope.ToGraph(&graph));

  // Plenty of streams are available, but each op type is pinned to its own.
  std::unordered_map<int, int> assignment;
  gpu_stream_util::AssignStreamsOpts opts;
  opts.max_streams = 100;
  opts.const_stream = 90;
  opts.send_stream = 91;
  opts.recv_stream = 92;
  opts.compute_stream = 93;
  TF_ASSERT_OK(gpu_stream_util::AssignStreams(&graph, opts, &assignment));

  // Seven nodes are expected, including _SOURCE and _SINK.
  EXPECT_EQ(7, assignment.size());

  // Each node must sit on the stream configured for its op type.
  for (const auto& entry : assignment) {
    Node* node = graph.FindNodeId(entry.first);
    const string& op = node->type_string();
    const int stream = entry.second;

    int expected_stream = 93;  // Compute.
    if (op == "Const") {
      expected_stream = 90;
    } else if (op == "_Send") {
      expected_stream = 91;
    } else if (op == "_Recv") {
      expected_stream = 92;
    }
    EXPECT_EQ(stream, expected_stream);
  }
}
|
||||
|
||||
} // namespace
|
||||
} // namespace tensorflow
|
@ -119,9 +119,6 @@ class DeviceContext : public core::RefCounted {
|
||||
}
|
||||
};
|
||||
|
||||
// map[i] is the DeviceContext* for the node with id i, if i < map.size().
|
||||
typedef std::vector<DeviceContext*> DeviceContextMap;
|
||||
|
||||
class DeviceBase {
|
||||
public:
|
||||
explicit DeviceBase(Env* env) : env_(env) {}
|
||||
|
Loading…
Reference in New Issue
Block a user