diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD index 07919117051..3744eb967d3 100644 --- a/tensorflow/core/common_runtime/gpu/BUILD +++ b/tensorflow/core/common_runtime/gpu/BUILD @@ -121,7 +121,6 @@ filegroup( "gpu_managed_allocator.h", "gpu_mem_allocator.h", "gpu_process_state.h", - "gpu_stream_util.h", "gpu_util.h", "//tensorflow/core/common_runtime:gpu_runtime_headers", ], @@ -137,7 +136,6 @@ tf_cuda_library( "gpu_device_factory.cc", "gpu_managed_allocator.cc", "gpu_process_state.cc", - "gpu_stream_util.cc", "gpu_util.cc", "gpu_util_platform_specific.cc", ], @@ -400,29 +398,3 @@ tf_cc_test_gpu( "//tensorflow/core/kernels:ops_util", ], ) - -tf_cc_test_gpu( - name = "gpu_stream_util_test", - size = "small", - srcs = ["gpu_stream_util_test.cc"], - linkstatic = tf_kernel_tests_linkstatic(), - tags = tf_cuda_tests_tags() + ["nomac"], - deps = [ - ":gpu_runtime", - "//tensorflow/cc:cc_ops", - "//tensorflow/cc:sendrecv_ops", - "//tensorflow/core:framework", - "//tensorflow/core:framework_internal", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - "//tensorflow/core:testlib", - "//tensorflow/core/common_runtime:core_cpu", - "//tensorflow/core/common_runtime:core_cpu_internal", - "//tensorflow/core/common_runtime:direct_session_internal", - "//tensorflow/core/kernels:matmul_op", - "//tensorflow/core/kernels:ops_util", - ], -) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc index cf2e7043cae..e11b079b7ec 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc @@ -42,7 +42,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h" #include "tensorflow/core/common_runtime/gpu/gpu_init.h" #include "tensorflow/core/common_runtime/gpu/gpu_process_state.h" -#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h" #include "tensorflow/core/common_runtime/gpu/gpu_util.h" #include "tensorflow/core/common_runtime/gpu_device_context.h" #include "tensorflow/core/common_runtime/local_device.h" diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util.cc deleted file mode 100644 index de715d140a1..00000000000 --- a/tensorflow/core/common_runtime/gpu/gpu_stream_util.cc +++ /dev/null @@ -1,112 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h" - -#include -#include -#include -#include - -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/strings/strcat.h" - -namespace tensorflow { -namespace gpu_stream_util { - -Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts, - std::unordered_map* node_to_stream_id) { - VLOG(1) << "AssignStreams"; - Status status; - - // Sanity check arguments. - if (graph == nullptr) - status.Update(errors::InvalidArgument("Bad graph argument supplied.")); - if (node_to_stream_id == nullptr) { - status.Update( - errors::InvalidArgument("Bad node_to_stream_id argument supplied.")); - } - if ((opts.max_streams < 1) || (opts.send_stream >= opts.max_streams) || - (opts.recv_stream >= opts.max_streams) || - (opts.const_stream >= opts.max_streams) || - (opts.compute_stream >= opts.max_streams)) { - status.Update(errors::InvalidArgument("Bad graph argument supplied.")); - } - TF_RETURN_IF_ERROR(status); - - // Topologically sort the nodes. - std::vector order; - GetReversePostOrder(*graph, &order); - if (VLOG_IS_ON(2)) { - for (Node* n : order) { - const int node_id = n->id(); - VLOG(2) << "Node " << node_id << " " << n->type_string() << " " - << n->name() << " " << n->in_edges().size() << " inputs"; - for (const Edge* e : n->in_edges()) { - VLOG(2) << " Edge from " << e->src()->id() << " " << e->src()->name() - << " fanout " << e->src()->out_edges().size(); - } - } - } - // We perform stream assignment assuming a large number of - // stream IDs and then map these down to the required number of streams - // using simple round-robin. - // Stream Assignment strategy: - // 1. Nodes with zero inputs are always be executed on a - // fresh stream. - // 2. Try to execute a node on the same stream as one of its - // inputs to avoid inter-stream dependencies. - // 3. If any input comes from a node with a large fanout then - // perhaps an indication that it is shared between parallel - // streams of work. We choose a new stream here so that all consumers - // of the tensor are likely to run in parallel. - int highest_stream_id = -1; - for (Node* n : order) { - VLOG(3) << "Inspecting node " << n->DebugString(); - const int node_id = n->id(); - const string& op = n->type_string(); - - // Determine a suitable stream to use. - int stream_id = highest_stream_id + 1; - for (const Edge* e : n->in_edges()) { - const size_t fanout = e->src()->out_edges().size(); - if (fanout == 1) { - stream_id = (*node_to_stream_id)[e->src()->id()]; - break; - } - } - // Override stream for specific op types. - if (op == "_Send") { - if (opts.send_stream >= 0) stream_id = opts.send_stream; - } else if (op == "_Recv") { - if (opts.recv_stream >= 0) stream_id = opts.recv_stream; - } else if (op == "Const") { - if (opts.const_stream >= 0) stream_id = opts.const_stream; - } else { - if (opts.compute_stream >= 0) stream_id = opts.compute_stream; - } - - (*node_to_stream_id)[node_id] = stream_id % opts.max_streams; - highest_stream_id = std::max(stream_id, highest_stream_id); - } - VLOG(1) << "Identified " << highest_stream_id << " candidate streams for " - << order.size() << " nodes."; - - return Status::OK(); -} - -} // namespace gpu_stream_util -} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util.h b/tensorflow/core/common_runtime/gpu/gpu_stream_util.h deleted file mode 100644 index c61ada96efe..00000000000 --- a/tensorflow/core/common_runtime/gpu/gpu_stream_util.h +++ /dev/null @@ -1,45 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_ -#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_ - -#include - -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace gpu_stream_util { - -struct AssignStreamsOpts { - int32 max_streams = 1; - // The following options specify a stream to use for specific op - // types. The value -1 allows ops to be assigned to any stream. - int32 send_stream = -1; - int32 recv_stream = -1; - int32 const_stream = -1; - int32 compute_stream = -1; -}; - -// Given the input graph, assigns every node in the graph with a -// stream_id that should be used. -Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts, - std::unordered_map* node_to_stream_id); - -} // namespace gpu_stream_util -} // namespace tensorflow - -#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_STREAM_UTIL_H_ diff --git a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc b/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc deleted file mode 100644 index 2500425359c..00000000000 --- a/tensorflow/core/common_runtime/gpu/gpu_stream_util_test.cc +++ /dev/null @@ -1,148 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/common_runtime/gpu/gpu_stream_util.h" - -#include "tensorflow/cc/ops/sendrecv_ops.h" -#include "tensorflow/cc/ops/standard_ops.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/graph/graph_def_builder.h" -#include "tensorflow/core/graph/node_builder.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/kernels/ops_util.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace { - -class GpuStreamUtilTest : public OpsTestBase { - protected: -}; - -TEST_F(GpuStreamUtilTest, BogusOpts) { - auto root = Scope::NewRootScope().ExitOnError(); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(root.ToGraph(&g)); - std::unordered_map node_to_stream_id; - gpu_stream_util::AssignStreamsOpts opts; - Status status; - status = gpu_stream_util::AssignStreams(nullptr, opts, &node_to_stream_id); - EXPECT_FALSE(status.ok()); - status = gpu_stream_util::AssignStreams(&g, opts, nullptr); - EXPECT_FALSE(status.ok()); - opts.max_streams = 0; - status = gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id); - EXPECT_FALSE(status.ok()); - opts.max_streams = 1; - opts.compute_stream = 5; - status = gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id); - EXPECT_FALSE(status.ok()); -} - -TEST_F(GpuStreamUtilTest, EmptyGraph) { - auto root = Scope::NewRootScope().ExitOnError(); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(root.ToGraph(&g)); - std::unordered_map node_to_stream_id; - gpu_stream_util::AssignStreamsOpts opts; - TF_ASSERT_OK(gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id)); - EXPECT_EQ(2, node_to_stream_id.size()); // _SOURCE and _SINK -} - -TEST_F(GpuStreamUtilTest, SimpleGraphOneStream) { - auto root = Scope::DisabledShapeInferenceScope().ExitOnError(); - ops::MatMul(root, {}, {}); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(root.ToGraph(&g)); - - std::unordered_map node_to_stream_id; - gpu_stream_util::AssignStreamsOpts opts; - TF_ASSERT_OK(gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id)); - - // There should be 5 nodes assigned. - EXPECT_EQ(5, node_to_stream_id.size()); - - // All of them should have stream 0. - for (const auto& it : node_to_stream_id) { - EXPECT_EQ(0, it.second); - } -} - -TEST_F(GpuStreamUtilTest, SimpleGraphManyStreams) { - auto root = Scope::DisabledShapeInferenceScope().ExitOnError(); - ops::MatMul(root, {}, {}); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(root.ToGraph(&g)); - - std::unordered_map node_to_stream_id; - gpu_stream_util::AssignStreamsOpts opts; - opts.max_streams = 3; - TF_ASSERT_OK(gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id)); - - // There should be 5 nodes assigned. - EXPECT_EQ(5, node_to_stream_id.size()); - - // All of them should have a stream in the range [0..max_streams). - for (const auto& it : node_to_stream_id) { - EXPECT_GE(it.second, 0); - EXPECT_LT(it.second, opts.max_streams); - } -} - -TEST_F(GpuStreamUtilTest, StreamOverrides) { - auto root = Scope::DisabledShapeInferenceScope().ExitOnError(); - ops::_Recv(root.WithOpName("input"), DT_FLOAT, "input", "/cpu:0", 0, - "/device:GPU:0"); - Output n = ops::MatMul(root, {}, {}); - ops::_Send(root.WithOpName("output"), n, "output", "/device:GPU:0", 0, - "/cpu:0"); - Graph g(OpRegistry::Global()); - TF_ASSERT_OK(root.ToGraph(&g)); - - // Perform stream assignment using a large number of streams, but with - // op types constrained to specific streams. - std::unordered_map node_to_stream_id; - gpu_stream_util::AssignStreamsOpts opts; - opts.max_streams = 100; - opts.const_stream = 90; - opts.send_stream = 91; - opts.recv_stream = 92; - opts.compute_stream = 93; - TF_ASSERT_OK(gpu_stream_util::AssignStreams(&g, opts, &node_to_stream_id)); - - // There should be 7 nodes assigned. - EXPECT_EQ(7, node_to_stream_id.size()); // including _SOURCE and _SINK - - // Nodes should be assigned to streams by op type. - for (const auto& it : node_to_stream_id) { - Node* n = g.FindNodeId(it.first); - const string& op = n->type_string(); - const int stream = it.second; - if (op == "Const") { - EXPECT_EQ(stream, 90); - } else if (op == "_Send") { - EXPECT_EQ(stream, 91); - } else if (op == "_Recv") { - EXPECT_EQ(stream, 92); - } else { // Compute. - EXPECT_EQ(stream, 93); - } - } -} - -} // namespace -} // namespace tensorflow diff --git a/tensorflow/core/framework/device_base.h b/tensorflow/core/framework/device_base.h index 81ddf8df98d..3415c7f23fc 100644 --- a/tensorflow/core/framework/device_base.h +++ b/tensorflow/core/framework/device_base.h @@ -119,9 +119,6 @@ class DeviceContext : public core::RefCounted { } }; -// map[i] is the DeviceContext* for the node with id i, if i < map.size(). -typedef std::vector DeviceContextMap; - class DeviceBase { public: explicit DeviceBase(Env* env) : env_(env) {}