diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 42fa139282a..8810b8731ae 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -388,6 +388,16 @@ tf_gen_op_wrappers_cc( visibility = ["//tensorflow:internal"], ) +tf_gen_op_wrappers_cc( + name = "functional_ops", + include_internal_ops = 1, + op_lib_names = [ + "functional_ops", + ], + pkg = "//tensorflow/core", + visibility = ["//tensorflow:internal"], +) + tf_gen_op_wrappers_cc( name = "resource_variable_ops", include_internal_ops = 1, diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 03276121294..f58f0bdc9f5 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -422,7 +422,7 @@ class ReferenceUtil { static std::unique_ptr> ApplyElementwise2D( F&& f, const Array2D& array1, const Array2D&... arrays) { AssertSameSize2D(array1, arrays...); - auto result = MakeUnique>(array1.n1(), array1.n1()); + auto result = MakeUnique>(array1.n1(), array1.n2()); for (int64 i = 0; i < array1.n1(); ++i) { for (int64 j = 0; j < array1.n2(); ++j) { (*result)(i, j) = f(array1(i, j), arrays(i, j)...); diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc index 240da35ef19..dc002846e9e 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc @@ -24,6 +24,11 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); + // Output fusion is not currently supported on CPUs. + if (producer->opcode() == HloOpcode::kFusion) { + return false; + } + // Condition for consumer: must be elementwise or a fusion op // (which necessarily only contains elementwise operations) if (!(consumer->opcode() == HloOpcode::kFusion || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 34a44ad4054..a36dcbbd2fa 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -46,6 +46,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, int64 operand_index) { HloInstruction* producer = consumer->mutable_operand(operand_index); + // Output fusion is not currently supported on GPUs. + if (producer->opcode() == HloOpcode::kFusion) { + return false; + } + // RNG operations are not currently parallel-friendly on GPU. if (producer->opcode() == HloOpcode::kRng) { return false; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 179e1832654..66fb0599752 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1570,7 +1570,9 @@ string HloInstruction::ToCategory() const { return "non-elementwise fusion"; } case FusionKind::kInput: - return "reduce fusion"; + return "input fusion"; + case FusionKind::kOutput: + return "output fusion"; case FusionKind::kTransposeDot: return "dot fusion"; case FusionKind::kConvBackwardFilter: @@ -1618,7 +1620,6 @@ bool HloInstruction::IsFusable() const { // Some kinds of instructions don't make sense to fuse. switch (opcode_) { - case HloOpcode::kFusion: case HloOpcode::kInfeed: case HloOpcode::kOutfeed: case HloOpcode::kParameter: @@ -2186,6 +2187,8 @@ string ToString(HloInstruction::FusionKind kind) { return "kLoop"; case HloInstruction::FusionKind::kInput: return "kInput"; + case HloInstruction::FusionKind::kOutput: + return "kOutput"; case HloInstruction::FusionKind::kTransposeDot: return "kTransposeDot"; case HloInstruction::FusionKind::kConvBackwardFilter: diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 5ec17c80048..43935690dff 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -54,7 +54,8 @@ class HloInstruction { public: enum class FusionKind { kLoop, // Fused into a loop. - kInput, // Fused into a reduction kernel. + kInput, // Op's input is fused into the op itself. + kOutput, // Op's output is fused into the op itself. kTransposeDot, // Fused into a dot with transposed operands. kConvBackwardFilter, // Fused into a backward filter convolution. kConvBackwardInput, // Fused into a backward input convolution. diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 15e457f932c..25f2922bf8e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -379,7 +379,12 @@ def multi_label_head(n_classes, loss_fn=None): """Creates a Head for multi label classification. - The Head uses sigmoid cross entropy loss. + Multi-label classification handles the case where each example may have zero + or more associated labels, from a discrete set. This is distinct from + `multi_class_head` which has exactly one label from a discrete set. + + This head by default uses sigmoid cross entropy loss, which expects as input + a multi-hot tensor of shape `(batch_size, num_classes)`. Args: n_classes: Integer, number of classes, must be >= 2 diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1617addba05..119bc0f8997 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2417,6 +2417,9 @@ tf_cc_test( ":test_main", ":testlib", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:functional_ops", "//tensorflow/core/kernels:cast_op", "//tensorflow/core/kernels:cwise_op", "//tensorflow/core/kernels:function_ops", diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 13e20568fff..3644279b920 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -1001,25 +1001,19 @@ string NewName(const Node* n, bool pretty) { void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) { // We visit nodes in forward topological sort order, which is a // possible execution order of the graph. - std::vector pending(g->num_node_ids()); - std::deque ready; - for (const Node* n : g->nodes()) { - pending[n->id()] = n->in_edges().size(); - if (pending[n->id()] == 0) ready.push_back(n); - } gtl::InlinedVector inputs; gdef->Clear(); gdef->mutable_versions()->CopyFrom(g->versions()); - while (!ready.empty()) { - const Node* n = ready.front(); - ready.pop_front(); - for (const Edge* e : n->out_edges()) { - const Node* next = e->dst(); - if (--pending[next->id()] == 0) { - ready.push_back(next); - } + + std::vector start_nodes; + for (Node* n : g->nodes()) { + if (n->out_edges().empty()) { + start_nodes.push_back(n); } - if (!n->IsOp()) continue; + } + + ReverseDFSFrom(*g, start_nodes, nullptr, [gdef, pretty, &inputs](Node* n) { + if (!n->IsOp()) return; NodeDef* ndef = gdef->add_node(); ndef->set_name(NewName(n, pretty)); ndef->set_op(n->type_string()); @@ -1054,7 +1048,7 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) { ndef->add_input(strings::StrCat(srcname, ":", e->src_output())); } } - } + }); } string DebugString(const Graph* g) { diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 8f70ab8783c..af1ff6aec03 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -17,6 +17,10 @@ limitations under the License. #include +#include "tensorflow/cc/ops/array_ops_internal.h" +#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/functional_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/executor.h" @@ -28,10 +32,12 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/equal_graph_def.h" namespace tensorflow { @@ -58,13 +64,8 @@ class FunctionTest : public ::testing::Test { : device_(DeviceFactory::NewDevice("CPU", {}, "/job:localhost/replica:0/task:0")) {} - ~FunctionTest() override { - delete exec_; - delete device_; - } - void Create(const FunctionDef& fdef, InstantiateAttrValueSlice attrs) { - delete exec_; + exec_ = nullptr; InstantiationResult result; TF_CHECK_OK(InstantiateFunction(fdef, attrs, GetOpSig, &result)); @@ -79,15 +80,18 @@ class FunctionTest : public ::testing::Test { const int version = g->versions().producer(); LocalExecutorParams params; - params.device = device_; + params.device = device_.get(); params.create_kernel = [this, version](const NodeDef& ndef, OpKernel** kernel) { - return CreateNonCachedKernel(device_, nullptr, ndef, version, kernel); + return CreateNonCachedKernel(device_.get(), nullptr, ndef, version, + kernel); }; params.delete_kernel = [](OpKernel* kernel) { DeleteNonCachedKernel(kernel); }; - TF_CHECK_OK(NewLocalExecutor(params, g, &exec_)); + Executor* exec; + TF_CHECK_OK(NewLocalExecutor(params, g, &exec)); + exec_.reset(exec); } void Run(const std::vector& args, std::vector rets) { @@ -105,8 +109,8 @@ class FunctionTest : public ::testing::Test { } } - Device* device_ = nullptr; - Executor* exec_ = nullptr; + std::unique_ptr device_; + std::unique_ptr exec_; DataTypeVector arg_types_; DataTypeVector ret_types_; }; @@ -136,21 +140,15 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { : device_(DeviceFactory::NewDevice("CPU", {}, "/job:localhost/replica:0/task:0")) {} - ~FunctionLibraryRuntimeTest() override { - delete lib_; - delete lib_def_; - delete device_; - } - void Init(const std::vector& flib) { FunctionDefLibrary proto; for (const auto& fdef : flib) *(proto.add_function()) = fdef; - delete lib_def_; - lib_def_ = new FunctionLibraryDefinition(OpRegistry::Global(), proto); - delete lib_; + lib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), proto)); OptimizerOptions opts; - lib_ = NewFunctionLibraryRuntime(nullptr, Env::Default(), device_, - TF_GRAPH_DEF_VERSION, lib_def_, opts); + lib_.reset(NewFunctionLibraryRuntime(nullptr, Env::Default(), device_.get(), + TF_GRAPH_DEF_VERSION, lib_def_.get(), + opts)); + fdef_lib_ = lib_def_->ToProto(); } Status Run(const string& name, InstantiateAttrValueSlice attrs, @@ -190,7 +188,8 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { return Status::OK(); } - Graph* GetFuncBody(const string& name, InstantiateAttrValueSlice attrs) { + std::unique_ptr GetFuncBody(const string& name, + InstantiateAttrValueSlice attrs) { FunctionLibraryRuntime::Handle handle; Status status = lib_->Instantiate(name, attrs, &handle); if (!status.ok()) { @@ -199,12 +198,13 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { } const FunctionBody* fbody = lib_->GetFunctionBody(handle); CHECK_NOTNULL(fbody); - Graph* ret = new Graph(lib_def_); - CopyGraph(*fbody->graph, ret); + std::unique_ptr ret(new Graph(lib_def_.get())); + CopyGraph(*fbody->graph, ret.get()); return ret; } - Graph* GetGradBody(const string& func, InstantiateAttrValueSlice attrs) { + std::unique_ptr GetGradBody(const string& func, + InstantiateAttrValueSlice attrs) { FunctionLibraryRuntime::Handle handle; Status status = lib_->Instantiate(func, attrs, &handle); if (!status.ok()) { @@ -213,17 +213,17 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { } const FunctionBody* fbody = lib_->GetFunctionBody(handle); CHECK_NOTNULL(fbody); - FunctionBody* gbody = SymbolicGradient(*fbody); + std::unique_ptr gbody(SymbolicGradient(*fbody)); CHECK_NOTNULL(gbody); - Graph* ret = new Graph(lib_def_); - CopyGraph(*gbody->graph, ret); - delete gbody; + std::unique_ptr ret(new Graph(lib_def_.get())); + CopyGraph(*gbody->graph, ret.get()); return ret; } - Device* device_ = nullptr; - FunctionLibraryDefinition* lib_def_ = nullptr; - FunctionLibraryRuntime* lib_ = nullptr; + std::unique_ptr device_; + std::unique_ptr lib_def_; + std::unique_ptr lib_; + FunctionDefLibrary fdef_lib_; }; TEST_F(FunctionLibraryRuntimeTest, IsStateful) { @@ -254,113 +254,174 @@ TEST_F(FunctionLibraryRuntimeTest, XTimesN) { test::ExpectTensorEqual(y, test::AsTensor({16, 32, 48, 64})); } +// Adds a function call to 'scope. +// TODO(phawkins): replace with C++ API for calling functions, when that exists. +Output Call(Scope* scope, const string& op_name, const string& fn_name, + gtl::ArraySlice inputs) { + NodeDef def; + NodeDefBuilder builder(op_name, fn_name, scope->graph()->op_registry()); + for (const Input& input : inputs) { + builder.Input(input.node()->name(), input.index(), + input.node()->output_type(input.index())); + } + TF_CHECK_OK(builder.Finalize(&def)); + Status status; + Node* n = scope->graph()->AddNode(def, &status); + TF_CHECK_OK(status); + for (int i = 0; i < inputs.size(); ++i) { + scope->graph()->AddEdge(inputs[i].node(), inputs[i].index(), n, i); + } + return Output(n); +} + TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { Init({test::function::XTimesTwo(), test::function::XTimesFour(), test::function::XTimes16()}); - Graph* g = GetFuncBody("XTimes16", {{"T", DT_FLOAT}}); + std::unique_ptr g = GetFuncBody("XTimes16", {{"T", DT_FLOAT}}); ASSERT_TRUE(g != nullptr); - const char* e0 = R"P( -(n2:float) -> (n4:float) { - n3 = XTimesFour[T=float](n2) - n4 = XTimesFour[T=float](n3) -} -)P"; - EXPECT_EQ(e0, DebugString(g)); - ExpandInlineFunctions(lib_, g); - const char* e1 = R"P( -(n2:float) -> (n17:float) { - n10 = Identity[T=float](n2) - n7 = XTimesTwo[T=float](n10) - n8 = XTimesTwo[T=float](n7) - n11 = Identity[T=float](n8) - n16 = Identity[T=float](n11) - n13 = XTimesTwo[T=float](n16) - n14 = XTimesTwo[T=float](n13) - n17 = Identity[T=float](n14) -} -)P"; - EXPECT_EQ(e1, DebugString(g)); + { + Scope s = Scope::NewRootScope(); + TF_ASSERT_OK(s.graph()->AddFunctionLibrary(fdef_lib_)); + auto arg = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto a = Call(&s, "x4", "XTimesFour", {arg}); + auto b = Call(&s, "y", "XTimesFour", {a}); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), b, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); - ExpandInlineFunctions(lib_, g); - const char* e2 = R"P( -(n2:float) -> (n17:float) { - n18 = Const[dtype=int64, value=Tensor]() - n25 = Const[dtype=int64, value=Tensor]() - n32 = Const[dtype=int64, value=Tensor]() - n39 = Const[dtype=int64, value=Tensor]() - n19 = Cast[DstT=float, SrcT=int64](n18) - n26 = Cast[DstT=float, SrcT=int64](n25) - n33 = Cast[DstT=float, SrcT=int64](n32) - n40 = Cast[DstT=float, SrcT=int64](n39) - n10 = Identity[T=float](n2) - n23 = Identity[T=float](n10) - n21 = Mul[T=float](n23, n19) - n24 = Identity[T=float](n21) - n30 = Identity[T=float](n24) - n28 = Mul[T=float](n30, n26) - n31 = Identity[T=float](n28) - n11 = Identity[T=float](n31) - n16 = Identity[T=float](n11) - n37 = Identity[T=float](n16) - n35 = Mul[T=float](n37, n33) - n38 = Identity[T=float](n35) - n44 = Identity[T=float](n38) - n42 = Mul[T=float](n44, n40) - n45 = Identity[T=float](n42) - n17 = Identity[T=float](n45) -} -)P"; - EXPECT_EQ(e2, DebugString(g)); + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + ExpandInlineFunctions(lib_.get(), g.get()); + { + Scope s = Scope::NewRootScope(); + TF_ASSERT_OK(s.graph()->AddFunctionLibrary(fdef_lib_)); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto func0 = ops::Identity(s.WithOpName("Func/_0"), x); + auto x4_x2 = Call(&s, "x4/x2", "XTimesTwo", {func0}); + auto x4_y = Call(&s, "x4/y", "XTimesTwo", {x4_x2}); + auto func1 = ops::Identity(s.WithOpName("Func/_1"), x4_y); + auto func2 = ops::Identity(s.WithOpName("Func/_2"), func1); + auto y_x2 = Call(&s, "y/x2", "XTimesTwo", {func2}); + auto y_y = Call(&s, "y/y", "XTimesTwo", {y_x2}); + auto func3 = ops::Identity(s.WithOpName("Func/_3"), y_y); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), func3, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + ExpandInlineFunctions(lib_.get(), g.get()); + GraphDef e2; + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), 2LL); + auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), 2LL); + auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), 2LL); + auto y_y_two = ops::Const(s.WithOpName("y/y/two"), 2LL); + auto x4_x2_scale = + ops::Cast(s.WithOpName("x4/x2/scale"), x4_x2_two, DT_FLOAT); + auto x4_y_scale = ops::Cast(s.WithOpName("x4/y/scale"), x4_y_two, DT_FLOAT); + auto y_x2_scale = ops::Cast(s.WithOpName("y/x2/scale"), y_x2_two, DT_FLOAT); + auto y_y_scale = ops::Cast(s.WithOpName("y/y/scale"), y_y_two, DT_FLOAT); + auto func0 = ops::Identity(s.WithOpName("Func/_0"), x); + auto func4 = ops::Identity(s.WithOpName("Func/_4"), func0); + auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), func4, x4_x2_scale); + auto func5 = ops::Identity(s.WithOpName("Func/_5"), x4_x2_y); + auto func6 = ops::Identity(s.WithOpName("Func/_6"), func5); + auto x4_y_y = ops::Mul(s.WithOpName("x4/y/y"), func6, x4_y_scale); + auto func7 = ops::Identity(s.WithOpName("Func/_7"), x4_y_y); + auto func1 = ops::Identity(s.WithOpName("Func/_1"), func7); + auto func2 = ops::Identity(s.WithOpName("Func/_2"), func1); + auto func8 = ops::Identity(s.WithOpName("Func/_8"), func2); + auto y_x2_y = ops::Mul(s.WithOpName("y/x2/y"), func8, y_x2_scale); + auto func9 = ops::Identity(s.WithOpName("Func/_9"), y_x2_y); + auto func10 = ops::Identity(s.WithOpName("Func/_10"), func9); + auto y_y_y = ops::Mul(s.WithOpName("y/y/y"), func10, y_y_scale); + auto func11 = ops::Identity(s.WithOpName("Func/_11"), y_y_y); + auto func3 = ops::Identity(s.WithOpName("Func/_3"), func11); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), func3, 0); + TF_ASSERT_OK(s.ToGraphDef(&e2)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(e2, actual); + } // No further inlining. - ExpandInlineFunctions(lib_, g); - EXPECT_EQ(e2, DebugString(g)); + ExpandInlineFunctions(lib_.get(), g.get()); + { + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(e2, actual); + } // Get rid of redundant Identity nodes. - RemoveIdentityNodes(g); - const char* e3 = R"P( -(n2:float) -> (n42:float) { - n18 = Const[dtype=int64, value=Tensor]() - n25 = Const[dtype=int64, value=Tensor]() - n32 = Const[dtype=int64, value=Tensor]() - n39 = Const[dtype=int64, value=Tensor]() - n19 = Cast[DstT=float, SrcT=int64](n18) - n26 = Cast[DstT=float, SrcT=int64](n25) - n33 = Cast[DstT=float, SrcT=int64](n32) - n40 = Cast[DstT=float, SrcT=int64](n39) - n21 = Mul[T=float](n2, n19) - n28 = Mul[T=float](n21, n26) - n35 = Mul[T=float](n28, n33) - n42 = Mul[T=float](n35, n40) -} -)P"; - EXPECT_EQ(e3, DebugString(g)); - delete g; + RemoveIdentityNodes(g.get()); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto x4_x2_two = ops::Const(s.WithOpName("x4/x2/two"), 2LL); + auto x4_y_two = ops::Const(s.WithOpName("x4/y/two"), 2LL); + auto y_x2_two = ops::Const(s.WithOpName("y/x2/two"), 2LL); + auto y_y_two = ops::Const(s.WithOpName("y/y/two"), 2LL); + auto x4_x2_scale = + ops::Cast(s.WithOpName("x4/x2/scale"), x4_x2_two, DT_FLOAT); + auto x4_y_scale = ops::Cast(s.WithOpName("x4/y/scale"), x4_y_two, DT_FLOAT); + auto y_x2_scale = ops::Cast(s.WithOpName("y/x2/scale"), y_x2_two, DT_FLOAT); + auto y_y_scale = ops::Cast(s.WithOpName("y/y/scale"), y_y_two, DT_FLOAT); + auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale); + auto x4_y_y = ops::Mul(s.WithOpName("x4/y/y"), x4_x2_y, x4_y_scale); + auto y_x2_y = ops::Mul(s.WithOpName("y/x2/y"), x4_y_y, y_x2_scale); + auto y_y_y = ops::Mul(s.WithOpName("y/y/y"), y_x2_y, y_y_scale); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y_y_y, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) { Init({test::function::XTimesTwo(), test::function::XTimesFour(), test::function::XTimes16()}); - std::unique_ptr g(GetFuncBody("XTimes16", {{"T", DT_FLOAT}})); + std::unique_ptr g = GetFuncBody("XTimes16", {{"T", DT_FLOAT}}); ASSERT_TRUE(g != nullptr); - ExpandInlineFunctions(lib_, g.get()); - OptimizeGraph(lib_, &g); - const char* e0 = R"P( -(n2:float) -> (n7:float) { - n8 = Const[dtype=float, value=Tensor]() - n4 = Mul[T=float](n2, n8) - n5 = Mul[T=float](n4, n8) - n6 = Mul[T=float](n5, n8) - n7 = Mul[T=float](n6, n8) -} -)P"; - EXPECT_EQ(e0, DebugString(g.get())); + ExpandInlineFunctions(lib_.get(), g.get()); + OptimizeGraph(lib_.get(), &g); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto x4_x2_scale = ops::Const( + s.WithOpName("x4/x2/scale/_12__cf__2") + .WithDevice("/job:localhost/replica:0/task:0/cpu:0"), + 2.0f); + auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale); + auto x4_y_y = ops::Mul(s.WithOpName("x4/y/y"), x4_x2_y, x4_x2_scale); + auto y_x2_y = ops::Mul(s.WithOpName("y/x2/y"), x4_y_y, x4_x2_scale); + auto y_y_y = ops::Mul(s.WithOpName("y/y/y"), y_x2_y, x4_x2_scale); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y_y_y, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, ManySwapsNodeDef) { auto func = FDH::Create( // Creates a FunctionDef using NodeDefs - // Name + // Name "ManySwapsNodeDef", // Input {"x: float", "y: float"}, @@ -379,9 +440,9 @@ TEST_F(FunctionLibraryRuntimeTest, ManySwapsNodeDef) { // Return {{"o", "g:output"}}); Init({test::function::Swap(), func}); - std::unique_ptr g(GetFuncBody("ManySwapsNodeDef", {})); + std::unique_ptr g = GetFuncBody("ManySwapsNodeDef", {}); ASSERT_TRUE(g != nullptr); - OptimizeGraph(lib_, &g); + OptimizeGraph(lib_.get(), &g); const char* e0 = R"P( (n3:float, n2:float) -> (n3:float) { } @@ -412,24 +473,35 @@ TEST_F(FunctionLibraryRuntimeTest, ControlDeps) { {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, {{"o", "o:z:0"}}); Init({test::function::Swap(), func}); - std::unique_ptr g(GetFuncBody("ManySwapsFirst", {})); + std::unique_ptr g = GetFuncBody("ManySwapsFirst", {}); ASSERT_TRUE(g != nullptr); - OptimizeGraph(lib_, &g); + OptimizeGraph(lib_.get(), &g); - // NOTE: We can remove n8, n9, n10, n11 with a control edge n8->n5. + // NOTE: We can remove func0, func1, func2, func9 with a control edge n8->n5. // But we don't have a pass doing that. - const char* e0 = R"P( -(n3:float, n2:float) -> (n6:float) { - n4 = Mul[T=float](n3, n3) - n8 = NoOp() @ n4 - n9 = Identity[T=float](n3) @ n8 - n10 = Identity[T=float](n2) @ n8 - n11 = NoOp() @ n9, n10 - n5 = Mul[T=float](n2, n2) @ n11 - n6 = Add[T=float](n4, n5) -} -)P"; - EXPECT_EQ(e0, DebugString(g.get())); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto x2 = ops::Mul(s.WithOpName("x2"), x, x); + auto func0 = ops::NoOp(s.WithOpName("Func/_0").WithControlDependencies(x2)); + auto func1 = ops::Identity( + s.WithOpName("Func/_1").WithControlDependencies({func0}), x); + auto func2 = ops::Identity( + s.WithOpName("Func/_2").WithControlDependencies({func0}), y); + auto func9 = ops::NoOp(s.WithOpName("Func/_9").WithControlDependencies( + {func1.output.op(), func2.output.op()})); + auto y2 = + ops::Mul(s.WithOpName("y2").WithControlDependencies({func9}), y, y); + auto o = ops::Add(s.WithOpName("o"), x2, y2); + auto ret = ops::_Retval(s.WithOpName("o_RetVal"), o, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, Error_NotFound) { @@ -476,84 +548,136 @@ TEST_F(FunctionLibraryRuntimeTest, Error_InstantiaionError) { TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { Init({test::function::XTimesTwo(), test::function::XTimesFour(), test::function::XTimes16()}); - auto f = GetFuncBody("XTimesTwo", {{"T", DT_FLOAT}}); - const char* e0 = R"P( -(n4:float) -> (n5:float) { - n2 = Const[dtype=int64, value=Tensor]() - n3 = Cast[DstT=float, SrcT=int64](n2) - n5 = Mul[T=float](n4, n3) -} -)P"; - EXPECT_EQ(e0, DebugString(f)); - delete f; - std::unique_ptr g(GetGradBody("XTimesTwo", {{"T", DT_FLOAT}})); - const char* e1 = R"P( -(n4:float, n6:float) -> (n7:float) { - n2 = Const[dtype=int64, value=Tensor]() - n3 = Cast[DstT=float, SrcT=int64](n2) - n5 = Mul[T=float](n4, n3) - n7 = SymbolicGradient[Tin={float, float, float}, Tout={float, float}, f=Mul[T=float]](n4, n3, n6) -} -)P"; - EXPECT_EQ(e1, DebugString(g.get())); + std::unique_ptr f = GetFuncBody("XTimesTwo", {{"T", DT_FLOAT}}); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto two = ops::Const(s.WithOpName("two"), 2LL); + auto scale = ops::Cast(s.WithOpName("scale"), two, DT_FLOAT); + auto y = ops::Mul(s.WithOpName("y"), x, scale); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); - OptimizeGraph(lib_, &g); - const char* e2 = R"P( -(n2:float, n3:float) -> (n9:float) { - n10 = Const[dtype=float, value=Tensor]() - n11 = Const[dtype=int32, value=Tensor]() - n6 = Shape[T=float, out_type=int32](n2) - n5 = Mul[T=float](n3, n10) - n7 = BroadcastGradientArgs[T=int32](n6, n11) - n8 = Sum[T=float, Tidx=int32, keep_dims=false](n5, n7) - n9 = Reshape[T=float, Tshape=int32](n8, n6) -} -)P"; - EXPECT_EQ(e2, DebugString(g.get())); + GraphDef actual; + f->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + std::unique_ptr g = GetGradBody("XTimesTwo", {{"T", DT_FLOAT}}); + + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); + auto two = ops::Const(s.WithOpName("two"), 2LL); + auto scale = ops::Cast(s.WithOpName("scale"), two, DT_FLOAT); + auto y = ops::Mul(s.WithOpName("y"), x, scale); + NameAttrList fn; + fn.set_name("Mul"); + (*fn.mutable_attr())["T"].set_type(DT_FLOAT); + auto func1 = ops::SymbolicGradient( + s.WithOpName("Func/_1"), std::initializer_list{x, scale, func0}, + {DT_FLOAT, DT_FLOAT}, fn); + auto func2 = ops::_Retval(s.WithOpName("Func/_2"), func1[0], 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + OptimizeGraph(lib_.get(), &g); + + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); + auto scale = + ops::Const(s.WithOpName("scale/_5__cf__6") + .WithDevice("/job:localhost/replica:0/task:0/cpu:0"), + 2.0f); + auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale); + auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x); + auto const0 = + ops::Const(s.WithOpName("Func/_1/sy/_6__cf__7") + .WithDevice("/job:localhost/replica:0/task:0/cpu:0"), + 0, {0}); + auto func1_rx = ops::internal::BroadcastGradientArgs( + s.WithOpName("Func/_1/rx"), func1_sx, const0); + auto func1_sum_gx = + ops::Sum(s.WithOpName("Func/_1/sum_gx"), func1_gx, func1_rx.r0); + auto func1_dx = + ops::Reshape(s.WithOpName("Func/_1/dx"), func1_sum_gx, func1_sx); + auto func2 = ops::_Retval(s.WithOpName("Func/_2"), func1_dx, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, Gradient_Add) { Init({}); auto T = DT_FLOAT; - auto g = GetFuncBody("SymbolicGradient", - {{"f", FDH::FunctionRef("Add", {{"T", T}})}}); - const char* e0 = R"P( -(n7:float, n5:float, n2:float) -> (n14:float, n11:float) { - n3 = Identity[T=float](n2) - n4 = Identity[T=float](n2) - n6 = Shape[T=float, out_type=int32](n5) - n8 = Shape[T=float, out_type=int32](n7) - n9 = BroadcastGradientArgs[T=int32](n8, n6) - n10 = Sum[T=float, Tidx=int32, keep_dims=false](n3, n9:1) - n13 = Sum[T=float, Tidx=int32, keep_dims=false](n4, n9) - n11 = Reshape[T=float, Tshape=int32](n10, n6) - n14 = Reshape[T=float, Tshape=int32](n13, n8) -} -)P"; - EXPECT_EQ(e0, DebugString(g)); - delete g; + std::unique_ptr g = GetFuncBody( + "SymbolicGradient", {{"f", FDH::FunctionRef("Add", {{"T", T}})}}); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto dz = ops::_Arg(s.WithOpName("dz"), DT_FLOAT, 2); + auto gx = ops::Identity(s.WithOpName("gx"), dz); + auto gy = ops::Identity(s.WithOpName("gy"), dz); + auto sx = ops::Shape(s.WithOpName("sx"), x); + auto sy = ops::Shape(s.WithOpName("sy"), y); + auto rx = ops::internal::BroadcastGradientArgs(s.WithOpName("rx"), sx, sy); + auto sum_gx = ops::Sum(s.WithOpName("sum_gx"), gx, rx.r0); + auto sum_gy = ops::Sum(s.WithOpName("sum_gy"), gy, rx.r1); + auto dx = ops::Reshape(s.WithOpName("dx"), sum_gx, sx); + auto dy = ops::Reshape(s.WithOpName("dy"), sum_gy, sy); + auto dx_ret = ops::_Retval(s.WithOpName("dx_RetVal"), dx, 0); + auto dy_ret = ops::_Retval(s.WithOpName("dy_RetVal"), dy, 1); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, Gradient_Mul) { Init({}); auto T = DT_FLOAT; - auto g = GetFuncBody("SymbolicGradient", - {{"f", FDH::FunctionRef("Mul", {{"T", T}})}}); - const char* e0 = R"P( -(n6:float, n3:float, n2:float) -> (n14:float, n11:float) { - n4 = Mul[T=float](n2, n3) - n5 = Shape[T=float, out_type=int32](n3) - n7 = Mul[T=float](n6, n2) - n8 = Shape[T=float, out_type=int32](n6) - n9 = BroadcastGradientArgs[T=int32](n8, n5) - n10 = Sum[T=float, Tidx=int32, keep_dims=false](n7, n9:1) - n13 = Sum[T=float, Tidx=int32, keep_dims=false](n4, n9) - n11 = Reshape[T=float, Tshape=int32](n10, n5) - n14 = Reshape[T=float, Tshape=int32](n13, n8) -} -)P"; - EXPECT_EQ(e0, DebugString(g)); - delete g; + std::unique_ptr g = GetFuncBody( + "SymbolicGradient", {{"f", FDH::FunctionRef("Mul", {{"T", T}})}}); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto dz = ops::_Arg(s.WithOpName("dz"), DT_FLOAT, 2); + auto gx = ops::Mul(s.WithOpName("gx"), dz, y); + auto sx = ops::Shape(s.WithOpName("sx"), x); + auto gy = ops::Mul(s.WithOpName("gy"), x, dz); + auto sy = ops::Shape(s.WithOpName("sy"), y); + auto rx = ops::internal::BroadcastGradientArgs(s.WithOpName("rx"), sx, sy); + auto sum_gx = ops::Sum(s.WithOpName("sum_gx"), gx, rx.r0); + auto sum_gy = ops::Sum(s.WithOpName("sum_gy"), gy, rx.r1); + auto dx = ops::Reshape(s.WithOpName("dx"), sum_gx, sx); + auto dy = ops::Reshape(s.WithOpName("dy"), sum_gy, sy); + auto dx_ret = ops::_Retval(s.WithOpName("dx_RetVal"), dx, 0); + auto dy_ret = ops::_Retval(s.WithOpName("dy_RetVal"), dy, 1); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } TEST_F(FunctionLibraryRuntimeTest, Gradient_AddSum) { @@ -570,108 +694,170 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_AddSum) { }); // TestGrad = Test'(x, y) - auto grad = - FDH::Define("TestGrad", {"x:float", "y:float"}, {"dx:float", "dy:float"}, - {}, {FDH::Const("dz", 1), - {{"grad0", "grad1"}, - "SymbolicGradient", - {"x", "y", "dz"}, - { - {"f", FDH::FunctionRef("Test")}, - {"Tin", DataTypeSlice{T, T, T}}, - {"Tout", DataTypeSlice{T, T}}, - }}, - {{"dx"}, "Identity", {"grad0"}, {{"T", DT_FLOAT}}}, - {{"dy"}, "Identity", {"grad1"}, {{"T", DT_FLOAT}}}}); + auto grad = FDH::Define("TestGrad", {"x:float", "y:float"}, + {"dx:float", "dy:float"}, {}, + {FDH::Const("dz", 1), + {{"grad0", "grad1"}, + "SymbolicGradient", + {"x", "y", "dz"}, + { + {"f", FDH::FunctionRef("Test")}, + {"Tin", DataTypeSlice{T, T, T}}, + {"Tout", DataTypeSlice{T, T}}, + }}, + {{"dx"}, "Identity", {"grad0"}, {{"T", DT_FLOAT}}}, + {{"dy"}, "Identity", {"grad1"}, {{"T", DT_FLOAT}}}}); Init({test, grad}); - std::unique_ptr g(GetFuncBody("TestGrad", {})); + std::unique_ptr g = GetFuncBody("TestGrad", {}); ASSERT_TRUE(g != nullptr); - const char* e0 = R"P( -(n4:float, n3:float) -> (n8:float, n6:float) { - n2 = Const[dtype=float, value=Tensor]() - n5 = SymbolicGradient[Tin={float, float, float}, Tout={float, float}, f=Test](n4, n3, n2) - n6 = Identity[T=float](n5:1) - n8 = Identity[T=float](n5) -} -)P"; - EXPECT_EQ(e0, DebugString(g.get())); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto dz = ops::Const(s.WithOpName("dz"), 1.0f); + NameAttrList fn; + fn.set_name("Test"); + auto grad0 = ops::SymbolicGradient(s.WithOpName("grad0"), + std::initializer_list{x, y, dz}, + {DT_FLOAT, DT_FLOAT}, fn); + auto dx = ops::Identity(s.WithOpName("dx"), grad0[0]); + auto dy = ops::Identity(s.WithOpName("dy"), grad0[1]); + auto dx_retval = ops::_Retval(s.WithOpName("dx_RetVal"), dx, 0); + auto dy_retval = ops::_Retval(s.WithOpName("dy_RetVal"), dy, 1); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); - ExpandInlineFunctions(lib_, g.get()); - const char* e1 = R"P( -(n4:float, n3:float) -> (n8:float, n6:float) { - n10 = Const[dtype=int32, value=Tensor]() - n11 = Const[dtype=int32, value=Tensor]() - n2 = Const[dtype=float, value=Tensor]() - n26 = Identity[T=float](n2) - n25 = Identity[T=float](n3) - n24 = Identity[T=float](n4) - n14 = Add[T=float](n24, n25) - n15 = Rank[T=float](n14) - n16 = Range[Tidx=int32](n11, n15, n10) - n20 = ZerosLike[T=int32](n15) - n17 = Sum[T=float, Tidx=int32, keep_dims=false](n14, n16) - n19 = SymbolicGradient[Tin={float, int32, float}, Tout={float, int32}, f=Sum[T=float, Tidx=int32, keep_dims=false]](n14, n16, n26) - n21 = SymbolicGradient[Tin={float, float, float}, Tout={float, float}, f=Add[T=float]](n24, n25, n19) - n27 = Identity[T=float](n21) - n28 = Identity[T=float](n21:1) - n8 = Identity[T=float](n27) - n6 = Identity[T=float](n28) -} -)P"; - EXPECT_EQ(e1, DebugString(g.get())); + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } - OptimizeGraph(lib_, &g); - const char* e2 = R"P( -(n4:float, n3:float) -> (n25:float, n23:float) { - n2 = Const[dtype=float, value=Tensor]() - n7 = Const[dtype=int32, value=Tensor]() - n8 = Const[dtype=int32, value=Tensor]() - n19 = Shape[T=float, out_type=int32](n3) - n9 = Add[T=float](n4, n3) - n20 = Shape[T=float, out_type=int32](n4) - n10 = Rank[T=float](n9) - n14 = Shape[T=float, out_type=int32](n9) - n21 = BroadcastGradientArgs[T=int32](n20, n19) - n11 = Range[Tidx=int32](n8, n10, n7) - n12 = Shape[T=int32, out_type=int32](n11) - n13 = Fill[T=int32](n12, n7) - n15 = DynamicStitch[N=2, T=int32](n11, n11, n14, n13) - n16 = Reshape[T=float, Tshape=int32](n2, n15) - n17 = Div[T=int32](n14, n15) - n18 = Tile[T=float, Tmultiples=int32](n16, n17) - n22 = Sum[T=float, Tidx=int32, keep_dims=false](n18, n21:1) - n24 = Sum[T=float, Tidx=int32, keep_dims=false](n18, n21) - n23 = Reshape[T=float, Tshape=int32](n22, n19) - n25 = Reshape[T=float, Tshape=int32](n24, n20) -} -)P"; - EXPECT_EQ(e2, DebugString(g.get())); + ExpandInlineFunctions(lib_.get(), g.get()); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto dz = ops::Const(s.WithOpName("dz"), 1.0f); + auto grad0_zero = ops::Const(s.WithOpName("grad0/zero"), 0); + auto grad0_one = ops::Const(s.WithOpName("grad0/one"), 1); + auto func0 = ops::Identity(s.WithOpName("Func/_0"), x); + auto func1 = ops::Identity(s.WithOpName("Func/_1"), y); + auto func2 = ops::Identity(s.WithOpName("Func/_2"), dz); + auto grad0_z = ops::Add(s.WithOpName("grad0/z"), func0, func1); + auto grad0_r = ops::Rank(s.WithOpName("grad0/r"), grad0_z); + auto grad0_indices = ops::Range(s.WithOpName("grad0/indices"), grad0_zero, + grad0_r, grad0_one); + auto grad0_l = ops::Sum(s.WithOpName("grad0/l"), grad0_z, grad0_indices); + + NameAttrList sum; + sum.set_name("Sum"); + (*sum.mutable_attr())["T"].set_type(DT_FLOAT); + (*sum.mutable_attr())["Tidx"].set_type(DT_INT32); + (*sum.mutable_attr())["keep_dims"].set_b(false); + auto grad0_func1 = ops::SymbolicGradient( + s.WithOpName("grad0/Func/_1"), + std::initializer_list{grad0_z, grad0_indices, func2}, + {DT_FLOAT, DT_INT32}, sum); + + auto grad0_func2 = ops::ZerosLike(s.WithOpName("grad0/Func/_2"), grad0_r); + + NameAttrList add; + add.set_name("Add"); + (*add.mutable_attr())["T"].set_type(DT_FLOAT); + auto grad0_func3 = ops::SymbolicGradient( + s.WithOpName("grad0/Func/_3"), + std::initializer_list{func0, func1, grad0_func1[0]}, + {DT_FLOAT, DT_FLOAT}, add); + + auto func3 = ops::Identity(s.WithOpName("Func/_3"), grad0_func3[0]); + auto func4 = ops::Identity(s.WithOpName("Func/_4"), grad0_func3[1]); + auto dx = ops::Identity(s.WithOpName("dx"), func3); + auto dy = ops::Identity(s.WithOpName("dy"), func4); + auto dx_retval = ops::_Retval(s.WithOpName("dx_RetVal"), dx, 0); + auto dy_retval = ops::_Retval(s.WithOpName("dy_RetVal"), dy, 1); + + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } + + OptimizeGraph(lib_.get(), &g); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); + auto y = ops::_Arg(s.WithOpName("y"), DT_FLOAT, 1); + auto dz = ops::Const(s.WithOpName("dz"), 1.0f); + auto grad0_zero = ops::Const(s.WithOpName("grad0/zero"), 0); + auto grad0_one = ops::Const(s.WithOpName("grad0/one"), 1); + auto grad0_z = ops::Add(s.WithOpName("grad0/z"), x, y); + auto grad0_r = ops::Rank(s.WithOpName("grad0/r"), grad0_z); + auto grad0_indices = ops::Range(s.WithOpName("grad0/indices"), grad0_zero, + grad0_r, grad0_one); + auto i_shape = + ops::Shape(s.WithOpName("grad0/Func/_1/i_shape"), grad0_indices); + auto stitch_val = ops::Fill(s.WithOpName("grad0/Func/_1/stitch_val1"), + i_shape, grad0_one); + auto x_shape = ops::Shape(s.WithOpName("grad0/Func/_1/x_shape"), grad0_z); + auto y_shape = ops::DynamicStitch( + s.WithOpName("grad0/Func/_1/y_shape"), + std::initializer_list{grad0_indices, grad0_indices}, + std::initializer_list{x_shape, stitch_val}); + auto dy_reshaped = + ops::Reshape(s.WithOpName("grad0/Func/_1/dy_reshaped"), dz, y_shape); + auto tile_scaling = + ops::Div(s.WithOpName("grad0/Func/_1/tile_scaling"), x_shape, y_shape); + auto func1_dx = + ops::Tile(s.WithOpName("grad0/Func/_1/dx"), dy_reshaped, tile_scaling); + + auto sx = ops::Shape(s.WithOpName("grad0/Func/_3/sx"), x); + auto sy = ops::Shape(s.WithOpName("grad0/Func/_3/sy"), y); + auto rx = ops::internal::BroadcastGradientArgs( + s.WithOpName("grad0/Func/_3/rx"), sx, sy); + auto sum_gx = + ops::Sum(s.WithOpName("grad0/Func/_3/sum_gx"), func1_dx, rx.r0); + auto sum_gy = + ops::Sum(s.WithOpName("grad0/Func/_3/sum_gy"), func1_dx, rx.r1); + auto dx = ops::Reshape(s.WithOpName("grad0/Func/_3/dx"), sum_gx, sx); + auto dy = ops::Reshape(s.WithOpName("grad0/Func/_3/dy"), sum_gy, sy); + + auto dx_retval = ops::_Retval(s.WithOpName("dx_RetVal"), dx, 0); + auto dy_retval = ops::_Retval(s.WithOpName("dy_RetVal"), dy, 1); + + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + + GraphDef actual; + g->ToGraphDef(&actual); + TF_EXPECT_GRAPH_EQ(expected, actual); + } } namespace { bool DoNothing(Graph* g) { return false; } -string Optimize(const std::function& pass, - const FunctionDef& fdef) { +GraphDef Optimize(const std::function& pass, + const FunctionDef& fdef) { InstantiationResult result; InstantiateAttrValueMap empty; TF_CHECK_OK(InstantiateFunction(fdef, empty, GetOpSig, &result)); - Graph* g = new Graph(OpRegistry::Global()); + std::unique_ptr g(new Graph(OpRegistry::Global())); GraphConstructorOptions opts; opts.allow_internal_ops = true; opts.expect_device_spec = false; - TF_CHECK_OK(ConvertGraphDefToGraph(opts, result.gdef, g)); - pass(g); - Graph* g1 = new Graph(OpRegistry::Global()); - CopyGraph(*g, g1); - delete g; + TF_CHECK_OK(ConvertGraphDefToGraph(opts, result.gdef, g.get())); + pass(g.get()); + std::unique_ptr g1(new Graph(OpRegistry::Global())); + CopyGraph(*g, g1.get()); + g = nullptr; GraphDef gdef; g1->ToGraphDef(&gdef); - delete g1; - return DebugString(gdef); + return gdef; } } // end namespace @@ -700,21 +886,25 @@ TEST(OptimizationTest, RemoveDeadNodes) { {{"keep_me"}, "RandomUniform", {"o"}, {{"T", T}, {"dtype", DT_FLOAT}}}, // y = Add(a, o) {{"y"}, "Add", {"a", "o"}, {{"T", T}}}}); - const char* e0 = R"S( -(x:int32) -> (y:int32) { - o = Const[dtype=int32, value=Tensor]() - keep_me = RandomUniform[T=int32, dtype=float, seed2=0, seed=0](o) - x1 = Add[T=int32](o, o) - a = Square[T=int32](x) - y = Add[T=int32](a, o) - x2 = Mul[T=int32](a, x1) - x3 = Mul[T=int32](x1, x2) -} -)S"; - EXPECT_EQ(Optimize(DoNothing, func), e0); + + GraphDef expected; + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_INT32, 0); + auto o = ops::Const(s.WithOpName("o"), 1); + auto keep_me = ops::RandomUniform(s.WithOpName("keep_me"), {o}, DT_FLOAT); + auto x1 = ops::Add(s.WithOpName("x1"), o, o); + auto a = ops::Square(s.WithOpName("a"), x); + auto y = ops::Add(s.WithOpName("y"), a, o); + auto x2 = ops::Mul(s.WithOpName("x2"), a, x1); + auto x3 = ops::Mul(s.WithOpName("x3"), x1, x2); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y, 0); + TF_ASSERT_OK(s.ToGraphDef(&expected)); + } + TF_EXPECT_GRAPH_EQ(expected, Optimize(DoNothing, func)); // TODO(zhifengc): Comes up another test case. - EXPECT_EQ(Optimize(::tensorflow::RemoveDeadNodes, func), e0); + TF_EXPECT_GRAPH_EQ(expected, Optimize(::tensorflow::RemoveDeadNodes, func)); } TEST(OptimizationTest, RemoveIdentityNodes_Ref) { @@ -735,23 +925,19 @@ TEST(OptimizationTest, RemoveIdentityNodes_Ref) { {{"v_read"}, "Identity", {"v"}, {{"T", T}}}, // returns v + v {{"ret"}, "Add", {"v_read", "v_read"}, {{"T", T}}}}); - const char* e0 = R"S( -() -> (ret:float) { - v = VariableV2[container="", dtype=float, shape=[], shared_name=""]() - v_read = Identity[T=float](v) - ret = Add[T=float](v_read, v_read) -} -)S"; - EXPECT_EQ(Optimize(DoNothing, func), e0); - const char* e1 = R"S( -() -> (ret:float) { - v = VariableV2[container="", dtype=float, shape=[], shared_name=""]() - v_read = Identity[T=float](v) - ret = Add[T=float](v_read, v_read) -} -)S"; - EXPECT_EQ(Optimize(::tensorflow::RemoveIdentityNodes, func), e1); + GraphDef expected; + { + Scope s = Scope::NewRootScope(); + auto v = ops::Variable(s.WithOpName("v"), PartialTensorShape({}), DT_FLOAT); + auto v_read = ops::Identity(s.WithOpName("v_read"), v); + auto ret = ops::Add(s.WithOpName("ret"), v_read, v_read); + auto ret_retval = ops::_Retval(s.WithOpName("ret_RetVal"), ret, 0); + TF_ASSERT_OK(s.ToGraphDef(&expected)); + } + TF_EXPECT_GRAPH_EQ(expected, Optimize(DoNothing, func)); + TF_EXPECT_GRAPH_EQ(expected, + Optimize(::tensorflow::RemoveIdentityNodes, func)); } TEST(OptimizationTest, RemoveIdentityNodes) { @@ -782,28 +968,38 @@ TEST(OptimizationTest, RemoveIdentityNodes) { {"x3"}}, // y = Add(a, o) {{"y"}, "Add", {"a", "o"}, {{"T", T}}}}); - const char* e0 = R"S( -(x:int32) -> (y:int32) { - o = Const[dtype=int32, value=Tensor]() - a = Square[T=int32](x) - y = Add[T=int32](a, o) - x1 = Identity[T=int32](a) - x2 = Identity[T=int32](x1) - x3 = Identity[T=int32](x2) - keep_me = RandomUniform[T=int32, dtype=float, seed2=0, seed=0](o) @ x3 -} -)S"; - EXPECT_EQ(Optimize(DoNothing, func), e0); - const char* e1 = R"S( -(x:int32) -> (y:int32) { - o = Const[dtype=int32, value=Tensor]() - a = Square[T=int32](x) - y = Add[T=int32](a, o) - keep_me = RandomUniform[T=int32, dtype=float, seed2=0, seed=0](o) @ a -} -)S"; - EXPECT_EQ(Optimize(::tensorflow::RemoveIdentityNodes, func), e1); + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_INT32, 0); + auto o = ops::Const(s.WithOpName("o"), 1); + auto a = ops::Square(s.WithOpName("a"), x); + auto y = ops::Add(s.WithOpName("y"), a, o); + auto x1 = ops::Identity(s.WithOpName("x1"), a); + auto x2 = ops::Identity(s.WithOpName("x2"), x1); + auto x3 = ops::Identity(s.WithOpName("x3"), x2); + auto keep_me = ops::RandomUniform( + s.WithOpName("keep_me").WithControlDependencies(x3), {o}, DT_FLOAT); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, Optimize(DoNothing, func)); + } + + { + Scope s = Scope::NewRootScope(); + auto x = ops::_Arg(s.WithOpName("x"), DT_INT32, 0); + auto o = ops::Const(s.WithOpName("o"), 1); + auto a = ops::Square(s.WithOpName("a"), x); + auto y = ops::Add(s.WithOpName("y"), a, o); + auto keep_me = ops::RandomUniform( + s.WithOpName("keep_me").WithControlDependencies(a), {o}, DT_FLOAT); + auto ret = ops::_Retval(s.WithOpName("y_RetVal"), y, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, + Optimize(::tensorflow::RemoveIdentityNodes, func)); + } } TEST(OptimizationTest, RemoveListArrayConverter) { @@ -840,49 +1036,63 @@ TEST(OptimizationTest, RemoveListArrayConverter) { // Return values {{"o", "o:sum"}}); - const char* e0 = R"P( -(i:float) -> (o:float) { - zero = Const[dtype=int32, value=Tensor]() - s = Split[T=float, num_split=4](zero, i) - a = _ArrayToList[N=4, T=float, out_types={float, float, float, float}](s, s:1, s:2, s:3) - r = Mul[T=float](a:2, a:3) - l = Mul[T=float](a, a:1) - x = _ListToArray[N=2, T=float, Tin={float, float}](l, r) - o = AddN[N=2, T=float](x, x:1) -} -)P"; - EXPECT_EQ(Optimize(DoNothing, func), e0); + { + Scope scope = Scope::NewRootScope(); + auto i = ops::_Arg(scope.WithOpName("i"), DT_FLOAT, 0); + auto zero = ops::Const(scope.WithOpName("zero"), 0); + auto s = ops::Split(scope.WithOpName("s"), zero, i, 4); + auto a = ops::_ArrayToList(scope.WithOpName("a"), s.output, + {DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT}); + auto r = ops::Mul(scope.WithOpName("r"), a[2], a[3]); + auto l = ops::Mul(scope.WithOpName("l"), a[0], a[1]); + auto x = ops::_ListToArray(scope.WithOpName("x"), + std::initializer_list{l, r}, DT_FLOAT, 2); + auto o = ops::AddN(scope.WithOpName("o"), x.output); + auto o_ret = ops::_Retval(scope.WithOpName("o_RetVal"), o, 0); + GraphDef expected; + TF_ASSERT_OK(scope.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, Optimize(DoNothing, func)); + } - const char* e1 = R"P( -(i:float) -> (o:float) { - zero = Const[dtype=int32, value=Tensor]() - s = Split[T=float, num_split=4](zero, i) - r = Mul[T=float](Func/_2, Func/_3) - l = Mul[T=float](Func/_0, Func/_1) - o = AddN[N=2, T=float](Func/_4, Func/_5) - Func/_0 = Identity[T=float](s) - Func/_1 = Identity[T=float](s:1) - Func/_2 = Identity[T=float](s:2) - Func/_3 = Identity[T=float](s:3) - Func/_4 = Identity[T=float](l) - Func/_5 = Identity[T=float](r) -} -)P"; - EXPECT_EQ(Optimize(RemoveListArrayConverter, func), e1); + { + Scope scope = Scope::NewRootScope(); + auto i = ops::_Arg(scope.WithOpName("i"), DT_FLOAT, 0); + auto zero = ops::Const(scope.WithOpName("zero"), 0); + auto s = ops::Split(scope.WithOpName("s"), zero, i, 4); + auto func_0 = ops::Identity(scope.WithOpName("Func/_0"), s[0]); + auto func_1 = ops::Identity(scope.WithOpName("Func/_1"), s[1]); + auto func_2 = ops::Identity(scope.WithOpName("Func/_2"), s[2]); + auto func_3 = ops::Identity(scope.WithOpName("Func/_3"), s[3]); + auto r = ops::Mul(scope.WithOpName("r"), func_2, func_3); + auto l = ops::Mul(scope.WithOpName("l"), func_0, func_1); + auto func_4 = ops::Identity(scope.WithOpName("Func/_4"), l); + auto func_5 = ops::Identity(scope.WithOpName("Func/_5"), r); + auto o = ops::AddN(scope.WithOpName("o"), + std::initializer_list{func_4, func_5}); + auto o_ret = ops::_Retval(scope.WithOpName("o_RetVal"), o, 0); + GraphDef expected; + TF_ASSERT_OK(scope.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, Optimize(RemoveListArrayConverter, func)); + } - const char* e2 = R"P( -(i:float) -> (o:float) { - zero = Const[dtype=int32, value=Tensor]() - s = Split[T=float, num_split=4](zero, i) - r = Mul[T=float](s:2, s:3) - l = Mul[T=float](s, s:1) - o = AddN[N=2, T=float](l, r) -} -)P"; - auto remove_listarray_and_identity = [](Graph* g) { - return RemoveListArrayConverter(g) && RemoveIdentityNodes(g); - }; - EXPECT_EQ(Optimize(remove_listarray_and_identity, func), e2); + { + Scope scope = Scope::NewRootScope(); + auto i = ops::_Arg(scope.WithOpName("i"), DT_FLOAT, 0); + auto zero = ops::Const(scope.WithOpName("zero"), 0); + auto s = ops::Split(scope.WithOpName("s"), zero, i, 4); + auto r = ops::Mul(scope.WithOpName("r"), s[2], s[3]); + auto l = ops::Mul(scope.WithOpName("l"), s[0], s[1]); + auto o = + ops::AddN(scope.WithOpName("o"), std::initializer_list{l, r}); + auto o_ret = ops::_Retval(scope.WithOpName("o_RetVal"), o, 0); + GraphDef expected; + TF_ASSERT_OK(scope.ToGraphDef(&expected)); + + auto remove_listarray_and_identity = [](Graph* g) { + return RemoveListArrayConverter(g) && RemoveIdentityNodes(g); + }; + TF_EXPECT_GRAPH_EQ(expected, Optimize(remove_listarray_and_identity, func)); + } } TEST(OptimizationTest, RemoveListArrayConverter_WithContolDeps) { @@ -911,33 +1121,47 @@ TEST(OptimizationTest, RemoveListArrayConverter_WithContolDeps) { {"x"}}}, {{"o", "o:sum"}}); - const char* e0 = R"P( -(i:float) -> (o:float) { - dummy = Const[dtype=int32, value=Tensor]() - x = _ListToArray[N=2, T=float, Tin={float, float}](i, i) @ dummy - o = AddN[N=2, T=float](x, x:1) @ x -} -)P"; - EXPECT_EQ(Optimize(DoNothing, func), e0); + { + Scope s = Scope::NewRootScope(); + auto i = ops::_Arg(s.WithOpName("i"), DT_FLOAT, 0); + auto dummy = ops::Const(s.WithOpName("dummy"), 0); + auto x = ops::_ListToArray(s.WithOpName("x").WithControlDependencies(dummy), + std::initializer_list{i, i}, DT_FLOAT, 2); + auto o = + ops::AddN(s.WithOpName("o").WithControlDependencies({x.output[0].op()}), + x.output); + auto o_ret = ops::_Retval(s.WithOpName("o_RetVal"), o, 0); + GraphDef expected; + TF_ASSERT_OK(s.ToGraphDef(&expected)); + TF_EXPECT_GRAPH_EQ(expected, Optimize(DoNothing, func)); + } - const char* e1 = R"P( -(i:float) -> (o:float) { - dummy = Const[dtype=int32, value=Tensor]() - o = AddN[N=2, T=float](Func/_0, Func/_1) @ Func/_3 - Func/_0 = Identity[T=float](i) @ Func/_2 - Func/_1 = Identity[T=float](i) @ Func/_2 - Func/_2 = NoOp() @ dummy - Func/_3 = NoOp() @ Func/_0, Func/_1 -} -)P"; - EXPECT_EQ(Optimize(RemoveListArrayConverter, func), e1); + GraphDef expected; + { + Scope s = Scope::NewRootScope(); + auto i = ops::_Arg(s.WithOpName("i"), DT_FLOAT, 0); + auto dummy = ops::Const(s.WithOpName("dummy"), 0); + auto func_2 = + ops::NoOp(s.WithOpName("Func/_2").WithControlDependencies(dummy)); + auto func_0 = ops::Identity( + s.WithOpName("Func/_0").WithControlDependencies({func_2}), i); + auto func_1 = ops::Identity( + s.WithOpName("Func/_1").WithControlDependencies({func_2}), i); + auto func_3 = ops::NoOp(s.WithOpName("Func/_3").WithControlDependencies( + {func_0.output.op(), func_1.output.op()})); + auto o = ops::AddN(s.WithOpName("o").WithControlDependencies({func_3}), + std::initializer_list{func_0, func_1}); + auto o_ret = ops::_Retval(s.WithOpName("o_RetVal"), o, 0); + TF_ASSERT_OK(s.ToGraphDef(&expected)); + } + TF_EXPECT_GRAPH_EQ(expected, Optimize(RemoveListArrayConverter, func)); auto remove_listarray_and_identity = [](Graph* g) { return RemoveListArrayConverter(g) && RemoveIdentityNodes(g); }; // NOTE: We are not removing Identity nodes with any control // dependencies yet. - EXPECT_EQ(Optimize(remove_listarray_and_identity, func), e1); + TF_EXPECT_GRAPH_EQ(expected, Optimize(remove_listarray_and_identity, func)); } } // end namespace tensorflow diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc index 452cfdda9e6..3573cc6ec21 100644 --- a/tensorflow/core/framework/attr_value_util.cc +++ b/tensorflow/core/framework/attr_value_util.cc @@ -400,16 +400,33 @@ void SetAttrValue(gtl::ArraySlice value, AttrValue* out) { } } +// Wrapper around protocol buffer serialization that requests deterministic +// serialization, in particular for Map fields, which serialize in a random +// order by default. Returns true on success. +template +static bool DeterministicSerialization(const T& t, string* result) { + const int size = t.ByteSize(); + *result = string(size, '\0'); + ::tensorflow::protobuf::io::ArrayOutputStream array_stream(&(*result)[0], + size); + ::tensorflow::protobuf::io::CodedOutputStream output_stream(&array_stream); + output_stream.SetSerializationDeterministic(true); + t.SerializeWithCachedSizes(&output_stream); + return !output_stream.HadError() && size == output_stream.ByteCount(); +} + bool AreAttrValuesEqual(const AttrValue& a, const AttrValue& b) { string a_str, b_str; - a.SerializeToString(&a_str); - b.SerializeToString(&b_str); + DeterministicSerialization(a, &a_str); + DeterministicSerialization(b, &b_str); // Note: it should be safe to compare proto serializations of the attr // values since at most one field should be set in each (indeed, it // must be the same field if they are to compare equal). // Exception: there are multiple equivalent representations of // TensorProtos. So a return value of true implies a == b, but not the // converse. + // TODO(phawkins): this is incorrect for NameAttrList attributes that may + // contain nested AttrValue maps. return a_str == b_str; } diff --git a/tensorflow/core/graph/algorithm.cc b/tensorflow/core/graph/algorithm.cc index 38f011ecaf1..3bfba3fc4ee 100644 --- a/tensorflow/core/graph/algorithm.cc +++ b/tensorflow/core/graph/algorithm.cc @@ -23,8 +23,8 @@ limitations under the License. namespace tensorflow { -void DFS(const Graph& g, std::function enter, - std::function leave) { +void DFS(const Graph& g, const std::function& enter, + const std::function& leave) { // Stack of work to do. struct Work { Node* node; @@ -61,15 +61,23 @@ void DFS(const Graph& g, std::function enter, } } -void ReverseDFS(const Graph& g, std::function enter, - std::function leave) { +void ReverseDFS(const Graph& g, const std::function& enter, + const std::function& leave) { + ReverseDFSFrom(g, {g.sink_node()}, enter, leave); +} + +void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, + const std::function& enter, + const std::function& leave) { // Stack of work to do. struct Work { Node* node; bool leave; // Are we entering or leaving n? }; - std::vector stack; - stack.push_back(Work{g.sink_node(), false}); + std::vector stack(start.size()); + for (int i = 0; i < start.size(); ++i) { + stack[i] = Work{start[i], false}; + } std::vector visited(g.num_node_ids(), false); while (!stack.empty()) { diff --git a/tensorflow/core/graph/algorithm.h b/tensorflow/core/graph/algorithm.h index 74aace80722..01d36e0a124 100644 --- a/tensorflow/core/graph/algorithm.h +++ b/tensorflow/core/graph/algorithm.h @@ -21,20 +21,28 @@ limitations under the License. #include #include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/gtl/array_slice.h" namespace tensorflow { // Perform a depth-first-search on g starting at the source node. // If enter is not empty, calls enter(n) before visiting any children of n. // If leave is not empty, calls leave(n) after visiting all children of n. -extern void DFS(const Graph& g, std::function enter, - std::function leave); +extern void DFS(const Graph& g, const std::function& enter, + const std::function& leave); // Perform a reverse depth-first-search on g starting at the sink node. // If enter is not empty, calls enter(n) before visiting any parents of n. // If leave is not empty, calls leave(n) after visiting all parents of n. -extern void ReverseDFS(const Graph& g, std::function enter, - std::function leave); +extern void ReverseDFS(const Graph& g, const std::function& enter, + const std::function& leave); + +// Perform a reverse depth-first-search on g starting at the 'start' nodes. +// If enter is not empty, calls enter(n) before visiting any parents of n. +// If leave is not empty, calls leave(n) after visiting all parents of n. +extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice start, + const std::function& enter, + const std::function& leave); // Stores in *order the post-order numbering of all nodes // in graph found via a depth first search starting at the source node. diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index e784c2df443..22f4708d032 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -90,6 +90,23 @@ cc_test( ], ) +cc_library( + name = "robust_stats", + srcs = ["robust_stats.cc"], + hdrs = ["robust_stats.h"], + visibility = ["//visibility:public"], +) + +cc_test( + name = "robust_stats_test", + srcs = ["robust_stats_test.cc"], + deps = [ + ":robust_stats", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "utils", srcs = ["utils.cc"], @@ -116,3 +133,37 @@ cc_library( "//tensorflow/core:lib", ], ) + +cc_library( + name = "virtual_scheduler", + srcs = ["virtual_scheduler.cc"], + hdrs = ["virtual_scheduler.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/costs:cost_estimator", + ], +) + +cc_library( + name = "measuring_cost_estimator", + srcs = ["measuring_cost_estimator.cc"], + hdrs = ["measuring_cost_estimator.h"], + visibility = ["//visibility:public"], + deps = [ + ":robust_stats", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:lib_proto_parsing", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:grappler_item_builder", + "//tensorflow/core/grappler/clusters:cluster", + "//tensorflow/core/grappler/costs:cost_estimator", + "//tensorflow/core/kernels:ops_util", + ], +) diff --git a/tensorflow/core/grappler/costs/measuring_cost_estimator.cc b/tensorflow/core/grappler/costs/measuring_cost_estimator.cc new file mode 100644 index 00000000000..6179dc05c1e --- /dev/null +++ b/tensorflow/core/grappler/costs/measuring_cost_estimator.cc @@ -0,0 +1,133 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/costs/measuring_cost_estimator.h" + +#include + +#include "tensorflow/core/grappler/clusters/cluster.h" +#include "tensorflow/core/grappler/costs/robust_stats.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/blocking_counter.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/public/session.h" + +namespace tensorflow { +namespace grappler { + +MeasuringCostEstimator::MeasuringCostEstimator(Cluster* cluster, + int measurement_steps, + int measurement_threads) + : measurement_steps_(measurement_steps), + measurement_threads_(measurement_threads) { + CHECK_GE(measurement_steps, 1); + if (measurement_threads > 0) { + thread_pool_.reset(new thread::ThreadPool( + Env::Default(), SanitizeThreadSuffix("measurements"), + measurement_threads)); + } + cluster_ = cluster; +} + +Status MeasuringCostEstimator::Initialize(const GrapplerItem& item) { + feed_ = item.feed; + fetch_ = item.fetch; + return cluster_->Initialize(item); +} + +Status MeasuringCostEstimator::PredictCosts(const GraphDef& optimized_graph, + CostGraphDef* cost_graph, + Costs* costs) const { + std::vector times(measurement_steps_); + BlockingCounter barrier(measurement_steps_); + + mutex status_mu; + Status status; + + auto measurement_fn = [&](const int step) { + const Costs::MicroSeconds start = Env::Default()->NowMicros(); + + RunMetadata metadata; + const Status local_status = + cluster_->Run(optimized_graph, feed_, fetch_, &metadata); + { + mutex_lock lock(status_mu); + status.Update(local_status); + } + if (step < 0) { + // Discard the first iteration as it triggers the warmup, and therefore + // takes much longer than a normal step. + return; + } + if (!local_status.ok()) { + // Discard the data if the run wasn't sucessful. + barrier.DecrementCount(); + return; + } + + const Costs::MicroSeconds finish = Env::Default()->NowMicros(); + const double time = (finish - start).count() * 1e3; + times[step] = time; + + if (cost_graph && (step + 1 == measurement_steps_)) { + metadata.mutable_cost_graph()->Swap(cost_graph); + } + + barrier.DecrementCount(); + }; + + // Initialize the computation and warm up TensorFlow. + measurement_fn(-1); + + if (!status.ok()) { + LOG(ERROR) << "Failed to run start measurements: " + << status.error_message(); + costs->execution_time = Costs::Duration::max(); + return status; + } + + // Run "measurement_steps_" and measure the time. + if (measurement_threads_ > 0) { + for (int i = 0; i < measurement_steps_; ++i) { + thread_pool_->Schedule([i, &measurement_fn]() { measurement_fn(i); }); + } + barrier.Wait(); + } else { + for (int i = 0; i < measurement_steps_ && status.ok(); ++i) { + measurement_fn(i); + } + } + + if (!status.ok()) { + LOG(ERROR) << "Failed to measure graph performance: " + << status.error_message(); + costs->execution_time = Costs::Duration::max(); + costs->max_execution_time = Costs::Duration::max(); + costs->min_execution_time = 0; + return status; + } + + // Compute the average time of the measure steps. Use Huber statistics + // to filter out outliers. + RobustStats stats(times); + costs->execution_time = Costs::Duration(stats.mean()); + costs->max_execution_time = Costs::Duration(stats.hi()); + costs->min_execution_time = Costs::Duration(stats.lo()); + + return Status::OK(); +} +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/measuring_cost_estimator.h b/tensorflow/core/grappler/costs/measuring_cost_estimator.h new file mode 100644 index 00000000000..a84853f6c71 --- /dev/null +++ b/tensorflow/core/grappler/costs/measuring_cost_estimator.h @@ -0,0 +1,76 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_ +#define TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_ + +#include +#include +#include + +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/grappler/costs/cost_estimator.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +class CostGraphDef; +class GraphDef; +} // namespace tensorflow + +namespace tensorflow { +namespace grappler { + +class Cluster; +struct GrapplerItem; + +// Estimate the cost of running a Grappler item by actually running the +// corresponding TensorFlow graph on the specified cluster and measuring the +// runtimes. +class MeasuringCostEstimator : public CostEstimator { + public: + // Run the model for measurement_steps to measure its average cost. + // When measurement_threads is greater than 0, use a threadpool of as many + // threads to run the measurements; otherwise, run them serially. Does not + // take ownership of cluster. + explicit MeasuringCostEstimator(Cluster* cluster, int measurement_steps, + int measurement_threads); + ~MeasuringCostEstimator() override {} + + // Initalizes the estimator for the specified grappler item. + // This implementation always returns OK. + Status Initialize(const GrapplerItem& item) override; + + // Runs the optimized version of the graph on the cluster, measure + // the runtimes of each operation, and annotated the CostGraphDef + // with the corresponding measurements. + // Returns the average latency for the whole graph. + Status PredictCosts(const GraphDef& optimized_graph, CostGraphDef* cost_graph, + Costs* overall_cost) const override; + + private: + Cluster* cluster_; // Not owned. + int measurement_steps_; + int measurement_threads_; + std::vector> feed_; + std::vector fetch_; + std::unique_ptr thread_pool_; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_ diff --git a/tensorflow/core/grappler/costs/robust_stats.cc b/tensorflow/core/grappler/costs/robust_stats.cc new file mode 100644 index 00000000000..87cda1c0d2e --- /dev/null +++ b/tensorflow/core/grappler/costs/robust_stats.cc @@ -0,0 +1,151 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/costs/robust_stats.h" +#include + +namespace tensorflow { +namespace grappler { + +// Given a sorted vector of values, calculate the median. +// Returns 0 for an empty vector. Does not verify sortedness. +static double SortedMedian(const std::vector &values) { + const int n = values.size(); + if (n == 0) return 0.0; + if (n & 1) { + return values[n / 2]; + } else { + return (values[n / 2] + values[n / 2 - 1]) / 2.0; + } +} + +// Given a vector of values (sorted or not), calculate the median. +static double Median(std::vector &&values) { + const size_t n = values.size(); + if (n == 0) return 0; + const auto middle = values.begin() + (n / 2); + // Put the middle value in its place. + std::nth_element(values.begin(), middle, values.end()); + if (n & 1) { + return *middle; + } + // Return the average of the two elements, the max_element lower than + // *middle is found between begin and middle as a post-cond of + // nth_element. + const auto lower_middle = std::max_element(values.begin(), middle); + // Preventing overflow. We know that '*lower_middle <= *middle'. + // If both are on oposite sides of zero, the sum won't overflow, otherwise + // the difference won't overflow. + if (*lower_middle <= 0 && *middle >= 0) { + return (*lower_middle + *middle) / 2; + } + return *lower_middle + (*middle - *lower_middle) / 2; +} + +// Given a set of values, calculates the scaled Median Absolute Deviation (a +// robust approximation to the standard deviation). This is calculated as the +// median of the absolute deviations from the median, scaled by 1.4826. Its +// advantage over the standard deviation is that it is not (as) affected by +// outlier values. Returns a pair. +static std::pair ScaledMedianAbsoluteDeviation( + const std::vector &sorted_values) { + double median = SortedMedian(sorted_values); + + // Next, we calculate the absolute deviations from the median, + // find the median of the resulting data, and scale by 1.4826. + std::vector deviations; + deviations.reserve(sorted_values.size()); + for (double d : sorted_values) { + deviations.push_back(std::abs(d - median)); + } + double mad = Median(std::move(deviations)) * 1.4826; + return std::pair(median, mad); +} + +RobustStats::RobustStats(const std::vector &values) + : RobustStats(std::vector(values)) {} + +RobustStats::RobustStats(std::vector &&values) { + std::sort(values.begin(), values.end()); + lo_ = values[0]; + hi_ = values.back(); + HuberMAD(values); +} + +// Computes an updated mean using Huber's weighting function (values beyond +// the margin are weighted by margin / abs(value - mean). +double UpdateHuberMean(const std::vector &sorted_values, double mean, + double margin) { + int num_within = 0; + double sum = 0.0; + + for (double d : sorted_values) { + if (d < mean - margin) { + sum -= margin; + } else if (d > mean + margin) { + sum += margin; + } else { + sum += d; + ++num_within; + } + } + + // It is possible, for a set with an interquartile distance of 0, i.e., with + // more than half of the values at the median, to encounter the case where + // the Huber mean drifts slightly off the median and there are no values + // within the margin. In that case, just return the old mean, and the caller + // will quit. + if (num_within > 0) { + return sum / num_within; + } else { + return mean; + } +} + +// Given a list of values, this approximates the stddev using the MAD and then +// uses it to compute a Huber robust mean (sandwich mean). A margin of +// c*stddev is defined around the current mean, and values are weighted by +// margin / abs(value - mean) if outside the margin, or 1 if inside. This +// computes the mean iteratively, because each time it changes the margin +// shifts a bit. It typically settles very quickly, but it's possible for it +// to be unstable. We limit it to 10 iterations. +// +void RobustStats::HuberMAD(const std::vector &sorted_values) { + const std::pair median_mad = + ScaledMedianAbsoluteDeviation(sorted_values); + mean_ = median_mad.first; + stddev_ = median_mad.second; + + // c = 1.345 is the commonly used cutoff with 95% efficiency at the normal. + // We're using c = 1.5 to be a little more conservative, and because that's + // the default in S-plus. + // TODO(dehnert): Specialize Stats for integral types so we don't implement + // methods that don't make sense. + const double c = 1.5; + const double margin = c * stddev_; + + // Iterate 10 times, or until the Huber mean stabilizes. + // If the margin is zero, we don't want mean to drift from the median. + if (margin > 0.0) { + for (int k = 0; k < 10; ++k) { + double old_mean = mean_; + mean_ = UpdateHuberMean(sorted_values, mean_, margin); + if (mean_ == old_mean) break; + } + } +} + +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/robust_stats.h b/tensorflow/core/grappler/costs/robust_stats.h new file mode 100644 index 00000000000..9d8f5bc970a --- /dev/null +++ b/tensorflow/core/grappler/costs/robust_stats.h @@ -0,0 +1,42 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_ +#define TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_ + +#include +namespace tensorflow { +namespace grappler { +class RobustStats { + public: + RobustStats(const std::vector& values); + RobustStats(std::vector&& values); + + double lo() const { return lo_; } + double hi() const { return hi_; } + double mean() const { return mean_; } + + private: + void HuberMAD(const std::vector& values); + + double lo_; + double hi_; + double mean_; + double stddev_; +}; +} // namespace grappler +} // namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_ diff --git a/tensorflow/core/grappler/costs/robust_stats_test.cc b/tensorflow/core/grappler/costs/robust_stats_test.cc new file mode 100644 index 00000000000..924097b126d --- /dev/null +++ b/tensorflow/core/grappler/costs/robust_stats_test.cc @@ -0,0 +1,63 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/costs/robust_stats.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class RobustStatsTest : public ::testing::Test { + public: + void SetUp() override { + for (double d = 1.0; d <= 5.0; d += 1.0) { + values1_.push_back(5.0 - d); + values1_.push_back(5.0 + d); + values2_.push_back(25.0 - 2 * d); + values2_.push_back(25.0 + 2 * d); + values3_.push_back(-3.0 - d); + values3_.push_back(-3.0 + d); + } + values1_.push_back(5.0); // Odd # elements, mean is 5.0 + values3_.push_back(197.0); + values3_.push_back(-203.0); // Even # elements, mean is -3.0 + } + + std::vector values1_; + std::vector values2_; + std::vector values3_; +}; + +TEST_F(RobustStatsTest, Simple) { + RobustStats s1(values1_); + EXPECT_EQ(5.0, s1.mean()); + EXPECT_EQ(0.0, s1.lo()); + EXPECT_EQ(10.0, s1.hi()); + + RobustStats s2(values2_); + EXPECT_EQ(25.0, s2.mean()); + EXPECT_EQ(15.0, s2.lo()); + EXPECT_EQ(35.0, s2.hi()); + + RobustStats s3(values3_); + EXPECT_EQ(-3.0, s3.mean()); + EXPECT_EQ(-203.0, s3.lo()); + EXPECT_EQ(197.0, s3.hi()); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc new file mode 100644 index 00000000000..8f77d7677ac --- /dev/null +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -0,0 +1,215 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/costs/virtual_scheduler.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/grappler/utils.h" + +namespace tensorflow { +namespace grappler { +namespace { + +Costs CombineCosts(const Costs& left, const Costs& right) { + CHECK_NE(left.max_memory, kMemoryUnknown); + CHECK_NE(left.max_per_op_buffers, kMemoryUnknown); + CHECK_NE(left.max_per_op_streaming, kMemoryUnknown); + + Costs result = left; + result.execution_time += right.execution_time; + if (right.max_memory != kMemoryUnknown) { + result.max_memory += right.max_memory; + } + if (right.max_per_op_buffers != kMemoryUnknown) { + result.max_per_op_buffers = + std::max(left.max_per_op_buffers, right.max_per_op_buffers); + } + if (right.max_per_op_streaming != kMemoryUnknown) { + result.max_per_op_streaming = + std::max(left.max_per_op_streaming, right.max_per_op_streaming); + } + VLOG(2) << "costs execution_time=" << result.execution_time.count() + << " max_memory=" << result.max_memory + << " max_per_op_buffers=" << result.max_per_op_buffers + << " max_per_op_streaming=" << result.max_per_op_streaming; + return result; +} +} // namespace + +VirtualScheduler::VirtualScheduler(const GraphDef& graph, + const std::vector& fetch_nodes) + : graph_costs_(Costs::ZeroCosts()), + // TODO(dyoon): Use a better way than FIFO. + ready_nodes_(new FIFOManager()) { + // First, get the nodes that would run to output fetch_nodes. + std::vector nodes = + ComputeTransitiveFanin(graph, fetch_nodes); + + // TODO(dyoon): this is a bit inefficient as name_to_node is already built in + // ComputeTransitiveFanin(). + std::unordered_map name_to_node; + for (const auto& node : graph.node()) { + name_to_node[node.name()] = &node; + } + + // Build node_map. + for (const auto* node : nodes) { + auto& node_state = GetNodeStateOrCreateIt(node); + // TODO(dyoon): add SendRecv considering devices and control dependency. + for (const string& input : node->input()) { + const NodeDef* in = name_to_node[NodeName(input)]; + CHECK(in); + node_state.inputs.push_back(in); + auto& input_node_state = GetNodeStateOrCreateIt(in); + input_node_state.outputs.push_back(node); + } + if (node->input().empty()) { + node_state.time_ready = + Costs::Duration(); // Node without input: ready at time 0. + ready_nodes_->AddNode(node); + } + } +} + +const NodeDef* VirtualScheduler::GetCurrNode() const { + return ready_nodes_->GetCurrNode(); +} + +NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) { + auto it = node_map_.find(node); + if (it == node_map_.end()) { + it = node_map_.emplace(node, NodeState()).first; + } + return it->second; +} + +bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) { + // Update graph_costs_ and per-op costs. + graph_costs_ = CombineCosts(graph_costs_, node_costs); + const auto* node = GetCurrNode(); + const auto& op_name = node->op(); + + auto it = op_to_cost_.find(op_name); + if (it == op_to_cost_.end()) { + it = op_to_cost_.emplace(op_name, Costs::ZeroCosts()).first; + } + auto& op_cost = it->second; + op_cost = CombineCosts(op_cost, node_costs); + + // Update node and device states. + auto& node_state = node_map_[node]; + auto& device = device_[node->device()]; + device.nodes_executed.push_back(node); + // Node is scheduled when the device is available AND all the inputs are + // ready; hence, time_scheduled is time_ready if time_ready > device curr + // time. + node_state.time_scheduled = + std::max(device.GetCurrTime(), node_state.time_ready); + // Override device curr time with the time_scheduled. + device.device_costs.execution_time = node_state.time_scheduled; + device.device_costs = CombineCosts(device.device_costs, node_costs); + auto curr_time = device.GetCurrTime(); + node_state.time_finished = curr_time; + + // Update device's per-op cost. + { + auto it = device.op_to_cost.find(op_name); + if (it == device.op_to_cost.end()) { + it = device.op_to_cost.emplace(op_name, Costs::ZeroCosts()).first; + } + auto& op_cost = it->second; + op_cost = CombineCosts(op_cost, node_costs); + + VLOG(2) << "Op scheduled -- name: " << node->name() + << ", op: " << node->op() << ", device: " << node->device() + << ", ready: " << node_state.time_ready.count() + << ", scheduled: " << node_state.time_scheduled.count() + << ", finished: " << node_state.time_finished.count(); + + // Increment num_inputs_ready of the output nodes. + for (auto* output : node_state.outputs) { + auto& output_state = node_map_[output]; + output_state.num_inputs_ready++; + if (output_state.num_inputs_ready == output_state.inputs.size()) { + // This output node is now ready. + output_state.time_ready = curr_time; + ready_nodes_->AddNode(output); + } + } + + // Increment num_outputs_executed of the input nodes. + for (auto* input : node_state.inputs) { + auto& input_state = node_map_[input]; + input_state.num_outputs_executed++; + if (input_state.num_outputs_executed == input_state.outputs.size()) { + // All the outputs are executed; no reference to this input nodel + input_state.time_no_reference = curr_time; + // TODO(dyoon): collect device memory usage; note that this input node + // use device memory between time_scheduled and time_no_reference. + } + } + } + + // Remove the current node; assume FIFO. + ready_nodes_->RemoveCurrNode(); + return !ready_nodes_->Empty(); // True if not empty. +} + +Costs VirtualScheduler::Summary() const { + // Print out basic execution summary. + VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count(); + VLOG(1) << "Expected max memory: " << graph_costs_.max_memory; + VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers; + VLOG(1) << "Expected max per-op streaming buffers: " + << graph_costs_.max_per_op_streaming; + + VLOG(1) << "Per-op execution time:"; + for (const auto& op_cost_pair : op_to_cost_) { + const auto& op = op_cost_pair.first; + const auto& cost = op_cost_pair.second.execution_time.count(); + if (cost) { // Skip printing out zero-cost ops. + VLOG(1) << " + " << op << " : " << cost; + } + } + + // Print per device summary + VLOG(1) << "Devices:"; + Costs critical_path_costs = Costs::ZeroCosts(); + + for (const auto& device : device_) { + const auto& name = device.first; + const auto& state = device.second; + VLOG(1) << "Device = " << name + << ", num_nodes = " << state.nodes_executed.size() + << ", execution_time = " << state.GetCurrTime().count(); + VLOG(1) << "Per-op execution time:"; + for (const auto& op_cost_pair : state.op_to_cost) { + const auto& op = op_cost_pair.first; + const auto& cost = op_cost_pair.second.execution_time.count(); + if (cost) { // Skip printing out zero-cost ops. + VLOG(1) << " + " << op << " : " << cost; + } + } + if (critical_path_costs.execution_time <= state.GetCurrTime()) { + critical_path_costs = state.device_costs; + } + } + + VLOG(1) << "Critical path execution time: " + << critical_path_costs.execution_time.count(); + return critical_path_costs; +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.h b/tensorflow/core/grappler/costs/virtual_scheduler.h new file mode 100644 index 00000000000..b7785c94e04 --- /dev/null +++ b/tensorflow/core/grappler/costs/virtual_scheduler.h @@ -0,0 +1,116 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_ + +#include +#include +#include + +#include "tensorflow/core/grappler/costs/cost_estimator.h" +#include "tensorflow/core/grappler/grappler_item.h" + +namespace tensorflow { +namespace grappler { + +namespace { +struct NodeState { + std::vector inputs; + std::vector outputs; + int num_inputs_ready; + int num_outputs_executed; + Costs::Duration time_ready; + Costs::Duration time_scheduled; + Costs::Duration time_finished; + Costs::Duration time_no_reference; + + // Node will be ready to be executed at time_ready, scheduled at + // time_scheduled, and finishes execution at time_finished. + // Between time_scheduled and time_no_reference, the node's output tensor + // needs to be on the device, using up device memory. + + NodeState() { + num_inputs_ready = 0; + num_outputs_executed = 0; + time_ready = Costs::Duration::max(); + time_scheduled = Costs::Duration::max(); + time_finished = Costs::Duration::max(); + time_no_reference = Costs::Duration::max(); + } +}; + +struct DeviceState { + std::vector nodes_executed; + Costs device_costs; + std::map op_to_cost; // Per-op cost. + + DeviceState() { device_costs = Costs::ZeroCosts(); } + + Costs::Duration GetCurrTime() const { return device_costs.execution_time; } +}; + +// ReadyNodeManager (abstract class): +// Keeps ready nodes and picks the best one to be scheduled. +class ReadyNodeManager { + public: + ReadyNodeManager() {} + virtual ~ReadyNodeManager() {} + virtual void AddNode(const NodeDef* node) = 0; + virtual const NodeDef* GetCurrNode() const = 0; + virtual void RemoveCurrNode() = 0; + virtual bool Empty() const = 0; +}; + +class FIFOManager : public ReadyNodeManager { + public: + FIFOManager() : ReadyNodeManager() {} + ~FIFOManager() override {} + void AddNode(const NodeDef* node) override { nodes_.push_back(node); } + const NodeDef* GetCurrNode() const override { return nodes_.front(); } + void RemoveCurrNode() override { nodes_.pop_front(); } + bool Empty() const override { return nodes_.empty(); } + + private: + std::list nodes_; +}; +} // namespace + +// The virtual scheduler emulates execution of nodes in a graph, considering +// dependencies, device, etc. +class VirtualScheduler { + public: + VirtualScheduler(const GraphDef& graph, + const std::vector& fetch_nodes); + + const NodeDef* GetCurrNode() const; + bool MarkCurrNodeExecuted(const Costs& node_costs); + + Costs Summary() const; + + private: + NodeState& GetNodeStateOrCreateIt(const NodeDef* node); + + Costs graph_costs_; // Graph cost. + std::map op_to_cost_; // Per-op cost. + std::unique_ptr ready_nodes_; + std::unordered_map node_map_; + std::unordered_map device_; +}; + +} // namespace grappler +} // end namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_ diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 29b4d63bbf8..0847d1279b8 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -2109,7 +2109,9 @@ tf_kernel_library( tf_kernel_library( name = "matrix_triangular_solve_op", prefix = "matrix_triangular_solve_op", - deps = LINALG_DEPS, + deps = LINALG_DEPS + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", + ]), ) tf_kernel_library( @@ -2350,6 +2352,8 @@ tf_kernel_library( "//conditions:default": [], }) + if_mkl([ "//third_party/mkl:intel_binary_blob", + ]) + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", ]), ) @@ -2630,6 +2634,7 @@ tf_kernel_library( ], "//conditions:default": [], }) + if_cuda([ + "//tensorflow/core/platform/default/build_config:cublas_plugin", "//tensorflow/core/platform/default/build_config:cudnn_plugin", ]), ) diff --git a/tensorflow/docs_src/performance/performance_models.md b/tensorflow/docs_src/performance/performance_models.md index 027ecb195ed..d415c29aa13 100644 --- a/tensorflow/docs_src/performance/performance_models.md +++ b/tensorflow/docs_src/performance/performance_models.md @@ -328,12 +328,16 @@ The downside is that all the weights read are from the previous training step. So it is a different algorithm from SGD. But it is possible to improve its convergence by adjusting learning rate and other hyperparameters. -### Executing the script +## Executing the script This section lists the core command line arguments and a few basic examples for executing the main script ([tf_cnn_benchmarks.py](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py)). +> Note: `tf_cnn_benchmarks.py` uses the config `force_gpu_compatible`, +> which was introduced after TensorFlow 1.1. Until TensorFlow 1.2 is released +> building from source is advised. + #### Base command line arguments * **`model`**: Model to use, e.g. `resnet50`, `inception3`, `vgg16`, and diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index e408506cb06..a96052a3ae5 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -139,6 +139,82 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest +def make_input_layer(features, + feature_columns, + weight_collections=None, + trainable=True): + """Returns a dense `Tensor` as input layer based on given `feature_columns`. + + Generally a single example in training data is described with FeatureColumns. + At the first layer of the model, this column oriented data should be converted + to a single `Tensor`. + + Example: + + ```python + price = numeric_column('price') + keywords_embedded = embedding_column( + categorical_column_with_hash_bucket("keywords", 10K), dimensions=16) + all_feature_columns = [price, keywords_embedded, ...] + dense_tensor = make_input_layer(features, all_feature_columns) + for units in [128, 64, 32]: + dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu) + prediction = tf.layers.dense(dense_tensor, 1) + ``` + + Args: + features: A mapping from key to tensors. `FeatureColumn`s look up via these + keys. For example `numeric_column('price') will look at 'price' key in + this dict. Values can be a `SparseTensor` or a `Tensor` depends on + corresponding `FeatureColumn`. + feature_columns: An iterable containing all the `FeatureColumn`s. All items + should be instances of classes derived from `_DenseColumn` such as + `numeric_column`, `embedding_column`, `bucketized_column`, + `indicator_column`. If you have categorical features, you can wrap them + with with an `embedding_column` or `indicator_column`. + weight_collections: A list of collection names to which the Variable will be + added. Note that, variables will also be added to collections + `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). + + Returns: + A `Tensor` which represents input layer of a model. Its shape + is (batch_size, first_layer_dimension) and its dtype is `float32`. + first_layer_dimension is determined based on given `feature_columns`. + + Raises: + ValueError: if an item in `feature_columns` is not a `_DenseColumn`. + """ + _check_feature_columns(feature_columns) + for column in feature_columns: + if not isinstance(column, _DenseColumn): + raise ValueError( + 'Items of feature_columns must be a _DenseColumn. ' + 'You can wrap a categorical column with an ' + 'embedding_column or indicator_column. Given: {}'.format(column)) + weight_collections = list(weight_collections or []) + if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections: + weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) + if ops.GraphKeys.MODEL_VARIABLES not in weight_collections: + weight_collections.append(ops.GraphKeys.MODEL_VARIABLES) + with variable_scope.variable_scope( + None, default_name='make_input_layer', values=features.values()): + builder = _LazyBuilder(features) + output_tensors = [] + for column in sorted(feature_columns, key=lambda x: x.name): + with variable_scope.variable_scope(None, default_name=column.name): + tensor = column._get_dense_tensor( # pylint: disable=protected-access + builder, + weight_collections=weight_collections, + trainable=trainable) + num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access + batch_size = array_ops.shape(tensor)[0] + tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) + output_tensors.append(tensor) + return array_ops.concat(output_tensors, 1) + + def make_linear_model(features, feature_columns, units=1, @@ -156,10 +232,21 @@ def make_linear_model(features, while `make_input_layer` explicitly requires wrapping each of them with an `embedding_column` or an `indicator_column`. + Example: + + ```python + price = numeric_column('price') + price_buckets = bucketized_column(price, boundaries=[0., 10., 100., 1000.]) + keywords = categorical_column_with_hash_bucket("keywords", 10K) + all_feature_columns = [price_buckets, keywords, ...] + prediction = make_linear_model(features, all_feature_columns) + ``` + Args: - features: A mapping from key to tensors. 'string' key means a base feature. - It can have `_FeatureColumn` as a key too. That means that FeatureColumn - is already transformed by the input pipeline. + features: A mapping from key to tensors. `FeatureColumn`s look up via these + keys. For example `numeric_column('price')` will look at 'price' key in + this dict. Values are `Tensor` or `SparseTensor` depending on + corresponding `FeatureColumn`. feature_columns: An iterable containing all the FeatureColumns. All items should be instances of classes derived from FeatureColumn. units: units: An integer, dimensionality of the output space. Default @@ -191,9 +278,10 @@ def make_linear_model(features, raise ValueError('Items of feature_columns must be either a _DenseColumn ' 'or _CategoricalColumn. Given: {}'.format(column)) weight_collections = list(weight_collections or []) - weight_collections += [ - ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES - ] + if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections: + weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES) + if ops.GraphKeys.MODEL_VARIABLES not in weight_collections: + weight_collections.append(ops.GraphKeys.MODEL_VARIABLES) with variable_scope.variable_scope( None, default_name='make_linear_model', values=features.values()): weigthed_sums = [] @@ -228,7 +316,8 @@ def numeric_column(key, normalizer_fn=None): """Represents real valued or numerical features. - An example: + Example: + ```python price = numeric_column('price') all_feature_columns = [price, ...] @@ -298,7 +387,8 @@ def bucketized_column(source_column, boundaries): `boundaries=[0., 1., 2.]` generates buckets `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +inf)`. - An example: + Example: + ```python price = numeric_column('price') bucketized_price = bucketized_column(price, boundaries=[...]) @@ -349,7 +439,8 @@ def categorical_column_with_hash_bucket(key, want to distribute your inputs into a finite number of buckets by hashing. output_id = Hash(input_feature_string) % bucket_size - An example: + Example: + ```python keywords = categorical_column_with_hash_bucket("keywords", 10K) all_feature_columns = [keywords, ...] @@ -471,7 +562,7 @@ class _DenseColumn(_FeatureColumn): @abc.abstractproperty def _variable_shape(self): - """Returns shape of variable which is compatible with _get_dense_tensor.""" + """Returns a `TensorShape` of variable compatible with _get_dense_tensor.""" pass @abc.abstractmethod @@ -480,6 +571,7 @@ class _DenseColumn(_FeatureColumn): The output of this function will be used by model-buildier-functions. For example the pseudo code of `make_input_layer` will be like that: + ```python def make_input_layer(features, feature_columns, ...): outputs = [fc._get_dense_tensor(...) for fc in feature_columns] @@ -503,7 +595,7 @@ def _create_dense_column_weighted_sum( builder, weight_collections=weight_collections, trainable=trainable) - num_elements = tensor_shape.TensorShape(column._variable_shape).num_elements() # pylint: disable=protected-access + num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access batch_size = array_ops.shape(tensor)[0] tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements)) weight = variable_scope.get_variable( @@ -615,12 +707,15 @@ class _LazyBuilder(object): """Creates a `_LazyBuilder`. Args: - features: A mapping from feature column to tensors. A `string` key + features: A mapping from feature column to objects that are `Tensor` or + `SparseTensor`, or can be converted to same via + `sparse_tensor.convert_to_tensor_or_sparse_tensor`. A `string` key signifies a base feature (not-transformed). A `FeatureColumn` key means that this `Tensor` is the output of an existing `FeatureColumn` which can be reused. """ - self._columns_to_tensors = features.copy() + self._features = features.copy() + self._feature_tensors = {} def get(self, key): """Returns a `Tensor` for the given key. @@ -640,9 +735,16 @@ class _LazyBuilder(object): ValueError: if key is not found or a transformed `Tensor` cannot be computed. """ - if key in self._columns_to_tensors: - # Feature_column is already transformed or it's a raw feature. - return self._columns_to_tensors[key] + if key in self._feature_tensors: + # FeatureColumn is already transformed or converted. + return self._feature_tensors[key] + + if key in self._features: + # FeatureColumn is a raw feature. + feature_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor( + self._features[key]) + self._feature_tensors[key] = feature_tensor + return feature_tensor if not isinstance(key, (str, _FeatureColumn)): raise TypeError('"key" must be either a "str" or "_FeatureColumn". ' @@ -653,11 +755,13 @@ class _LazyBuilder(object): column = key logging.debug('Transforming feature_column %s.', column) - transformed = column._transform_feature(self) # pylint: disable=protected-access + # pylint: disable=protected-access + transformed = column._transform_feature(self) + # pylint: enable=protected-access if transformed is None: raise ValueError('Column {} is not supported.'.format(column.name)) - self._columns_to_tensors[column] = transformed - return self._columns_to_tensors[column] + self._feature_tensors[column] = transformed + return transformed def _check_feature_columns(feature_columns): @@ -709,7 +813,7 @@ class _NumericColumn(_DenseColumn, @property def _variable_shape(self): - return self.shape + return tensor_shape.TensorShape(self.shape) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del weight_collections @@ -738,7 +842,8 @@ class _BucketizedColumn(_DenseColumn, _CategoricalColumn, @property def _variable_shape(self): - return tuple(self.source_column.shape) + (len(self.boundaries) + 1,) + return tensor_shape.TensorShape( + tuple(self.source_column.shape) + (len(self.boundaries) + 1,)) def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None): del weight_collections diff --git a/tensorflow/python/feature_column/feature_column_test.py b/tensorflow/python/feature_column/feature_column_test.py index 32d6a4e8f0a..bc626533104 100644 --- a/tensorflow/python/feature_column/feature_column_test.py +++ b/tensorflow/python/feature_column/feature_column_test.py @@ -65,7 +65,7 @@ class LazyColumnTest(test.TestCase): def _parse_example_config(self): pass - builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])}) + builder = fc._LazyBuilder(features={'a': [[2], [3.]]}) column = TransformCounter() self.assertEqual(0, column.num_transform) builder.get(column) @@ -88,7 +88,7 @@ class LazyColumnTest(test.TestCase): def _parse_example_config(self): pass - builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])}) + builder = fc._LazyBuilder(features={'a': [[2], [3.]]}) column = Transformer() self.assertEqual('Output', builder.get(column)) self.assertEqual('Output', builder.get(column)) @@ -108,13 +108,13 @@ class LazyColumnTest(test.TestCase): def _parse_example_config(self): pass - features = {'a': constant_op.constant([[2], [3.]])} + features = {'a': [[2], [3.]]} builder = fc._LazyBuilder(features=features) builder.get(Transformer()) self.assertEqual(['a'], list(features.keys())) def test_error_if_feature_is_not_found(self): - builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])}) + builder = fc._LazyBuilder(features={'a': [[2], [3.]]}) with self.assertRaisesRegexp(ValueError, 'bbb is not in features dictionary'): builder.get('bbb') @@ -135,7 +135,7 @@ class LazyColumnTest(test.TestCase): def _parse_example_config(self): pass - builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])}) + builder = fc._LazyBuilder(features={'a': [[2], [3.]]}) with self.assertRaisesRegexp(ValueError, 'NotAProperColumn is not supported'): builder.get(NotAProperColumn()) @@ -145,7 +145,7 @@ class LazyColumnTest(test.TestCase): class NotAFeatureColumn(object): pass - builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])}) + builder = fc._LazyBuilder(features={'a': [[2], [3.]]}) with self.assertRaisesRegexp( TypeError, '"key" must be either a "str" or "_FeatureColumn".'): builder.get(NotAFeatureColumn()) @@ -273,7 +273,7 @@ class NumericColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two) builder = fc._LazyBuilder({ - 'price': constant_op.constant([[1., 2.], [5., 6.]]) + 'price': [[1., 2.], [5., 6.]] }) output = builder.get(price) with self.test_session(): @@ -286,7 +286,7 @@ class NumericColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two) builder = fc._LazyBuilder({ - 'price': constant_op.constant([[1., 2.], [5., 6.]]) + 'price': [[1., 2.], [5., 6.]] }) self.assertEqual(builder.get(price), price._get_dense_tensor(builder)) @@ -315,7 +315,7 @@ class NumericColumnTest(test.TestCase): def test_make_linear_model(self): price = fc.numeric_column('price') with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} predictions = fc.make_linear_model(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -402,7 +402,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): builder = fc._LazyBuilder({ - 'price': constant_op.constant([[-1., 1.], [5., 6.]]) + 'price': [[-1., 1.], [5., 6.]] }) transformed_tensor = builder.get(bucketized_price) with _initialized_session(): @@ -414,7 +414,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): builder = fc._LazyBuilder({ - 'price': constant_op.constant([[-1.], [1.], [5.], [6.]]) + 'price': [[-1.], [1.], [5.], [6.]] }) with _initialized_session(): bucketized_price_tensor = bucketized_price._get_dense_tensor(builder) @@ -432,7 +432,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): builder = fc._LazyBuilder({ - 'price': constant_op.constant([[-1., 1.], [5., 6.]]) + 'price': [[-1., 1.], [5., 6.]] }) with _initialized_session(): bucketized_price_tensor = bucketized_price._get_dense_tensor(builder) @@ -448,7 +448,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): builder = fc._LazyBuilder({ - 'price': constant_op.constant([[-1.], [1.], [5.], [6.]]) + 'price': [[-1.], [1.], [5.], [6.]] }) with _initialized_session() as sess: id_weight_pair = bucketized_price._get_sparse_tensors(builder) @@ -465,7 +465,7 @@ class BucketizedColumnTest(test.TestCase): bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): builder = fc._LazyBuilder({ - 'price': constant_op.constant([[-1., 1.], [5., 6.]]) + 'price': [[-1., 1.], [5., 6.]] }) with _initialized_session() as sess: id_weight_pair = bucketized_price._get_sparse_tensors(builder) @@ -502,7 +502,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[1]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[-1.], [1.], [5.], [6.]])} + features = {'price': [[-1.], [1.], [5.], [6.]]} predictions = fc.make_linear_model(features, [bucketized_price]) bias = get_linear_model_bias() bucketized_price_var = get_linear_model_column_var(bucketized_price) @@ -527,7 +527,7 @@ class BucketizedColumnTest(test.TestCase): price = fc.numeric_column('price', shape=[2]) bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6]) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[-1., 1.], [5., 6.]])} + features = {'price': [[-1., 1.], [5., 6.]]} predictions = fc.make_linear_model(features, [bucketized_price]) bias = get_linear_model_bias() bucketized_price_var = get_linear_model_column_var(bucketized_price) @@ -621,15 +621,15 @@ class SparseColumnHashedTest(test.TestCase): float_fc = fc.categorical_column_with_hash_bucket( 'a_float', 10, dtype=dtypes.string) int_tensor = sparse_tensor.SparseTensor( - values=constant_op.constant([101]), + values=[101], indices=[[0, 0]], dense_shape=[1, 1]) string_tensor = sparse_tensor.SparseTensor( - values=constant_op.constant(['101']), + values=['101'], indices=[[0, 0]], dense_shape=[1, 1]) float_tensor = sparse_tensor.SparseTensor( - values=constant_op.constant([101.]), + values=[101.], indices=[[0, 0]], dense_shape=[1, 1]) builder = fc._LazyBuilder({ @@ -745,7 +745,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_bias(self): price = fc.numeric_column('price') with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} predictions = fc.make_linear_model(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -848,7 +848,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_multi_output(self): price = fc.numeric_column('price') with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} predictions = fc.make_linear_model(features, [price], units=3) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -885,7 +885,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_multi_dimension(self): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1., 2.], [5., 6.]])} + features = {'price': [[1., 2.], [5., 6.]]} predictions = fc.make_linear_model(features, [price]) price_var = get_linear_model_column_var(price) with _initialized_session() as sess: @@ -913,7 +913,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_multi_dimension_multi_output(self): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1., 2.], [5., 6.]])} + features = {'price': [[1., 2.], [5., 6.]]} predictions = fc.make_linear_model(features, [price], units=3) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -928,7 +928,7 @@ class MakeLinearModelTest(test.TestCase): def test_raises_if_shape_mismatch(self): price = fc.numeric_column('price', shape=2) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} predictions = fc.make_linear_model(features, [price]) with _initialized_session(): with self.assertRaisesRegexp(Exception, 'requested shape has 4'): @@ -937,7 +937,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_reshaping(self): price = fc.numeric_column('price', shape=[1, 2]) with ops.Graph().as_default(): - features = {'price': constant_op.constant([[[1., 2.]], [[5., 6.]]])} + features = {'price': [[[1., 2.]], [[5., 6.]]]} predictions = fc.make_linear_model(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -953,8 +953,8 @@ class MakeLinearModelTest(test.TestCase): price2 = fc.numeric_column('price2') with ops.Graph().as_default(): features = { - 'price1': constant_op.constant([[1., 2.], [5., 6.]]), - 'price2': constant_op.constant([[3.], [4.]]) + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3.], [4.]] } predictions = fc.make_linear_model(features, [price1, price2]) bias = get_linear_model_bias() @@ -973,7 +973,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_collection(self): price = fc.numeric_column('price') with ops.Graph().as_default() as g: - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} fc.make_linear_model(features, [price], weight_collections=['my-vars']) my_vars = g.get_collection('my-vars') bias = get_linear_model_bias() @@ -998,7 +998,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_trainable_default(self): price = fc.numeric_column('price') with ops.Graph().as_default() as g: - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} fc.make_linear_model(features, [price]) bias = get_linear_model_bias() price_var = get_linear_model_column_var(price) @@ -1022,7 +1022,7 @@ class MakeLinearModelTest(test.TestCase): def test_dense_trainable_false(self): price = fc.numeric_column('price') with ops.Graph().as_default() as g: - features = {'price': constant_op.constant([[1.], [5.]])} + features = {'price': [[1.], [5.]]} fc.make_linear_model(features, [price], trainable=False) trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) self.assertEqual([], trainable_vars) @@ -1074,5 +1074,89 @@ class MakeLinearModelTest(test.TestCase): self.assertIn('wire_cast', my_vars[2].name) +class MakeInputLayerTest(test.TestCase): + + def test_should_be_dense_column(self): + with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'): + fc.make_input_layer( + features={'a': [[0]]}, + feature_columns=[ + fc.categorical_column_with_hash_bucket('wire_cast', 4) + ]) + + def test_does_not_support_dict_columns(self): + with self.assertRaisesRegexp( + ValueError, 'Expected feature_columns to be iterable, found dict.'): + fc.make_input_layer( + features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')}) + + def test_raises_if_duplicate_name(self): + with self.assertRaisesRegexp( + ValueError, 'Duplicate feature column name found for columns'): + fc.make_input_layer( + features={'a': [[0]]}, + feature_columns=[fc.numeric_column('a'), + fc.numeric_column('a')]) + + def test_one_column(self): + price = fc.numeric_column('price') + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + net = fc.make_input_layer(features, [price]) + with _initialized_session(): + self.assertAllClose([[1.], [5.]], net.eval()) + + def test_multi_dimension(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1., 2.], [5., 6.]]} + net = fc.make_input_layer(features, [price]) + with _initialized_session(): + self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) + + def test_raises_if_shape_mismatch(self): + price = fc.numeric_column('price', shape=2) + with ops.Graph().as_default(): + features = {'price': [[1.], [5.]]} + net = fc.make_input_layer(features, [price]) + with _initialized_session(): + with self.assertRaisesRegexp(Exception, 'requested shape has 4'): + net.eval() + + def test_reshaping(self): + price = fc.numeric_column('price', shape=[1, 2]) + with ops.Graph().as_default(): + features = {'price': [[[1., 2.]], [[5., 6.]]]} + net = fc.make_input_layer(features, [price]) + with _initialized_session(): + self.assertAllClose([[1., 2.], [5., 6.]], net.eval()) + + def test_multi_column(self): + price1 = fc.numeric_column('price1', shape=2) + price2 = fc.numeric_column('price2') + with ops.Graph().as_default(): + features = { + 'price1': [[1., 2.], [5., 6.]], + 'price2': [[3.], [4.]] + } + net = fc.make_input_layer(features, [price1, price2]) + with _initialized_session(): + self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval()) + + def test_column_order(self): + price_a = fc.numeric_column('price_a') + price_b = fc.numeric_column('price_b') + with ops.Graph().as_default(): + features = { + 'price_a': [[1.]], + 'price_b': [[3.]], + } + net1 = fc.make_input_layer(features, [price_a, price_b]) + net2 = fc.make_input_layer(features, [price_b, price_a]) + with _initialized_session(): + self.assertAllClose([[1., 3.]], net1.eval()) + self.assertAllClose([[1., 3.]], net2.eval()) + + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 69d1a6f60e1..245dcc96db7 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -774,6 +774,11 @@ class VariableScopeTest(test.TestCase): self.assertEqual([v.name for v in scope.global_variables()], ["foo/b:0"]) + def testGetVariableWithRefDtype(self): + v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32) + # Ensure it is possible to do get_variable with a _ref dtype passed in. + _ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype) + def axis0_into1_partitioner(shape=None, **unused_kwargs): part = [1] * len(shape) diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 76719f35b80..43addbe5a52 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -280,6 +280,17 @@ class _VariableStore(object): raise ValueError( "Passed a custom_getter which is not callable: %s" % custom_getter) + # If a *_ref type is passed in an error would be triggered further down the + # stack. We prevent this using base_dtype to get a non-ref version of the + # type, before doing anything else. When _ref types are removed in favour of + # resources, this line can be removed. + try: + dtype = dtype.base_dtype + except AttributeError: + # .base_dtype not existing means that we will try and use the raw dtype + # which was passed in - this might be a NumPy type which is valid. + pass + # This is the main logic of get_variable. However, custom_getter # may override this logic. So we save it as a callable and pass # it to custom_getter. diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py index 93e64b4ab0b..277c11386dd 100644 --- a/tensorflow/python/training/supervisor.py +++ b/tensorflow/python/training/supervisor.py @@ -994,7 +994,7 @@ class SVSummaryThread(coordinator.LooperThread): summary_strs = self._sess.run(self._sv.summary_op) global_step = None if self._sv.summary_writer: - logging.info("Recording summary at step %d.", global_step) + logging.info("Recording summary at step %s.", global_step) self._sv.summary_writer.add_summary(summary_strs, global_step) diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index e441321fc86..9b8e23babd6 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -227,7 +227,7 @@ string ToString(CUresult result) { // created by StreamExecutor (to ensure that the CUDA runtime didn't create a // context behind our backs). CUcontext CurrentContext() { - CUcontext current = CUDADriver::CurrentContextOrDie(); + CUcontext current = CUDADriver::CurrentContextOrDie(); if (current != nullptr && !CreatedContexts::Has(current)) { LOG(FATAL) << "current context was not created by the StreamExecutor " "cuda_driver API: " @@ -480,27 +480,56 @@ bool DeviceOptionsToContextFlags(DeviceOptions device_options, int *flags) { CUdevice device, DeviceOptions device_options, CudaContext** context) { *context = nullptr; - CUcontext former_context = CurrentContext(); - if (former_context != nullptr) { - LOG(WARNING) << "creating context when one is currently active; existing: " - << former_context; - } - int flags = 0; if (!DeviceOptionsToContextFlags(device_options, &flags)) { LOG(WARNING) << "could not convert all device options into context flags"; } CUresult res; + CUcontext former_context; CUcontext new_context; { // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their // context creation: see http://b/13248943 #if CUDA_VERSION >= 7000 - res = cuDevicePrimaryCtxSetFlags(device, flags); + { + unsigned int former_primary_context_flags; + int former_primary_context_is_active; + CHECK_EQ(CUDA_SUCCESS, + cuDevicePrimaryCtxGetState(device, &former_primary_context_flags, + &former_primary_context_is_active)); + if (former_primary_context_flags != flags) { + if (former_primary_context_is_active) { + LOG(ERROR) + << "The primary context is active and has a different flag set (" + << former_primary_context_flags << ") than the desired flag set (" + << flags << ")."; + } else { + CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags)); + } + } + } + + former_context = CUDADriver::CurrentContextOrDie(); res = cuDevicePrimaryCtxRetain(&new_context, device); + if (former_context != nullptr) { + if (former_context == new_context) { + VLOG(2) << "The primary context " << former_context + << " exists before initializing the StreamExecutor."; + } else { + LOG(WARNING) << "A non-primary context " << former_context + << " exists before initializing the StreamExecutor. We " + "haven't verified StreamExecutor works with that."; + } + } #else + former_context = CurrentContext(); + if (former_context != nullptr) { + LOG(WARNING) + << "creating context when one is currently active; existing: " + << former_context; + } res = cuCtxCreate(&new_context, flags, device); #endif }