Internal change.
Change: 155009390
This commit is contained in:
parent 7828637e07
commit 965d620104
@@ -388,6 +388,16 @@ tf_gen_op_wrappers_cc(
     visibility = ["//tensorflow:internal"],
 )
 
+tf_gen_op_wrappers_cc(
+    name = "functional_ops",
+    include_internal_ops = 1,
+    op_lib_names = [
+        "functional_ops",
+    ],
+    pkg = "//tensorflow/core",
+    visibility = ["//tensorflow:internal"],
+)
+
 tf_gen_op_wrappers_cc(
     name = "resource_variable_ops",
     include_internal_ops = 1,
@@ -422,7 +422,7 @@ class ReferenceUtil {
   static std::unique_ptr<Array2D<T1>> ApplyElementwise2D(
       F&& f, const Array2D<T1>& array1, const Array2D<Ts>&... arrays) {
     AssertSameSize2D(array1, arrays...);
-    auto result = MakeUnique<Array2D<T1>>(array1.n1(), array1.n1());
+    auto result = MakeUnique<Array2D<T1>>(array1.n1(), array1.n2());
     for (int64 i = 0; i < array1.n1(); ++i) {
       for (int64 j = 0; j < array1.n2(); ++j) {
         (*result)(i, j) = f(array1(i, j), arrays(i, j)...);
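An aside on the one-character fix above: with a non-square input the old code sized the result as n1() x n1(), so the inner loop over j wrote past the allocated columns. A hedged sketch of a call that only the fixed version handles correctly (names come from XLA's reference_util.h; the 2x3 shape and float instantiation are illustrative assumptions):

```cpp
// A 2x3 array doubled elementwise; the result must also be 2x3.
xla::Array2D<float> a(2, 3, 1.0f);
auto doubled = xla::ReferenceUtil::ApplyElementwise2D(
    [](float x) { return 2.0f * x; }, a);
// Before the fix the result was allocated as 2x2, so writes at j == 2
// ran out of bounds; after the fix doubled->n2() == 3 as expected.
```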
@@ -24,6 +24,11 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
                                       int64 operand_index) {
   HloInstruction* producer = consumer->mutable_operand(operand_index);
 
+  // Output fusion is not currently supported on CPUs.
+  if (producer->opcode() == HloOpcode::kFusion) {
+    return false;
+  }
+
   // Condition for consumer: must be elementwise or a fusion op
   // (which necessarily only contains elementwise operations)
   if (!(consumer->opcode() == HloOpcode::kFusion ||
@@ -46,6 +46,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
                                       int64 operand_index) {
   HloInstruction* producer = consumer->mutable_operand(operand_index);
 
+  // Output fusion is not currently supported on GPUs.
+  if (producer->opcode() == HloOpcode::kFusion) {
+    return false;
+  }
+
   // RNG operations are not currently parallel-friendly on GPU.
   if (producer->opcode() == HloOpcode::kRng) {
     return false;
@@ -1570,7 +1570,9 @@ string HloInstruction::ToCategory() const {
       return "non-elementwise fusion";
     }
     case FusionKind::kInput:
-      return "reduce fusion";
+      return "input fusion";
+    case FusionKind::kOutput:
+      return "output fusion";
     case FusionKind::kTransposeDot:
       return "dot fusion";
     case FusionKind::kConvBackwardFilter:
@@ -1618,7 +1620,6 @@ bool HloInstruction::IsFusable() const {
 
   // Some kinds of instructions don't make sense to fuse.
   switch (opcode_) {
-    case HloOpcode::kFusion:
     case HloOpcode::kInfeed:
     case HloOpcode::kOutfeed:
     case HloOpcode::kParameter:
@@ -2186,6 +2187,8 @@ string ToString(HloInstruction::FusionKind kind) {
       return "kLoop";
     case HloInstruction::FusionKind::kInput:
       return "kInput";
+    case HloInstruction::FusionKind::kOutput:
+      return "kOutput";
     case HloInstruction::FusionKind::kTransposeDot:
       return "kTransposeDot";
     case HloInstruction::FusionKind::kConvBackwardFilter:
@@ -54,7 +54,8 @@ class HloInstruction {
  public:
   enum class FusionKind {
     kLoop,                // Fused into a loop.
-    kInput,               // Fused into a reduction kernel.
+    kInput,               // Op's input is fused into the op itself.
+    kOutput,              // Op's output is fused into the op itself.
     kTransposeDot,        // Fused into a dot with transposed operands.
     kConvBackwardFilter,  // Fused into a backward filter convolution.
     kConvBackwardInput,   // Fused into a backward input convolution.
@@ -379,7 +379,12 @@ def multi_label_head(n_classes,
                      loss_fn=None):
   """Creates a Head for multi label classification.
 
-  The Head uses sigmoid cross entropy loss.
+  Multi-label classification handles the case where each example may have zero
+  or more associated labels, from a discrete set. This is distinct from
+  `multi_class_head` which has exactly one label from a discrete set.
+
+  This head by default uses sigmoid cross entropy loss, which expects as input
+  a multi-hot tensor of shape `(batch_size, num_classes)`.
 
   Args:
     n_classes: Integer, number of classes, must be >= 2
@@ -2417,6 +2417,9 @@ tf_cc_test(
         ":test_main",
         ":testlib",
         "//tensorflow/cc:cc_ops",
+        "//tensorflow/cc:cc_ops_internal",
+        "//tensorflow/cc:function_ops",
+        "//tensorflow/cc:functional_ops",
        "//tensorflow/core/kernels:cast_op",
        "//tensorflow/core/kernels:cwise_op",
        "//tensorflow/core/kernels:function_ops",
@@ -1001,25 +1001,19 @@ string NewName(const Node* n, bool pretty) {
 void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) {
   // We visit nodes in forward topological sort order, which is a
   // possible execution order of the graph.
-  std::vector<size_t> pending(g->num_node_ids());
-  std::deque<const Node*> ready;
-  for (const Node* n : g->nodes()) {
-    pending[n->id()] = n->in_edges().size();
-    if (pending[n->id()] == 0) ready.push_back(n);
-  }
   gtl::InlinedVector<const Edge*, 4> inputs;
   gdef->Clear();
   gdef->mutable_versions()->CopyFrom(g->versions());
-  while (!ready.empty()) {
-    const Node* n = ready.front();
-    ready.pop_front();
-    for (const Edge* e : n->out_edges()) {
-      const Node* next = e->dst();
-      if (--pending[next->id()] == 0) {
-        ready.push_back(next);
-      }
-    }
-    if (!n->IsOp()) continue;
+
+  std::vector<Node*> start_nodes;
+  for (Node* n : g->nodes()) {
+    if (n->out_edges().empty()) {
+      start_nodes.push_back(n);
+    }
+  }
+
+  ReverseDFSFrom(*g, start_nodes, nullptr, [gdef, pretty, &inputs](Node* n) {
+    if (!n->IsOp()) return;
     NodeDef* ndef = gdef->add_node();
     ndef->set_name(NewName(n, pretty));
     ndef->set_op(n->type_string());
@@ -1054,7 +1048,7 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) {
         ndef->add_input(strings::StrCat(srcname, ":", e->src_output()));
       }
     }
-  }
+  });
 }
 
 string DebugString(const Graph* g) {
(File diff suppressed because it is too large.)
@@ -400,16 +400,33 @@ void SetAttrValue(gtl::ArraySlice<NameAttrList> value, AttrValue* out) {
   }
 }
 
+// Wrapper around protocol buffer serialization that requests deterministic
+// serialization, in particular for Map fields, which serialize in a random
+// order by default. Returns true on success.
+template <typename T>
+static bool DeterministicSerialization(const T& t, string* result) {
+  const int size = t.ByteSize();
+  *result = string(size, '\0');
+  ::tensorflow::protobuf::io::ArrayOutputStream array_stream(&(*result)[0],
+                                                             size);
+  ::tensorflow::protobuf::io::CodedOutputStream output_stream(&array_stream);
+  output_stream.SetSerializationDeterministic(true);
+  t.SerializeWithCachedSizes(&output_stream);
+  return !output_stream.HadError() && size == output_stream.ByteCount();
+}
+
 bool AreAttrValuesEqual(const AttrValue& a, const AttrValue& b) {
   string a_str, b_str;
-  a.SerializeToString(&a_str);
-  b.SerializeToString(&b_str);
+  DeterministicSerialization(a, &a_str);
+  DeterministicSerialization(b, &b_str);
   // Note: it should be safe to compare proto serializations of the attr
   // values since at most one field should be set in each (indeed, it
   // must be the same field if they are to compare equal).
   // Exception: there are multiple equivalent representations of
   // TensorProtos. So a return value of true implies a == b, but not the
   // converse.
+  // TODO(phawkins): this is incorrect for NameAttrList attributes that may
+  // contain nested AttrValue maps.
   return a_str == b_str;
 }
 
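For context, a hedged sketch of the failure mode the new helper guards against. The `func` field of `AttrValue` holds a `NameAttrList`, whose `attr` member is a proto map; all names below are from tensorflow's attr_value.proto, and calling the file-local helper directly is for illustration only:

```cpp
tensorflow::AttrValue a, b;
a.mutable_func()->set_name("f");
(*a.mutable_func()->mutable_attr())["x"].set_i(1);
(*a.mutable_func()->mutable_attr())["y"].set_i(2);
b = a;  // logically equal message

string a_str, b_str;
// Plain SerializeToString may emit the two map entries in different
// orders across runs, so byte comparison of equal messages could fail;
// the deterministic wrapper fixes the entry order.
DeterministicSerialization(a, &a_str);
DeterministicSerialization(b, &b_str);
CHECK_EQ(a_str, b_str);
```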
@@ -23,8 +23,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-void DFS(const Graph& g, std::function<void(Node*)> enter,
-         std::function<void(Node*)> leave) {
+void DFS(const Graph& g, const std::function<void(Node*)>& enter,
+         const std::function<void(Node*)>& leave) {
   // Stack of work to do.
   struct Work {
     Node* node;
@@ -61,15 +61,23 @@ void DFS(const Graph& g, std::function<void(Node*)> enter,
   }
 }
 
-void ReverseDFS(const Graph& g, std::function<void(Node*)> enter,
-                std::function<void(Node*)> leave) {
+void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
+                const std::function<void(Node*)>& leave) {
+  ReverseDFSFrom(g, {g.sink_node()}, enter, leave);
+}
+
+void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
+                    const std::function<void(Node*)>& enter,
+                    const std::function<void(Node*)>& leave) {
   // Stack of work to do.
   struct Work {
     Node* node;
     bool leave;  // Are we entering or leaving n?
   };
-  std::vector<Work> stack;
-  stack.push_back(Work{g.sink_node(), false});
+  std::vector<Work> stack(start.size());
+  for (int i = 0; i < start.size(); ++i) {
+    stack[i] = Work{start[i], false};
+  }
 
   std::vector<bool> visited(g.num_node_ids(), false);
   while (!stack.empty()) {
@@ -21,20 +21,28 @@ limitations under the License.
 #include <vector>
 
 #include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/gtl/array_slice.h"
 
 namespace tensorflow {
 
 // Perform a depth-first-search on g starting at the source node.
 // If enter is not empty, calls enter(n) before visiting any children of n.
 // If leave is not empty, calls leave(n) after visiting all children of n.
-extern void DFS(const Graph& g, std::function<void(Node*)> enter,
-                std::function<void(Node*)> leave);
+extern void DFS(const Graph& g, const std::function<void(Node*)>& enter,
+                const std::function<void(Node*)>& leave);
 
 // Perform a reverse depth-first-search on g starting at the sink node.
 // If enter is not empty, calls enter(n) before visiting any parents of n.
 // If leave is not empty, calls leave(n) after visiting all parents of n.
-extern void ReverseDFS(const Graph& g, std::function<void(Node*)> enter,
-                       std::function<void(Node*)> leave);
+extern void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
+                       const std::function<void(Node*)>& leave);
+
+// Perform a reverse depth-first-search on g starting at the 'start' nodes.
+// If enter is not empty, calls enter(n) before visiting any parents of n.
+// If leave is not empty, calls leave(n) after visiting all parents of n.
+extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
+                           const std::function<void(Node*)>& enter,
+                           const std::function<void(Node*)>& leave);
 
 // Stores in *order the post-order numbering of all nodes
 // in graph found via a depth first search starting at the source node.
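A usage sketch of the new entry point, mirroring what the rewritten ToGraphDef above does. It assumes an already-built `tensorflow::Graph g`; the helper name is hypothetical:

```cpp
#include <vector>

#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/platform/logging.h"

void PrintExecutionOrder(const tensorflow::Graph& g) {
  // Start from all terminal nodes instead of only the single sink node.
  std::vector<tensorflow::Node*> start_nodes;
  for (tensorflow::Node* n : g.nodes()) {
    if (n->out_edges().empty()) start_nodes.push_back(n);
  }
  // leave(n) fires only after all of n's parents have been visited, so
  // the leave order is a valid forward execution order of the graph.
  tensorflow::ReverseDFSFrom(g, start_nodes, /*enter=*/nullptr,
                             [](tensorflow::Node* n) {
                               if (n->IsOp()) LOG(INFO) << n->name();
                             });
}
```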
@@ -90,6 +90,23 @@ cc_test(
     ],
 )
 
+cc_library(
+    name = "robust_stats",
+    srcs = ["robust_stats.cc"],
+    hdrs = ["robust_stats.h"],
+    visibility = ["//visibility:public"],
+)
+
+cc_test(
+    name = "robust_stats_test",
+    srcs = ["robust_stats_test.cc"],
+    deps = [
+        ":robust_stats",
+        "//tensorflow/core:test",
+        "//tensorflow/core:test_main",
+    ],
+)
+
 cc_library(
     name = "utils",
     srcs = ["utils.cc"],
@@ -116,3 +133,37 @@ cc_library(
         "//tensorflow/core:lib",
     ],
 )
+
+cc_library(
+    name = "virtual_scheduler",
+    srcs = ["virtual_scheduler.cc"],
+    hdrs = ["virtual_scheduler.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:utils",
+        "//tensorflow/core/grappler/costs:cost_estimator",
+    ],
+)
+
+cc_library(
+    name = "measuring_cost_estimator",
+    srcs = ["measuring_cost_estimator.cc"],
+    hdrs = ["measuring_cost_estimator.h"],
+    visibility = ["//visibility:public"],
+    deps = [
+        ":robust_stats",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:lib_internal",
+        "//tensorflow/core:lib_proto_parsing",
+        "//tensorflow/core:protos_all_cc",
+        "//tensorflow/core/grappler:grappler_item",
+        "//tensorflow/core/grappler:grappler_item_builder",
+        "//tensorflow/core/grappler/clusters:cluster",
+        "//tensorflow/core/grappler/costs:cost_estimator",
+        "//tensorflow/core/kernels:ops_util",
+    ],
+)
tensorflow/core/grappler/costs/measuring_cost_estimator.cc (new file, 133 lines)
@@ -0,0 +1,133 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"

#include <limits>

#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/robust_stats.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/public/session.h"

namespace tensorflow {
namespace grappler {

MeasuringCostEstimator::MeasuringCostEstimator(Cluster* cluster,
                                               int measurement_steps,
                                               int measurement_threads)
    : measurement_steps_(measurement_steps),
      measurement_threads_(measurement_threads) {
  CHECK_GE(measurement_steps, 1);
  if (measurement_threads > 0) {
    thread_pool_.reset(new thread::ThreadPool(
        Env::Default(), SanitizeThreadSuffix("measurements"),
        measurement_threads));
  }
  cluster_ = cluster;
}

Status MeasuringCostEstimator::Initialize(const GrapplerItem& item) {
  feed_ = item.feed;
  fetch_ = item.fetch;
  return cluster_->Initialize(item);
}

Status MeasuringCostEstimator::PredictCosts(const GraphDef& optimized_graph,
                                            CostGraphDef* cost_graph,
                                            Costs* costs) const {
  std::vector<double> times(measurement_steps_);
  BlockingCounter barrier(measurement_steps_);

  mutex status_mu;
  Status status;

  auto measurement_fn = [&](const int step) {
    const Costs::MicroSeconds start = Env::Default()->NowMicros();

    RunMetadata metadata;
    const Status local_status =
        cluster_->Run(optimized_graph, feed_, fetch_, &metadata);
    {
      mutex_lock lock(status_mu);
      status.Update(local_status);
    }
    if (step < 0) {
      // Discard the first iteration as it triggers the warmup, and therefore
      // takes much longer than a normal step.
      return;
    }
    if (!local_status.ok()) {
      // Discard the data if the run wasn't successful.
      barrier.DecrementCount();
      return;
    }

    const Costs::MicroSeconds finish = Env::Default()->NowMicros();
    const double time = (finish - start).count() * 1e3;
    times[step] = time;

    if (cost_graph && (step + 1 == measurement_steps_)) {
      metadata.mutable_cost_graph()->Swap(cost_graph);
    }

    barrier.DecrementCount();
  };

  // Initialize the computation and warm up TensorFlow.
  measurement_fn(-1);

  if (!status.ok()) {
    LOG(ERROR) << "Failed to run start measurements: "
               << status.error_message();
    costs->execution_time = Costs::Duration::max();
    return status;
  }

  // Run "measurement_steps_" and measure the time.
  if (measurement_threads_ > 0) {
    for (int i = 0; i < measurement_steps_; ++i) {
      thread_pool_->Schedule([i, &measurement_fn]() { measurement_fn(i); });
    }
    barrier.Wait();
  } else {
    for (int i = 0; i < measurement_steps_ && status.ok(); ++i) {
      measurement_fn(i);
    }
  }

  if (!status.ok()) {
    LOG(ERROR) << "Failed to measure graph performance: "
               << status.error_message();
    costs->execution_time = Costs::Duration::max();
    costs->max_execution_time = Costs::Duration::max();
    costs->min_execution_time = 0;
    return status;
  }

  // Compute the average time of the measure steps. Use Huber statistics
  // to filter out outliers.
  RobustStats stats(times);
  costs->execution_time = Costs::Duration(stats.mean());
  costs->max_execution_time = Costs::Duration(stats.hi());
  costs->min_execution_time = Costs::Duration(stats.lo());

  return Status::OK();
}

}  // end namespace grappler
}  // end namespace tensorflow
tensorflow/core/grappler/costs/measuring_cost_estimator.h (new file, 76 lines)
@@ -0,0 +1,76 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_
#define TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_

#include <string>
#include <utility>
#include <vector>

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/grappler/costs/cost_estimator.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
class CostGraphDef;
class GraphDef;
}  // namespace tensorflow

namespace tensorflow {
namespace grappler {

class Cluster;
struct GrapplerItem;

// Estimate the cost of running a Grappler item by actually running the
// corresponding TensorFlow graph on the specified cluster and measuring the
// runtimes.
class MeasuringCostEstimator : public CostEstimator {
 public:
  // Run the model for measurement_steps to measure its average cost.
  // When measurement_threads is greater than 0, use a threadpool of as many
  // threads to run the measurements; otherwise, run them serially. Does not
  // take ownership of cluster.
  explicit MeasuringCostEstimator(Cluster* cluster, int measurement_steps,
                                  int measurement_threads);
  ~MeasuringCostEstimator() override {}

  // Initializes the estimator for the specified grappler item.
  // This implementation always returns OK.
  Status Initialize(const GrapplerItem& item) override;

  // Runs the optimized version of the graph on the cluster, measures
  // the runtimes of each operation, and annotates the CostGraphDef
  // with the corresponding measurements.
  // Returns the average latency for the whole graph.
  Status PredictCosts(const GraphDef& optimized_graph, CostGraphDef* cost_graph,
                      Costs* overall_cost) const override;

 private:
  Cluster* cluster_;  // Not owned.
  int measurement_steps_;
  int measurement_threads_;
  std::vector<std::pair<string, Tensor>> feed_;
  std::vector<string> fetch_;
  std::unique_ptr<thread::ThreadPool> thread_pool_;
};

}  // end namespace grappler
}  // end namespace tensorflow

#endif  // TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_
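A hypothetical call sequence for this class, assuming an already-initialized `Cluster* cluster` and a populated `GrapplerItem item` (that `item.graph` is the `GraphDef` member of `GrapplerItem` is the only extra assumption here):

```cpp
tensorflow::grappler::MeasuringCostEstimator estimator(
    cluster, /*measurement_steps=*/10, /*measurement_threads=*/0);
TF_CHECK_OK(estimator.Initialize(item));

tensorflow::CostGraphDef cost_graph;
tensorflow::grappler::Costs costs;
TF_CHECK_OK(estimator.PredictCosts(item.graph, &cost_graph, &costs));
// costs.execution_time is the Huber-robust mean over the measured runs;
// costs.min/max_execution_time come from RobustStats::lo()/hi().
```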
tensorflow/core/grappler/costs/robust_stats.cc (new file, 151 lines)
@@ -0,0 +1,151 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/grappler/costs/robust_stats.h"
#include <algorithm>

namespace tensorflow {
namespace grappler {

// Given a sorted vector of values, calculate the median.
// Returns 0 for an empty vector. Does not verify sortedness.
static double SortedMedian(const std::vector<double> &values) {
  const int n = values.size();
  if (n == 0) return 0.0;
  if (n & 1) {
    return values[n / 2];
  } else {
    return (values[n / 2] + values[n / 2 - 1]) / 2.0;
  }
}

// Given a vector of values (sorted or not), calculate the median.
static double Median(std::vector<double> &&values) {
  const size_t n = values.size();
  if (n == 0) return 0;
  const auto middle = values.begin() + (n / 2);
  // Put the middle value in its place.
  std::nth_element(values.begin(), middle, values.end());
  if (n & 1) {
    return *middle;
  }
  // Return the average of the two elements, the max_element lower than
  // *middle is found between begin and middle as a post-cond of
  // nth_element.
  const auto lower_middle = std::max_element(values.begin(), middle);
  // Preventing overflow. We know that '*lower_middle <= *middle'.
  // If both are on opposite sides of zero, the sum won't overflow, otherwise
  // the difference won't overflow.
  if (*lower_middle <= 0 && *middle >= 0) {
    return (*lower_middle + *middle) / 2;
  }
  return *lower_middle + (*middle - *lower_middle) / 2;
}

// Given a set of values, calculates the scaled Median Absolute Deviation (a
// robust approximation to the standard deviation). This is calculated as the
// median of the absolute deviations from the median, scaled by 1.4826. Its
// advantage over the standard deviation is that it is not (as) affected by
// outlier values. Returns a pair<median, mad>.
static std::pair<double, double> ScaledMedianAbsoluteDeviation(
    const std::vector<double> &sorted_values) {
  double median = SortedMedian(sorted_values);

  // Next, we calculate the absolute deviations from the median,
  // find the median of the resulting data, and scale by 1.4826.
  std::vector<double> deviations;
  deviations.reserve(sorted_values.size());
  for (double d : sorted_values) {
    deviations.push_back(std::abs(d - median));
  }
  double mad = Median(std::move(deviations)) * 1.4826;
  return std::pair<double, double>(median, mad);
}

RobustStats::RobustStats(const std::vector<double> &values)
    : RobustStats(std::vector<double>(values)) {}

RobustStats::RobustStats(std::vector<double> &&values) {
  std::sort(values.begin(), values.end());
  lo_ = values[0];
  hi_ = values.back();
  HuberMAD(values);
}

// Computes an updated mean using Huber's weighting function (values beyond
// the margin are weighted by margin / abs(value - mean)).
double UpdateHuberMean(const std::vector<double> &sorted_values, double mean,
                       double margin) {
  int num_within = 0;
  double sum = 0.0;

  for (double d : sorted_values) {
    if (d < mean - margin) {
      sum -= margin;
    } else if (d > mean + margin) {
      sum += margin;
    } else {
      sum += d;
      ++num_within;
    }
  }

  // It is possible, for a set with an interquartile distance of 0, i.e., with
  // more than half of the values at the median, to encounter the case where
  // the Huber mean drifts slightly off the median and there are no values
  // within the margin. In that case, just return the old mean, and the caller
  // will quit.
  if (num_within > 0) {
    return sum / num_within;
  } else {
    return mean;
  }
}

// Given a list of values, this approximates the stddev using the MAD and then
// uses it to compute a Huber robust mean (sandwich mean). A margin of
// c*stddev is defined around the current mean, and values are weighted by
// margin / abs(value - mean) if outside the margin, or 1 if inside. This
// computes the mean iteratively, because each time it changes the margin
// shifts a bit. It typically settles very quickly, but it's possible for it
// to be unstable. We limit it to 10 iterations.
void RobustStats::HuberMAD(const std::vector<double> &sorted_values) {
  const std::pair<double, double> median_mad =
      ScaledMedianAbsoluteDeviation(sorted_values);
  mean_ = median_mad.first;
  stddev_ = median_mad.second;

  // c = 1.345 is the commonly used cutoff with 95% efficiency at the normal.
  // We're using c = 1.5 to be a little more conservative, and because that's
  // the default in S-plus.
  // TODO(dehnert): Specialize Stats for integral types so we don't implement
  // methods that don't make sense.
  const double c = 1.5;
  const double margin = c * stddev_;

  // Iterate 10 times, or until the Huber mean stabilizes.
  // If the margin is zero, we don't want mean to drift from the median.
  if (margin > 0.0) {
    for (int k = 0; k < 10; ++k) {
      double old_mean = mean_;
      mean_ = UpdateHuberMean(sorted_values, mean_, margin);
      if (mean_ == old_mean) break;
    }
  }
}

}  // namespace grappler
}  // namespace tensorflow
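A self-contained sketch of the scaled-MAD estimate used above (independent of the class; the 1.4826 factor makes the MAD consistent with the standard deviation of a normal distribution):

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

double ScaledMad(std::vector<double> v) {
  auto median = [](std::vector<double> x) {
    std::sort(x.begin(), x.end());
    const size_t n = x.size();
    return (n & 1) ? x[n / 2] : (x[n / 2 - 1] + x[n / 2]) / 2.0;
  };
  const double m = median(v);
  for (double& d : v) d = std::abs(d - m);  // absolute deviations
  return 1.4826 * median(v);
}
// ScaledMad({1, 2, 3, 4, 100}) == 1.4826: the outlier at 100 barely
// moves it, while the sample standard deviation would be around 43.
```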
tensorflow/core/grappler/costs/robust_stats.h (new file, 42 lines)
@@ -0,0 +1,42 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_
#define TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_

#include <vector>
namespace tensorflow {
namespace grappler {
class RobustStats {
 public:
  RobustStats(const std::vector<double>& values);
  RobustStats(std::vector<double>&& values);

  double lo() const { return lo_; }
  double hi() const { return hi_; }
  double mean() const { return mean_; }

 private:
  void HuberMAD(const std::vector<double>& values);

  double lo_;
  double hi_;
  double mean_;
  double stddev_;
};
}  // namespace grappler
}  // namespace tensorflow

#endif  // TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_
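Usage is a one-liner; a hedged example with made-up step timings:

```cpp
#include <vector>
#include "tensorflow/core/grappler/costs/robust_stats.h"

int main() {
  // Five ordinary timings plus one outlier (e.g. a paging hiccup).
  std::vector<double> micros = {102.0, 98.0, 101.0, 99.0, 100.0, 5000.0};
  tensorflow::grappler::RobustStats stats(micros);
  // stats.mean() stays near 100 despite the outlier; lo()/hi() report the
  // raw extremes (98 and 5000), as the test below also exercises.
  return stats.mean() < 200.0 ? 0 : 1;
}
```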
tensorflow/core/grappler/costs/robust_stats_test.cc (new file, 63 lines)
@@ -0,0 +1,63 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/grappler/costs/robust_stats.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace grappler {
namespace {

class RobustStatsTest : public ::testing::Test {
 public:
  void SetUp() override {
    for (double d = 1.0; d <= 5.0; d += 1.0) {
      values1_.push_back(5.0 - d);
      values1_.push_back(5.0 + d);
      values2_.push_back(25.0 - 2 * d);
      values2_.push_back(25.0 + 2 * d);
      values3_.push_back(-3.0 - d);
      values3_.push_back(-3.0 + d);
    }
    values1_.push_back(5.0);  // Odd # elements, mean is 5.0
    values3_.push_back(197.0);
    values3_.push_back(-203.0);  // Even # elements, mean is -3.0
  }

  std::vector<double> values1_;
  std::vector<double> values2_;
  std::vector<double> values3_;
};

TEST_F(RobustStatsTest, Simple) {
  RobustStats s1(values1_);
  EXPECT_EQ(5.0, s1.mean());
  EXPECT_EQ(0.0, s1.lo());
  EXPECT_EQ(10.0, s1.hi());

  RobustStats s2(values2_);
  EXPECT_EQ(25.0, s2.mean());
  EXPECT_EQ(15.0, s2.lo());
  EXPECT_EQ(35.0, s2.hi());

  RobustStats s3(values3_);
  EXPECT_EQ(-3.0, s3.mean());
  EXPECT_EQ(-203.0, s3.lo());
  EXPECT_EQ(197.0, s3.hi());
}

}  // namespace
}  // namespace grappler
}  // namespace tensorflow
tensorflow/core/grappler/costs/virtual_scheduler.cc (new file, 215 lines)
@@ -0,0 +1,215 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/grappler/utils.h"

namespace tensorflow {
namespace grappler {
namespace {

Costs CombineCosts(const Costs& left, const Costs& right) {
  CHECK_NE(left.max_memory, kMemoryUnknown);
  CHECK_NE(left.max_per_op_buffers, kMemoryUnknown);
  CHECK_NE(left.max_per_op_streaming, kMemoryUnknown);

  Costs result = left;
  result.execution_time += right.execution_time;
  if (right.max_memory != kMemoryUnknown) {
    result.max_memory += right.max_memory;
  }
  if (right.max_per_op_buffers != kMemoryUnknown) {
    result.max_per_op_buffers =
        std::max(left.max_per_op_buffers, right.max_per_op_buffers);
  }
  if (right.max_per_op_streaming != kMemoryUnknown) {
    result.max_per_op_streaming =
        std::max(left.max_per_op_streaming, right.max_per_op_streaming);
  }
  VLOG(2) << "costs execution_time=" << result.execution_time.count()
          << " max_memory=" << result.max_memory
          << " max_per_op_buffers=" << result.max_per_op_buffers
          << " max_per_op_streaming=" << result.max_per_op_streaming;
  return result;
}
}  // namespace

VirtualScheduler::VirtualScheduler(const GraphDef& graph,
                                   const std::vector<string>& fetch_nodes)
    : graph_costs_(Costs::ZeroCosts()),
      // TODO(dyoon): Use a better way than FIFO.
      ready_nodes_(new FIFOManager()) {
  // First, get the nodes that would run to output fetch_nodes.
  std::vector<const NodeDef*> nodes =
      ComputeTransitiveFanin(graph, fetch_nodes);

  // TODO(dyoon): this is a bit inefficient as name_to_node is already built in
  // ComputeTransitiveFanin().
  std::unordered_map<string, const NodeDef*> name_to_node;
  for (const auto& node : graph.node()) {
    name_to_node[node.name()] = &node;
  }

  // Build node_map.
  for (const auto* node : nodes) {
    auto& node_state = GetNodeStateOrCreateIt(node);
    // TODO(dyoon): add SendRecv considering devices and control dependency.
    for (const string& input : node->input()) {
      const NodeDef* in = name_to_node[NodeName(input)];
      CHECK(in);
      node_state.inputs.push_back(in);
      auto& input_node_state = GetNodeStateOrCreateIt(in);
      input_node_state.outputs.push_back(node);
    }
    if (node->input().empty()) {
      node_state.time_ready =
          Costs::Duration();  // Node without input: ready at time 0.
      ready_nodes_->AddNode(node);
    }
  }
}

const NodeDef* VirtualScheduler::GetCurrNode() const {
  return ready_nodes_->GetCurrNode();
}

NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
  auto it = node_map_.find(node);
  if (it == node_map_.end()) {
    it = node_map_.emplace(node, NodeState()).first;
  }
  return it->second;
}

bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
  // Update graph_costs_ and per-op costs.
  graph_costs_ = CombineCosts(graph_costs_, node_costs);
  const auto* node = GetCurrNode();
  const auto& op_name = node->op();

  auto it = op_to_cost_.find(op_name);
  if (it == op_to_cost_.end()) {
    it = op_to_cost_.emplace(op_name, Costs::ZeroCosts()).first;
  }
  auto& op_cost = it->second;
  op_cost = CombineCosts(op_cost, node_costs);

  // Update node and device states.
  auto& node_state = node_map_[node];
  auto& device = device_[node->device()];
  device.nodes_executed.push_back(node);
  // Node is scheduled when the device is available AND all the inputs are
  // ready; hence, time_scheduled is time_ready if time_ready > device curr
  // time.
  node_state.time_scheduled =
      std::max(device.GetCurrTime(), node_state.time_ready);
  // Override device curr time with the time_scheduled.
  device.device_costs.execution_time = node_state.time_scheduled;
  device.device_costs = CombineCosts(device.device_costs, node_costs);
  auto curr_time = device.GetCurrTime();
  node_state.time_finished = curr_time;

  // Update device's per-op cost.
  {
    auto it = device.op_to_cost.find(op_name);
    if (it == device.op_to_cost.end()) {
      it = device.op_to_cost.emplace(op_name, Costs::ZeroCosts()).first;
    }
    auto& op_cost = it->second;
    op_cost = CombineCosts(op_cost, node_costs);

    VLOG(2) << "Op scheduled -- name: " << node->name()
            << ", op: " << node->op() << ", device: " << node->device()
            << ", ready: " << node_state.time_ready.count()
            << ", scheduled: " << node_state.time_scheduled.count()
            << ", finished: " << node_state.time_finished.count();

    // Increment num_inputs_ready of the output nodes.
    for (auto* output : node_state.outputs) {
      auto& output_state = node_map_[output];
      output_state.num_inputs_ready++;
      if (output_state.num_inputs_ready == output_state.inputs.size()) {
        // This output node is now ready.
        output_state.time_ready = curr_time;
        ready_nodes_->AddNode(output);
      }
    }

    // Increment num_outputs_executed of the input nodes.
    for (auto* input : node_state.inputs) {
      auto& input_state = node_map_[input];
      input_state.num_outputs_executed++;
      if (input_state.num_outputs_executed == input_state.outputs.size()) {
        // All the outputs are executed; no reference to this input node.
        input_state.time_no_reference = curr_time;
        // TODO(dyoon): collect device memory usage; note that this input node
        // uses device memory between time_scheduled and time_no_reference.
      }
    }
  }

  // Remove the current node; assume FIFO.
  ready_nodes_->RemoveCurrNode();
  return !ready_nodes_->Empty();  // True if not empty.
}

Costs VirtualScheduler::Summary() const {
  // Print out basic execution summary.
  VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count();
  VLOG(1) << "Expected max memory: " << graph_costs_.max_memory;
  VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers;
  VLOG(1) << "Expected max per-op streaming buffers: "
          << graph_costs_.max_per_op_streaming;

  VLOG(1) << "Per-op execution time:";
  for (const auto& op_cost_pair : op_to_cost_) {
    const auto& op = op_cost_pair.first;
    const auto& cost = op_cost_pair.second.execution_time.count();
    if (cost) {  // Skip printing out zero-cost ops.
      VLOG(1) << " + " << op << " : " << cost;
    }
  }

  // Print per device summary
  VLOG(1) << "Devices:";
  Costs critical_path_costs = Costs::ZeroCosts();

  for (const auto& device : device_) {
    const auto& name = device.first;
    const auto& state = device.second;
    VLOG(1) << "Device = " << name
            << ", num_nodes = " << state.nodes_executed.size()
            << ", execution_time = " << state.GetCurrTime().count();
    VLOG(1) << "Per-op execution time:";
    for (const auto& op_cost_pair : state.op_to_cost) {
      const auto& op = op_cost_pair.first;
      const auto& cost = op_cost_pair.second.execution_time.count();
      if (cost) {  // Skip printing out zero-cost ops.
        VLOG(1) << " + " << op << " : " << cost;
      }
    }
    if (critical_path_costs.execution_time <= state.GetCurrTime()) {
      critical_path_costs = state.device_costs;
    }
  }

  VLOG(1) << "Critical path execution time: "
          << critical_path_costs.execution_time.count();
  return critical_path_costs;
}

}  // end namespace grappler
}  // end namespace tensorflow
tensorflow/core/grappler/costs/virtual_scheduler.h (new file, 116 lines)
@@ -0,0 +1,116 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_

#include <list>
#include <memory>
#include <unordered_map>

#include "tensorflow/core/grappler/costs/cost_estimator.h"
#include "tensorflow/core/grappler/grappler_item.h"

namespace tensorflow {
namespace grappler {

namespace {
struct NodeState {
  std::vector<const NodeDef*> inputs;
  std::vector<const NodeDef*> outputs;
  int num_inputs_ready;
  int num_outputs_executed;
  Costs::Duration time_ready;
  Costs::Duration time_scheduled;
  Costs::Duration time_finished;
  Costs::Duration time_no_reference;

  // Node will be ready to be executed at time_ready, scheduled at
  // time_scheduled, and finishes execution at time_finished.
  // Between time_scheduled and time_no_reference, the node's output tensor
  // needs to be on the device, using up device memory.

  NodeState() {
    num_inputs_ready = 0;
    num_outputs_executed = 0;
    time_ready = Costs::Duration::max();
    time_scheduled = Costs::Duration::max();
    time_finished = Costs::Duration::max();
    time_no_reference = Costs::Duration::max();
  }
};

struct DeviceState {
  std::vector<const NodeDef*> nodes_executed;
  Costs device_costs;
  std::map<string, Costs> op_to_cost;  // Per-op cost.

  DeviceState() { device_costs = Costs::ZeroCosts(); }

  Costs::Duration GetCurrTime() const { return device_costs.execution_time; }
};

// ReadyNodeManager (abstract class):
// Keeps ready nodes and picks the best one to be scheduled.
class ReadyNodeManager {
 public:
  ReadyNodeManager() {}
  virtual ~ReadyNodeManager() {}
  virtual void AddNode(const NodeDef* node) = 0;
  virtual const NodeDef* GetCurrNode() const = 0;
  virtual void RemoveCurrNode() = 0;
  virtual bool Empty() const = 0;
};

class FIFOManager : public ReadyNodeManager {
 public:
  FIFOManager() : ReadyNodeManager() {}
  ~FIFOManager() override {}
  void AddNode(const NodeDef* node) override { nodes_.push_back(node); }
  const NodeDef* GetCurrNode() const override { return nodes_.front(); }
  void RemoveCurrNode() override { nodes_.pop_front(); }
  bool Empty() const override { return nodes_.empty(); }

 private:
  std::list<const NodeDef*> nodes_;
};
}  // namespace

// The virtual scheduler emulates execution of nodes in a graph, considering
// dependencies, device, etc.
class VirtualScheduler {
 public:
  VirtualScheduler(const GraphDef& graph,
                   const std::vector<string>& fetch_nodes);

  const NodeDef* GetCurrNode() const;
  bool MarkCurrNodeExecuted(const Costs& node_costs);

  Costs Summary() const;

 private:
  NodeState& GetNodeStateOrCreateIt(const NodeDef* node);

  Costs graph_costs_;                   // Graph cost.
  std::map<string, Costs> op_to_cost_;  // Per-op cost.
  std::unique_ptr<ReadyNodeManager> ready_nodes_;
  std::unordered_map<const NodeDef*, NodeState> node_map_;
  std::unordered_map<string, DeviceState> device_;
};

}  // namespace grappler
}  // end namespace tensorflow

#endif  // THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_
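A sketch of the intended driver loop for this class (the flat 10-microsecond per-node cost is an illustrative assumption; a real caller would plug in a per-op cost model):

```cpp
#include <vector>

#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
#include "tensorflow/core/platform/logging.h"

tensorflow::grappler::Costs SimulateGraph(
    const tensorflow::GraphDef& graph,
    const std::vector<tensorflow::string>& fetch_nodes) {
  tensorflow::grappler::VirtualScheduler scheduler(graph, fetch_nodes);
  tensorflow::grappler::Costs node_costs =
      tensorflow::grappler::Costs::ZeroCosts();
  node_costs.execution_time = tensorflow::grappler::Costs::Duration(10);

  bool more_nodes = true;
  while (more_nodes) {
    const tensorflow::NodeDef* node = scheduler.GetCurrNode();
    VLOG(1) << "Simulating " << node->name();
    // Charges the node's cost, wakes up any outputs whose inputs are now
    // all ready, and returns false once the ready queue drains.
    more_nodes = scheduler.MarkCurrNodeExecuted(node_costs);
  }
  return scheduler.Summary();  // Costs of the critical-path device.
}
```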
@@ -2109,7 +2109,9 @@ tf_kernel_library(
 tf_kernel_library(
     name = "matrix_triangular_solve_op",
     prefix = "matrix_triangular_solve_op",
-    deps = LINALG_DEPS,
+    deps = LINALG_DEPS + if_cuda([
+        "//tensorflow/core/platform/default/build_config:cublas_plugin",
+    ]),
 )
 
 tf_kernel_library(
@@ -2350,6 +2352,8 @@ tf_kernel_library(
         "//conditions:default": [],
     }) + if_mkl([
         "//third_party/mkl:intel_binary_blob",
-    ]),
+    ]) + if_cuda([
+        "//tensorflow/core/platform/default/build_config:cublas_plugin",
+    ]),
 )
 
@@ -2630,6 +2634,7 @@ tf_kernel_library(
         ],
         "//conditions:default": [],
     }) + if_cuda([
+        "//tensorflow/core/platform/default/build_config:cublas_plugin",
         "//tensorflow/core/platform/default/build_config:cudnn_plugin",
     ]),
 )
 
@@ -328,12 +328,16 @@ The downside is that all the weights read are from the previous training step.
 So it is a different algorithm from SGD. But it is possible to improve its
 convergence by adjusting learning rate and other hyperparameters.
 
-### Executing the script
+## Executing the script
 
 This section lists the core command line arguments and a few basic examples for
 executing the main script
 ([tf_cnn_benchmarks.py](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py)).
 
+> Note: `tf_cnn_benchmarks.py` uses the config `force_gpu_compatible`,
+> which was introduced after TensorFlow 1.1. Until TensorFlow 1.2 is released
+> building from source is advised.
+
 #### Base command line arguments
 
 * **`model`**: Model to use, e.g. `resnet50`, `inception3`, `vgg16`, and
@@ -139,6 +139,82 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
 
 
+def make_input_layer(features,
+                     feature_columns,
+                     weight_collections=None,
+                     trainable=True):
+  """Returns a dense `Tensor` as input layer based on given `feature_columns`.
+
+  Generally a single example in training data is described with FeatureColumns.
+  At the first layer of the model, this column oriented data should be converted
+  to a single `Tensor`.
+
+  Example:
+
+  ```python
+  price = numeric_column('price')
+  keywords_embedded = embedding_column(
+      categorical_column_with_hash_bucket("keywords", 10K), dimensions=16)
+  all_feature_columns = [price, keywords_embedded, ...]
+  dense_tensor = make_input_layer(features, all_feature_columns)
+  for units in [128, 64, 32]:
+    dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
+  prediction = tf.layers.dense(dense_tensor, 1)
+  ```
+
+  Args:
+    features: A mapping from key to tensors. `FeatureColumn`s look up via these
+      keys. For example `numeric_column('price')` will look at 'price' key in
+      this dict. Values can be a `SparseTensor` or a `Tensor` depending on
+      corresponding `FeatureColumn`.
+    feature_columns: An iterable containing all the `FeatureColumn`s. All items
+      should be instances of classes derived from `_DenseColumn` such as
+      `numeric_column`, `embedding_column`, `bucketized_column`,
+      `indicator_column`. If you have categorical features, you can wrap them
+      with an `embedding_column` or `indicator_column`.
+    weight_collections: A list of collection names to which the Variable will be
+      added. Note that variables will also be added to collections
+      `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
+    trainable: If `True` also add the variable to the graph collection
+      `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
+
+  Returns:
+    A `Tensor` which represents input layer of a model. Its shape
+    is (batch_size, first_layer_dimension) and its dtype is `float32`.
+    first_layer_dimension is determined based on given `feature_columns`.
+
+  Raises:
+    ValueError: if an item in `feature_columns` is not a `_DenseColumn`.
+  """
+  _check_feature_columns(feature_columns)
+  for column in feature_columns:
+    if not isinstance(column, _DenseColumn):
+      raise ValueError(
+          'Items of feature_columns must be a _DenseColumn. '
+          'You can wrap a categorical column with an '
+          'embedding_column or indicator_column. Given: {}'.format(column))
+  weight_collections = list(weight_collections or [])
+  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
+  with variable_scope.variable_scope(
+      None, default_name='make_input_layer', values=features.values()):
+    builder = _LazyBuilder(features)
+    output_tensors = []
+    for column in sorted(feature_columns, key=lambda x: x.name):
+      with variable_scope.variable_scope(None, default_name=column.name):
+        tensor = column._get_dense_tensor(  # pylint: disable=protected-access
+            builder,
+            weight_collections=weight_collections,
+            trainable=trainable)
+        num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
+        batch_size = array_ops.shape(tensor)[0]
+        tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
+        output_tensors.append(tensor)
+    return array_ops.concat(output_tensors, 1)
+
+
 def make_linear_model(features,
                       feature_columns,
                       units=1,
@@ -156,10 +232,21 @@ def make_linear_model(features,
   while `make_input_layer` explicitly requires wrapping each of them with an
   `embedding_column` or an `indicator_column`.
 
+  Example:
+
+  ```python
+  price = numeric_column('price')
+  price_buckets = bucketized_column(price, boundaries=[0., 10., 100., 1000.])
+  keywords = categorical_column_with_hash_bucket("keywords", 10K)
+  all_feature_columns = [price_buckets, keywords, ...]
+  prediction = make_linear_model(features, all_feature_columns)
+  ```
+
   Args:
-    features: A mapping from key to tensors. 'string' key means a base feature.
-      It can have `_FeatureColumn` as a key too. That means that FeatureColumn
-      is already transformed by the input pipeline.
+    features: A mapping from key to tensors. `FeatureColumn`s look up via these
+      keys. For example `numeric_column('price')` will look at 'price' key in
+      this dict. Values are `Tensor` or `SparseTensor` depending on
+      corresponding `FeatureColumn`.
     feature_columns: An iterable containing all the FeatureColumns. All items
       should be instances of classes derived from FeatureColumn.
     units: An integer, dimensionality of the output space. Default
@@ -191,9 +278,10 @@ def make_linear_model(features,
       raise ValueError('Items of feature_columns must be either a _DenseColumn '
                        'or _CategoricalColumn. Given: {}'.format(column))
   weight_collections = list(weight_collections or [])
-  weight_collections += [
-      ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES
-  ]
+  if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
+  if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
+    weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
   with variable_scope.variable_scope(
       None, default_name='make_linear_model', values=features.values()):
     weigthed_sums = []
@@ -228,7 +316,8 @@ def numeric_column(key,
                    normalizer_fn=None):
   """Represents real valued or numerical features.
 
-  An example:
+  Example:
+
   ```python
   price = numeric_column('price')
   all_feature_columns = [price, ...]
@@ -298,7 +387,8 @@ def bucketized_column(source_column, boundaries):
   `boundaries=[0., 1., 2.]` generates buckets `(-inf, 0.)`, `[0., 1.)`,
   `[1., 2.)`, and `[2., +inf)`.
 
-  An example:
+  Example:
+
   ```python
   price = numeric_column('price')
   bucketized_price = bucketized_column(price, boundaries=[...])
@@ -349,7 +439,8 @@ def categorical_column_with_hash_bucket(key,
   want to distribute your inputs into a finite number of buckets by hashing.
   output_id = Hash(input_feature_string) % bucket_size
 
-  An example:
+  Example:
+
   ```python
   keywords = categorical_column_with_hash_bucket("keywords", 10K)
   all_feature_columns = [keywords, ...]
@@ -471,7 +562,7 @@ class _DenseColumn(_FeatureColumn):
 
   @abc.abstractproperty
   def _variable_shape(self):
-    """Returns shape of variable which is compatible with _get_dense_tensor."""
+    """Returns a `TensorShape` of variable compatible with _get_dense_tensor."""
     pass
 
   @abc.abstractmethod
@@ -480,6 +571,7 @@ class _DenseColumn(_FeatureColumn):
 
     The output of this function will be used by model-builder-functions. For
     example the pseudo code of `make_input_layer` will be like that:
+
     ```python
     def make_input_layer(features, feature_columns, ...):
       outputs = [fc._get_dense_tensor(...) for fc in feature_columns]
@@ -503,7 +595,7 @@ def _create_dense_column_weighted_sum(
         builder,
         weight_collections=weight_collections,
         trainable=trainable)
-    num_elements = tensor_shape.TensorShape(column._variable_shape).num_elements()  # pylint: disable=protected-access
+    num_elements = column._variable_shape.num_elements()  # pylint: disable=protected-access
     batch_size = array_ops.shape(tensor)[0]
     tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
     weight = variable_scope.get_variable(
@@ -615,12 +707,15 @@ class _LazyBuilder(object):
     """Creates a `_LazyBuilder`.
 
     Args:
-      features: A mapping from feature column to tensors. A `string` key
+      features: A mapping from feature column to objects that are `Tensor` or
+        `SparseTensor`, or can be converted to same via
+        `sparse_tensor.convert_to_tensor_or_sparse_tensor`. A `string` key
         signifies a base feature (not-transformed). A `FeatureColumn` key
         means that this `Tensor` is the output of an existing `FeatureColumn`
         which can be reused.
     """
-    self._columns_to_tensors = features.copy()
+    self._features = features.copy()
+    self._feature_tensors = {}
 
   def get(self, key):
     """Returns a `Tensor` for the given key.
@ -640,9 +735,16 @@ class _LazyBuilder(object):
|
||||
ValueError: if key is not found or a transformed `Tensor` cannot be
|
||||
computed.
|
||||
"""
|
||||
if key in self._columns_to_tensors:
|
||||
# Feature_column is already transformed or it's a raw feature.
|
||||
return self._columns_to_tensors[key]
|
||||
if key in self._feature_tensors:
|
||||
# FeatureColumn is already transformed or converted.
|
||||
return self._feature_tensors[key]
|
||||
|
||||
if key in self._features:
|
||||
# FeatureColumn is a raw feature.
|
||||
feature_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
|
||||
self._features[key])
|
||||
self._feature_tensors[key] = feature_tensor
|
||||
return feature_tensor
|
||||
|
||||
if not isinstance(key, (str, _FeatureColumn)):
|
||||
raise TypeError('"key" must be either a "str" or "_FeatureColumn". '
|
||||
@ -653,11 +755,13 @@ class _LazyBuilder(object):
|
||||
|
||||
column = key
|
||||
logging.debug('Transforming feature_column %s.', column)
|
||||
transformed = column._transform_feature(self) # pylint: disable=protected-access
|
||||
# pylint: disable=protected-access
|
||||
transformed = column._transform_feature(self)
|
||||
# pylint: enable=protected-access
|
||||
if transformed is None:
|
||||
raise ValueError('Column {} is not supported.'.format(column.name))
|
||||
self._columns_to_tensors[column] = transformed
|
||||
return self._columns_to_tensors[column]
|
||||
self._feature_tensors[column] = transformed
|
||||
return transformed
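A self-contained sketch of the caching scheme this hunk introduces, with raw inputs kept apart from memoized results; `LazyBuilderSketch` is a hypothetical stand-in, not the committed class:

```python
class LazyBuilderSketch(object):

    def __init__(self, features):
        self._features = features.copy()  # raw inputs, keyed by string
        self._feature_tensors = {}        # memoized conversions and transforms

    def get(self, key, transform_fn=None):
        if key in self._feature_tensors:
            return self._feature_tensors[key]   # computed at most once
        if key in self._features:
            value = self._features[key]         # raw feature, cached as-is
        elif transform_fn is not None:
            value = transform_fn(self)          # derived feature
        else:
            raise ValueError('{} is not in features dictionary.'.format(key))
        self._feature_tensors[key] = value
        return value

builder = LazyBuilderSketch({'a': [[2.], [3.]]})
assert builder.get('a') is builder.get('a')  # second call hits the cache
```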


def _check_feature_columns(feature_columns):
|
@ -709,7 +813,7 @@ class _NumericColumn(_DenseColumn,

@property
def _variable_shape(self):
return self.shape
return tensor_shape.TensorShape(self.shape)

def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
del weight_collections
|
@ -738,7 +842,8 @@ class _BucketizedColumn(_DenseColumn, _CategoricalColumn,

@property
def _variable_shape(self):
return tuple(self.source_column.shape) + (len(self.boundaries) + 1,)
return tensor_shape.TensorShape(
tuple(self.source_column.shape) + (len(self.boundaries) + 1,))
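The shape arithmetic above, worked for a concrete column (plain Python, illustrative): a source column of shape (2,) with four boundaries one-hot encodes each element into len(boundaries) + 1 buckets.

```python
source_shape = (2,)
boundaries = [0, 2, 4, 6]
variable_shape = tuple(source_shape) + (len(boundaries) + 1,)
print(variable_shape)  # (2, 5)
```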

def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
del weight_collections
|
@ -65,7 +65,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass

builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
column = TransformCounter()
self.assertEqual(0, column.num_transform)
builder.get(column)
|
@ -88,7 +88,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass

builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
column = Transformer()
self.assertEqual('Output', builder.get(column))
self.assertEqual('Output', builder.get(column))
|
@ -108,13 +108,13 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass

features = {'a': constant_op.constant([[2], [3.]])}
features = {'a': [[2], [3.]]}
builder = fc._LazyBuilder(features=features)
builder.get(Transformer())
self.assertEqual(['a'], list(features.keys()))

def test_error_if_feature_is_not_found(self):
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(ValueError,
'bbb is not in features dictionary'):
builder.get('bbb')
|
@ -135,7 +135,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass

builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(ValueError,
'NotAProperColumn is not supported'):
builder.get(NotAProperColumn())
|
@ -145,7 +145,7 @@ class LazyColumnTest(test.TestCase):
class NotAFeatureColumn(object):
pass

builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(
TypeError, '"key" must be either a "str" or "_FeatureColumn".'):
builder.get(NotAFeatureColumn())
|
@ -273,7 +273,7 @@ class NumericColumnTest(test.TestCase):

price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
builder = fc._LazyBuilder({
'price': constant_op.constant([[1., 2.], [5., 6.]])
'price': [[1., 2.], [5., 6.]]
})
output = builder.get(price)
with self.test_session():
|
@ -286,7 +286,7 @@ class NumericColumnTest(test.TestCase):

price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
builder = fc._LazyBuilder({
'price': constant_op.constant([[1., 2.], [5., 6.]])
'price': [[1., 2.], [5., 6.]]
})
self.assertEqual(builder.get(price), price._get_dense_tensor(builder))

|
@ -315,7 +315,7 @@ class NumericColumnTest(test.TestCase):
def test_make_linear_model(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -402,7 +402,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
transformed_tensor = builder.get(bucketized_price)
with _initialized_session():
|
@ -414,7 +414,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1.], [1.], [5.], [6.]])
'price': [[-1.], [1.], [5.], [6.]]
})
with _initialized_session():
bucketized_price_tensor = bucketized_price._get_dense_tensor(builder)
|
@ -432,7 +432,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
with _initialized_session():
bucketized_price_tensor = bucketized_price._get_dense_tensor(builder)
|
@ -448,7 +448,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1.], [1.], [5.], [6.]])
'price': [[-1.], [1.], [5.], [6.]]
})
with _initialized_session() as sess:
id_weight_pair = bucketized_price._get_sparse_tensors(builder)
|
@ -465,7 +465,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
with _initialized_session() as sess:
id_weight_pair = bucketized_price._get_sparse_tensors(builder)
|
@ -502,7 +502,7 @@ class BucketizedColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[1])
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[-1.], [1.], [5.], [6.]])}
features = {'price': [[-1.], [1.], [5.], [6.]]}
predictions = fc.make_linear_model(features, [bucketized_price])
bias = get_linear_model_bias()
bucketized_price_var = get_linear_model_column_var(bucketized_price)
|
@ -527,7 +527,7 @@ class BucketizedColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[2])
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[-1., 1.], [5., 6.]])}
features = {'price': [[-1., 1.], [5., 6.]]}
predictions = fc.make_linear_model(features, [bucketized_price])
bias = get_linear_model_bias()
bucketized_price_var = get_linear_model_column_var(bucketized_price)
|
@ -621,15 +621,15 @@ class SparseColumnHashedTest(test.TestCase):
float_fc = fc.categorical_column_with_hash_bucket(
'a_float', 10, dtype=dtypes.string)
int_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant([101]),
values=[101],
indices=[[0, 0]],
dense_shape=[1, 1])
string_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant(['101']),
values=['101'],
indices=[[0, 0]],
dense_shape=[1, 1])
float_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant([101.]),
values=[101.],
indices=[[0, 0]],
dense_shape=[1, 1])
builder = fc._LazyBuilder({
|
@ -745,7 +745,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_bias(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -848,7 +848,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_output(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price], units=3)
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -885,7 +885,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_dimension(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1., 2.], [5., 6.]])}
features = {'price': [[1., 2.], [5., 6.]]}
predictions = fc.make_linear_model(features, [price])
price_var = get_linear_model_column_var(price)
with _initialized_session() as sess:
|
@ -913,7 +913,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_dimension_multi_output(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1., 2.], [5., 6.]])}
features = {'price': [[1., 2.], [5., 6.]]}
predictions = fc.make_linear_model(features, [price], units=3)
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -928,7 +928,7 @@ class MakeLinearModelTest(test.TestCase):
def test_raises_if_shape_mismatch(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
with _initialized_session():
with self.assertRaisesRegexp(Exception, 'requested shape has 4'):
|
@ -937,7 +937,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_reshaping(self):
price = fc.numeric_column('price', shape=[1, 2])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[[1., 2.]], [[5., 6.]]])}
features = {'price': [[[1., 2.]], [[5., 6.]]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -953,8 +953,8 @@ class MakeLinearModelTest(test.TestCase):
price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': constant_op.constant([[1., 2.], [5., 6.]]),
'price2': constant_op.constant([[3.], [4.]])
'price1': [[1., 2.], [5., 6.]],
'price2': [[3.], [4.]]
}
predictions = fc.make_linear_model(features, [price1, price2])
bias = get_linear_model_bias()
|
@ -973,7 +973,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_collection(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price], weight_collections=['my-vars'])
my_vars = g.get_collection('my-vars')
bias = get_linear_model_bias()
|
@ -998,7 +998,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_trainable_default(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
|
@ -1022,7 +1022,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_trainable_false(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price], trainable=False)
trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
self.assertEqual([], trainable_vars)
|
@ -1074,5 +1074,89 @@ class MakeLinearModelTest(test.TestCase):
self.assertIn('wire_cast', my_vars[2].name)


class MakeInputLayerTest(test.TestCase):

def test_should_be_dense_column(self):
with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'):
fc.make_input_layer(
features={'a': [[0]]},
feature_columns=[
fc.categorical_column_with_hash_bucket('wire_cast', 4)
])

def test_does_not_support_dict_columns(self):
with self.assertRaisesRegexp(
ValueError, 'Expected feature_columns to be iterable, found dict.'):
fc.make_input_layer(
features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})

def test_raises_if_duplicate_name(self):
with self.assertRaisesRegexp(
ValueError, 'Duplicate feature column name found for columns'):
fc.make_input_layer(
features={'a': [[0]]},
feature_columns=[fc.numeric_column('a'),
fc.numeric_column('a')])

def test_one_column(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1.], [5.]], net.eval())

def test_multi_dimension(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1., 2.], [5., 6.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())

def test_raises_if_shape_mismatch(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
with self.assertRaisesRegexp(Exception, 'requested shape has 4'):
net.eval()

def test_reshaping(self):
price = fc.numeric_column('price', shape=[1, 2])
with ops.Graph().as_default():
features = {'price': [[[1., 2.]], [[5., 6.]]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())

def test_multi_column(self):
price1 = fc.numeric_column('price1', shape=2)
price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': [[1., 2.], [5., 6.]],
'price2': [[3.], [4.]]
}
net = fc.make_input_layer(features, [price1, price2])
with _initialized_session():
self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())

def test_column_order(self):
price_a = fc.numeric_column('price_a')
price_b = fc.numeric_column('price_b')
with ops.Graph().as_default():
features = {
'price_a': [[1.]],
'price_b': [[3.]],
}
net1 = fc.make_input_layer(features, [price_a, price_b])
net2 = fc.make_input_layer(features, [price_b, price_a])
with _initialized_session():
self.assertAllClose([[1., 3.]], net1.eval())
self.assertAllClose([[1., 3.]], net2.eval())


if __name__ == '__main__':
test.main()
|
@ -774,6 +774,11 @@ class VariableScopeTest(test.TestCase):
self.assertEqual([v.name
for v in scope.global_variables()], ["foo/b:0"])

def testGetVariableWithRefDtype(self):
v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
# Ensure it is possible to do get_variable with a _ref dtype passed in.
_ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)


def axis0_into1_partitioner(shape=None, **unused_kwargs):
part = [1] * len(shape)
|
@ -280,6 +280,17 @@ class _VariableStore(object):
raise ValueError(
"Passed a custom_getter which is not callable: %s" % custom_getter)

# If a *_ref type is passed in, an error would be triggered further down the
# stack. We prevent this using base_dtype to get a non-ref version of the
# type, before doing anything else. When _ref types are removed in favour of
# resources, this line can be removed.
try:
dtype = dtype.base_dtype
except AttributeError:
# .base_dtype not existing means that we will try and use the raw dtype
# which was passed in - this might be a NumPy type which is valid.
pass

# This is the main logic of get_variable. However, custom_getter
# may override this logic. So we save it as a callable and pass
# it to custom_getter.
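Why `base_dtype` helps here, sketched against the TF 1.x `dtypes` module (assumed available; the NumPy case exercises the `AttributeError` branch):

```python
import numpy as np
from tensorflow.python.framework import dtypes

print(dtypes.float32_ref.base_dtype)      # float32: the ref-ness is stripped
print(dtypes.float32.base_dtype)          # already a base type; unchanged
print(hasattr(np.float32, 'base_dtype'))  # False: falls back to the raw dtype
```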
|
@ -994,7 +994,7 @@ class SVSummaryThread(coordinator.LooperThread):
summary_strs = self._sess.run(self._sv.summary_op)
global_step = None
if self._sv.summary_writer:
logging.info("Recording summary at step %d.", global_step)
logging.info("Recording summary at step %s.", global_step)
self._sv.summary_writer.add_summary(summary_strs, global_step)
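Why the change from `%d` to `%s` matters: `global_step` may still be `None` at this point, and `%d` cannot render `None`. A quick illustration:

```python
step = None
print("Recording summary at step %s." % step)  # 'None' is rendered fine
try:
    print("Recording summary at step %d." % step)
except TypeError as err:
    print(err)  # %d format: a number is required, not NoneType
```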


|
@ -227,7 +227,7 @@ string ToString(CUresult result) {
// created by StreamExecutor (to ensure that the CUDA runtime didn't create a
// context behind our backs).
CUcontext CurrentContext() {
CUcontext current = CUDADriver::CurrentContextOrDie();
CUcontext current = CUDADriver::CurrentContextOrDie();
if (current != nullptr && !CreatedContexts::Has(current)) {
LOG(FATAL) << "current context was not created by the StreamExecutor "
"cuda_driver API: "
|
@ -480,27 +480,56 @@ bool DeviceOptionsToContextFlags(DeviceOptions device_options, int *flags) {
CUdevice device, DeviceOptions device_options, CudaContext** context) {
*context = nullptr;

CUcontext former_context = CurrentContext();
if (former_context != nullptr) {
LOG(WARNING) << "creating context when one is currently active; existing: "
<< former_context;
}

int flags = 0;
if (!DeviceOptionsToContextFlags(device_options, &flags)) {
LOG(WARNING) << "could not convert all device options into context flags";
}

CUresult res;
CUcontext former_context;
CUcontext new_context;
{
// TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
// context creation: see http://b/13248943

#if CUDA_VERSION >= 7000
res = cuDevicePrimaryCtxSetFlags(device, flags);
{
unsigned int former_primary_context_flags;
int former_primary_context_is_active;
CHECK_EQ(CUDA_SUCCESS,
cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
&former_primary_context_is_active));
if (former_primary_context_flags != flags) {
if (former_primary_context_is_active) {
LOG(ERROR)
<< "The primary context is active and has a different flag set ("
<< former_primary_context_flags << ") than the desired flag set ("
<< flags << ").";
} else {
CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
}
}
}

former_context = CUDADriver::CurrentContextOrDie();
res = cuDevicePrimaryCtxRetain(&new_context, device);
if (former_context != nullptr) {
if (former_context == new_context) {
VLOG(2) << "The primary context " << former_context
<< " exists before initializing the StreamExecutor.";
} else {
LOG(WARNING) << "A non-primary context " << former_context
<< " exists before initializing the StreamExecutor. We "
"haven't verified StreamExecutor works with that.";
}
}
#else
former_context = CurrentContext();
if (former_context != nullptr) {
LOG(WARNING)
<< "creating context when one is currently active; existing: "
<< former_context;
}
res = cuCtxCreate(&new_context, flags, device);
#endif
}