Internal change.

Change: 155009390
A. Unique TensorFlower 2017-05-03 14:03:00 -08:00 committed by TensorFlower Gardener
parent 7828637e07
commit 965d620104
29 changed files with 1865 additions and 492 deletions

View File

@ -388,6 +388,16 @@ tf_gen_op_wrappers_cc(
visibility = ["//tensorflow:internal"],
)
tf_gen_op_wrappers_cc(
name = "functional_ops",
include_internal_ops = 1,
op_lib_names = [
"functional_ops",
],
pkg = "//tensorflow/core",
visibility = ["//tensorflow:internal"],
)
tf_gen_op_wrappers_cc(
name = "resource_variable_ops",
include_internal_ops = 1,

View File

@ -422,7 +422,7 @@ class ReferenceUtil {
static std::unique_ptr<Array2D<T1>> ApplyElementwise2D(
F&& f, const Array2D<T1>& array1, const Array2D<Ts>&... arrays) {
AssertSameSize2D(array1, arrays...);
auto result = MakeUnique<Array2D<T1>>(array1.n1(), array1.n1());
auto result = MakeUnique<Array2D<T1>>(array1.n1(), array1.n2());
for (int64 i = 0; i < array1.n1(); ++i) {
for (int64 j = 0; j < array1.n2(); ++j) {
(*result)(i, j) = f(array1(i, j), arrays(i, j)...);
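The one-character fix above changes the result buffer from `n1 x n1` to `n1 x n2`, which only matters for non-square inputs. A minimal standalone sketch of why (using a hypothetical `Grid2D` in place of `xla::Array2D`, which is not shown here):

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Hypothetical stand-in for xla::Array2D, just enough for the demo.
template <typename T>
class Grid2D {
 public:
  Grid2D(std::size_t n1, std::size_t n2) : n1_(n1), n2_(n2), data_(n1 * n2) {}
  T& operator()(std::size_t i, std::size_t j) { return data_[i * n2_ + j]; }
  std::size_t n1() const { return n1_; }
  std::size_t n2() const { return n2_; }

 private:
  std::size_t n1_, n2_;
  std::vector<T> data_;
};

int main() {
  // A non-square 2x3 input: sizing the result as (n1, n1) == (2, 2) would
  // lose a column and index out of bounds in the inner loop below.
  Grid2D<int> a(2, 3);
  Grid2D<int> result(a.n1(), a.n2());  // The corrected shape.
  for (std::size_t i = 0; i < a.n1(); ++i) {
    for (std::size_t j = 0; j < a.n2(); ++j) {
      result(i, j) = a(i, j) + 1;
    }
  }
  assert(result.n1() == 2 && result.n2() == 3);
  return 0;
}
```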

View File

@ -24,6 +24,11 @@ bool CpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
int64 operand_index) {
HloInstruction* producer = consumer->mutable_operand(operand_index);
// Output fusion is not currently supported on CPUs.
if (producer->opcode() == HloOpcode::kFusion) {
return false;
}
// Condition for consumer: must be elementwise or a fusion op
// (which necessarily only contains elementwise operations)
if (!(consumer->opcode() == HloOpcode::kFusion ||

View File

@ -46,6 +46,11 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer,
int64 operand_index) {
HloInstruction* producer = consumer->mutable_operand(operand_index);
// Output fusion is not currently supported on GPUs.
if (producer->opcode() == HloOpcode::kFusion) {
return false;
}
// RNG operations are not currently parallel-friendly on GPU.
if (producer->opcode() == HloOpcode::kRng) {
return false;

View File

@ -1570,7 +1570,9 @@ string HloInstruction::ToCategory() const {
return "non-elementwise fusion";
}
case FusionKind::kInput:
return "reduce fusion";
return "input fusion";
case FusionKind::kOutput:
return "output fusion";
case FusionKind::kTransposeDot:
return "dot fusion";
case FusionKind::kConvBackwardFilter:
@ -1618,7 +1620,6 @@ bool HloInstruction::IsFusable() const {
// Some kinds of instructions don't make sense to fuse.
switch (opcode_) {
case HloOpcode::kFusion:
case HloOpcode::kInfeed:
case HloOpcode::kOutfeed:
case HloOpcode::kParameter:
@ -2186,6 +2187,8 @@ string ToString(HloInstruction::FusionKind kind) {
return "kLoop";
case HloInstruction::FusionKind::kInput:
return "kInput";
case HloInstruction::FusionKind::kOutput:
return "kOutput";
case HloInstruction::FusionKind::kTransposeDot:
return "kTransposeDot";
case HloInstruction::FusionKind::kConvBackwardFilter:

View File

@ -54,7 +54,8 @@ class HloInstruction {
public:
enum class FusionKind {
kLoop, // Fused into a loop.
kInput, // Fused into a reduction kernel.
kInput, // Op's input is fused into the op itself.
kOutput, // Op's output is fused into the op itself.
kTransposeDot, // Fused into a dot with transposed operands.
kConvBackwardFilter, // Fused into a backward filter convolution.
kConvBackwardInput, // Fused into a backward input convolution.

View File

@ -379,7 +379,12 @@ def multi_label_head(n_classes,
loss_fn=None):
"""Creates a Head for multi label classification.
The Head uses sigmoid cross entropy loss.
Multi-label classification handles the case where each example may have zero
or more associated labels, from a discrete set. This is distinct from
`multi_class_head` which has exactly one label from a discrete set.
This head by default uses sigmoid cross entropy loss, which expects as input
a multi-hot tensor of shape `(batch_size, num_classes)`.
Args:
n_classes: Integer, number of classes, must be >= 2

View File

@ -2417,6 +2417,9 @@ tf_cc_test(
":test_main",
":testlib",
"//tensorflow/cc:cc_ops",
"//tensorflow/cc:cc_ops_internal",
"//tensorflow/cc:function_ops",
"//tensorflow/cc:functional_ops",
"//tensorflow/core/kernels:cast_op",
"//tensorflow/core/kernels:cwise_op",
"//tensorflow/core/kernels:function_ops",

View File

@ -1001,25 +1001,19 @@ string NewName(const Node* n, bool pretty) {
void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) {
// We visit nodes in forward topological sort order, which is a
// possible execution order of the graph.
std::vector<size_t> pending(g->num_node_ids());
std::deque<const Node*> ready;
for (const Node* n : g->nodes()) {
pending[n->id()] = n->in_edges().size();
if (pending[n->id()] == 0) ready.push_back(n);
}
gtl::InlinedVector<const Edge*, 4> inputs;
gdef->Clear();
gdef->mutable_versions()->CopyFrom(g->versions());
while (!ready.empty()) {
const Node* n = ready.front();
ready.pop_front();
for (const Edge* e : n->out_edges()) {
const Node* next = e->dst();
if (--pending[next->id()] == 0) {
ready.push_back(next);
}
std::vector<Node*> start_nodes;
for (Node* n : g->nodes()) {
if (n->out_edges().empty()) {
start_nodes.push_back(n);
}
if (!n->IsOp()) continue;
}
ReverseDFSFrom(*g, start_nodes, nullptr, [gdef, pretty, &inputs](Node* n) {
if (!n->IsOp()) return;
NodeDef* ndef = gdef->add_node();
ndef->set_name(NewName(n, pretty));
ndef->set_op(n->type_string());
@ -1054,7 +1048,7 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty) {
ndef->add_input(strings::StrCat(srcname, ":", e->src_output()));
}
}
}
});
}
string DebugString(const Graph* g) {

File diff suppressed because it is too large

View File

@ -400,16 +400,33 @@ void SetAttrValue(gtl::ArraySlice<NameAttrList> value, AttrValue* out) {
}
}
// Wrapper around protocol buffer serialization that requests deterministic
// serialization, in particular for Map fields, which serialize in a random
// order by default. Returns true on success.
template <typename T>
static bool DeterministicSerialization(const T& t, string* result) {
const int size = t.ByteSize();
*result = string(size, '\0');
::tensorflow::protobuf::io::ArrayOutputStream array_stream(&(*result)[0],
size);
::tensorflow::protobuf::io::CodedOutputStream output_stream(&array_stream);
output_stream.SetSerializationDeterministic(true);
t.SerializeWithCachedSizes(&output_stream);
return !output_stream.HadError() && size == output_stream.ByteCount();
}
bool AreAttrValuesEqual(const AttrValue& a, const AttrValue& b) {
string a_str, b_str;
a.SerializeToString(&a_str);
b.SerializeToString(&b_str);
DeterministicSerialization(a, &a_str);
DeterministicSerialization(b, &b_str);
// Note: it should be safe to compare proto serializations of the attr
// values since at most one field should be set in each (indeed, it
// must be the same field if they are to compare equal).
// Exception: there are multiple equivalent representations of
// TensorProtos. So a return value of true implies a == b, but not the
// converse.
// TODO(phawkins): this is incorrect for NameAttrList attributes that may
// contain nested AttrValue maps.
return a_str == b_str;
}
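The hazard being fixed is easiest to see with a `func` attr, whose `NameAttrList` carries a `map<string, AttrValue>`. A hedged sketch (illustrative function name; standard TensorFlow proto accessors):

```cpp
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/attr_value_util.h"

// Two AttrValues with identical map contents inserted in different orders.
// Plain SerializeToString() may emit the map entries in either order, so a
// byte comparison could spuriously report inequality; the deterministic
// serialization above requests a stable ordering of map entries first.
bool FuncAttrsCompareEqual() {
  tensorflow::AttrValue a;
  a.mutable_func()->set_name("f");
  (*a.mutable_func()->mutable_attr())["x"].set_i(1);
  (*a.mutable_func()->mutable_attr())["y"].set_i(2);

  tensorflow::AttrValue b;
  b.mutable_func()->set_name("f");
  (*b.mutable_func()->mutable_attr())["y"].set_i(2);
  (*b.mutable_func()->mutable_attr())["x"].set_i(1);

  return tensorflow::AreAttrValuesEqual(a, b);  // Expected: true.
}
```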

View File

@ -23,8 +23,8 @@ limitations under the License.
namespace tensorflow {
void DFS(const Graph& g, std::function<void(Node*)> enter,
std::function<void(Node*)> leave) {
void DFS(const Graph& g, const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave) {
// Stack of work to do.
struct Work {
Node* node;
@ -61,15 +61,23 @@ void DFS(const Graph& g, std::function<void(Node*)> enter,
}
}
void ReverseDFS(const Graph& g, std::function<void(Node*)> enter,
std::function<void(Node*)> leave) {
void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave) {
ReverseDFSFrom(g, {g.sink_node()}, enter, leave);
}
void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave) {
// Stack of work to do.
struct Work {
Node* node;
bool leave; // Are we entering or leaving n?
};
std::vector<Work> stack;
stack.push_back(Work{g.sink_node(), false});
std::vector<Work> stack(start.size());
for (int i = 0; i < start.size(); ++i) {
stack[i] = Work{start[i], false};
}
std::vector<bool> visited(g.num_node_ids(), false);
while (!stack.empty()) {

View File

@ -21,20 +21,28 @@ limitations under the License.
#include <vector>
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
namespace tensorflow {
// Perform a depth-first-search on g starting at the source node.
// If enter is not empty, calls enter(n) before visiting any children of n.
// If leave is not empty, calls leave(n) after visiting all children of n.
extern void DFS(const Graph& g, std::function<void(Node*)> enter,
std::function<void(Node*)> leave);
extern void DFS(const Graph& g, const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave);
// Perform a reverse depth-first-search on g starting at the sink node.
// If enter is not empty, calls enter(n) before visiting any parents of n.
// If leave is not empty, calls leave(n) after visiting all parents of n.
extern void ReverseDFS(const Graph& g, std::function<void(Node*)> enter,
std::function<void(Node*)> leave);
extern void ReverseDFS(const Graph& g, const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave);
// Perform a reverse depth-first-search on g starting at the 'start' nodes.
// If enter is not empty, calls enter(n) before visiting any parents of n.
// If leave is not empty, calls leave(n) after visiting all parents of n.
extern void ReverseDFSFrom(const Graph& g, gtl::ArraySlice<Node*> start,
const std::function<void(Node*)>& enter,
const std::function<void(Node*)>& leave);
// Stores in *order the post-order numbering of all nodes
// in graph found via a depth first search starting at the source node.
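A minimal usage sketch of the new `ReverseDFSFrom` overload, assuming an already-built `tensorflow::Graph`; it mirrors how the `ToGraphDef` change above seeds the traversal from terminal nodes:

```cpp
#include <vector>

#include "tensorflow/core/graph/algorithm.h"
#include "tensorflow/core/graph/graph.h"

// Collects nodes in `leave` order of a reverse traversal seeded with every
// node that has no outgoing edges. `leave` fires for a node only after all
// of its parents have been visited.
std::vector<tensorflow::Node*> ReverseLeaveOrder(const tensorflow::Graph& g) {
  std::vector<tensorflow::Node*> start;
  for (tensorflow::Node* n : g.nodes()) {
    if (n->out_edges().empty()) start.push_back(n);
  }
  std::vector<tensorflow::Node*> order;
  tensorflow::ReverseDFSFrom(
      g, start, /*enter=*/nullptr,
      /*leave=*/[&order](tensorflow::Node* n) { order.push_back(n); });
  return order;
}
```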

View File

@ -90,6 +90,23 @@ cc_test(
],
)
cc_library(
name = "robust_stats",
srcs = ["robust_stats.cc"],
hdrs = ["robust_stats.h"],
visibility = ["//visibility:public"],
)
cc_test(
name = "robust_stats_test",
srcs = ["robust_stats_test.cc"],
deps = [
":robust_stats",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
],
)
cc_library(
name = "utils",
srcs = ["utils.cc"],
@ -116,3 +133,37 @@ cc_library(
"//tensorflow/core:lib",
],
)
cc_library(
name = "virtual_scheduler",
srcs = ["virtual_scheduler.cc"],
hdrs = ["virtual_scheduler.h"],
visibility = ["//visibility:public"],
deps = [
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/grappler:grappler_item",
"//tensorflow/core/grappler:utils",
"//tensorflow/core/grappler/costs:cost_estimator",
],
)
cc_library(
name = "measuring_cost_estimator",
srcs = ["measuring_cost_estimator.cc"],
hdrs = ["measuring_cost_estimator.h"],
visibility = ["//visibility:public"],
deps = [
":robust_stats",
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:lib_proto_parsing",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/grappler:grappler_item",
"//tensorflow/core/grappler:grappler_item_builder",
"//tensorflow/core/grappler/clusters:cluster",
"//tensorflow/core/grappler/costs:cost_estimator",
"//tensorflow/core/kernels:ops_util",
],
)

View File

@ -0,0 +1,133 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"
#include <limits>
#include "tensorflow/core/grappler/clusters/cluster.h"
#include "tensorflow/core/grappler/costs/robust_stats.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/lib/core/blocking_counter.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/public/session.h"
namespace tensorflow {
namespace grappler {
MeasuringCostEstimator::MeasuringCostEstimator(Cluster* cluster,
int measurement_steps,
int measurement_threads)
: measurement_steps_(measurement_steps),
measurement_threads_(measurement_threads) {
CHECK_GE(measurement_steps, 1);
if (measurement_threads > 0) {
thread_pool_.reset(new thread::ThreadPool(
Env::Default(), SanitizeThreadSuffix("measurements"),
measurement_threads));
}
cluster_ = cluster;
}
Status MeasuringCostEstimator::Initialize(const GrapplerItem& item) {
feed_ = item.feed;
fetch_ = item.fetch;
return cluster_->Initialize(item);
}
Status MeasuringCostEstimator::PredictCosts(const GraphDef& optimized_graph,
CostGraphDef* cost_graph,
Costs* costs) const {
std::vector<double> times(measurement_steps_);
BlockingCounter barrier(measurement_steps_);
mutex status_mu;
Status status;
auto measurement_fn = [&](const int step) {
const Costs::MicroSeconds start = Env::Default()->NowMicros();
RunMetadata metadata;
const Status local_status =
cluster_->Run(optimized_graph, feed_, fetch_, &metadata);
{
mutex_lock lock(status_mu);
status.Update(local_status);
}
if (step < 0) {
// Discard the first iteration as it triggers the warmup, and therefore
// takes much longer than a normal step.
return;
}
if (!local_status.ok()) {
// Discard the data if the run wasn't successful.
barrier.DecrementCount();
return;
}
const Costs::MicroSeconds finish = Env::Default()->NowMicros();
const double time = (finish - start).count() * 1e3;
times[step] = time;
if (cost_graph && (step + 1 == measurement_steps_)) {
metadata.mutable_cost_graph()->Swap(cost_graph);
}
barrier.DecrementCount();
};
// Initialize the computation and warm up TensorFlow.
measurement_fn(-1);
if (!status.ok()) {
LOG(ERROR) << "Failed to run start measurements: "
<< status.error_message();
costs->execution_time = Costs::Duration::max();
return status;
}
// Run "measurement_steps_" and measure the time.
if (measurement_threads_ > 0) {
for (int i = 0; i < measurement_steps_; ++i) {
thread_pool_->Schedule([i, &measurement_fn]() { measurement_fn(i); });
}
barrier.Wait();
} else {
for (int i = 0; i < measurement_steps_ && status.ok(); ++i) {
measurement_fn(i);
}
}
if (!status.ok()) {
LOG(ERROR) << "Failed to measure graph performance: "
<< status.error_message();
costs->execution_time = Costs::Duration::max();
costs->max_execution_time = Costs::Duration::max();
costs->min_execution_time = 0;
return status;
}
// Compute the average time of the measurement steps. Use Huber statistics
// to filter out outliers.
RobustStats stats(times);
costs->execution_time = Costs::Duration(stats.mean());
costs->max_execution_time = Costs::Duration(stats.hi());
costs->min_execution_time = Costs::Duration(stats.lo());
return Status::OK();
}
} // end namespace grappler
} // end namespace tensorflow

View File

@ -0,0 +1,76 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_
#define TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_
#include <string>
#include <utility>
#include <vector>
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/grappler/costs/cost_estimator.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
class CostGraphDef;
class GraphDef;
} // namespace tensorflow
namespace tensorflow {
namespace grappler {
class Cluster;
struct GrapplerItem;
// Estimate the cost of running a Grappler item by actually running the
// corresponding TensorFlow graph on the specified cluster and measuring the
// runtimes.
class MeasuringCostEstimator : public CostEstimator {
public:
// Runs the model for measurement_steps to measure its average cost.
// When measurement_threads is greater than 0, uses a thread pool with that
// many threads to run the measurements; otherwise, runs them serially. Does
// not take ownership of cluster.
explicit MeasuringCostEstimator(Cluster* cluster, int measurement_steps,
int measurement_threads);
~MeasuringCostEstimator() override {}
// Initializes the estimator for the specified grappler item.
// This implementation always returns OK.
Status Initialize(const GrapplerItem& item) override;
// Runs the optimized version of the graph on the cluster, measures
// the runtime of each operation, and annotates the CostGraphDef
// with the corresponding measurements.
// Returns the average latency for the whole graph.
Status PredictCosts(const GraphDef& optimized_graph, CostGraphDef* cost_graph,
Costs* overall_cost) const override;
private:
Cluster* cluster_; // Not owned.
int measurement_steps_;
int measurement_threads_;
std::vector<std::pair<string, Tensor>> feed_;
std::vector<string> fetch_;
std::unique_ptr<thread::ThreadPool> thread_pool_;
};
} // end namespace grappler
} // end namespace tensorflow
#endif // TENSORFLOW_GRAPPLER_COSTS_MEASURING_COST_ESTIMATOR_H_
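A hedged usage sketch for the new estimator (`MeasureItem` is an illustrative helper, not part of this change; cluster construction is elided):

```cpp
#include "tensorflow/core/framework/cost_graph.pb.h"
#include "tensorflow/core/grappler/costs/measuring_cost_estimator.h"
#include "tensorflow/core/grappler/grappler_item.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/platform/logging.h"

// Measures an item's average runtime on a live cluster. The cluster must
// outlive the estimator, which does not take ownership of it.
tensorflow::Status MeasureItem(tensorflow::grappler::Cluster* cluster,
                               const tensorflow::grappler::GrapplerItem& item) {
  // 10 measurement steps, run serially (measurement_threads == 0).
  tensorflow::grappler::MeasuringCostEstimator estimator(cluster, 10, 0);
  TF_RETURN_IF_ERROR(estimator.Initialize(item));

  tensorflow::CostGraphDef cost_graph;
  tensorflow::grappler::Costs costs;
  TF_RETURN_IF_ERROR(estimator.PredictCosts(item.graph, &cost_graph, &costs));

  LOG(INFO) << "Robust mean execution time (us): "
            << costs.execution_time.count();
  return tensorflow::Status::OK();
}
```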

View File

@ -0,0 +1,151 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/grappler/costs/robust_stats.h"
#include <algorithm>
namespace tensorflow {
namespace grappler {
// Given a sorted vector of values, calculate the median.
// Returns 0 for an empty vector. Does not verify sortedness.
static double SortedMedian(const std::vector<double> &values) {
const int n = values.size();
if (n == 0) return 0.0;
if (n & 1) {
return values[n / 2];
} else {
return (values[n / 2] + values[n / 2 - 1]) / 2.0;
}
}
// Given a vector of values (sorted or not), calculate the median.
static double Median(std::vector<double> &&values) {
const size_t n = values.size();
if (n == 0) return 0;
const auto middle = values.begin() + (n / 2);
// Put the middle value in its place.
std::nth_element(values.begin(), middle, values.end());
if (n & 1) {
return *middle;
}
// Return the average of the two middle elements. As a postcondition of
// nth_element, every element in [begin, middle) is <= *middle, so the
// lower middle value is found there with max_element.
const auto lower_middle = std::max_element(values.begin(), middle);
// Prevent overflow. We know that '*lower_middle <= *middle'.
// If both are on opposite sides of zero, the sum won't overflow; otherwise,
// the difference won't overflow.
if (*lower_middle <= 0 && *middle >= 0) {
return (*lower_middle + *middle) / 2;
}
return *lower_middle + (*middle - *lower_middle) / 2;
}
// Given a set of values, calculates the scaled Median Absolute Deviation (a
// robust approximation to the standard deviation). This is calculated as the
// median of the absolute deviations from the median, scaled by 1.4826. Its
// advantage over the standard deviation is that it is not (as) affected by
// outlier values. Returns a pair<median, mad>.
static std::pair<double, double> ScaledMedianAbsoluteDeviation(
const std::vector<double> &sorted_values) {
double median = SortedMedian(sorted_values);
// Next, we calculate the absolute deviations from the median,
// find the median of the resulting data, and scale by 1.4826.
std::vector<double> deviations;
deviations.reserve(sorted_values.size());
for (double d : sorted_values) {
deviations.push_back(std::abs(d - median));
}
double mad = Median(std::move(deviations)) * 1.4826;
return std::pair<double, double>(median, mad);
}
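As a quick worked check of the definition (illustrative values, not from the unit test below): for the sorted values {1, ..., 9} the median is 5, the absolute deviations {4, 3, 2, 1, 0, 1, 2, 3, 4} have median 2, and the scaled MAD is 2 * 1.4826 = 2.9652. A sketch, assuming it lives in the same translation unit as the static helpers above:

```cpp
#include <cassert>
#include <cmath>
#include <utility>
#include <vector>

// Verifies the worked example; ScaledMedianAbsoluteDeviation is file-static,
// so this only compiles inside the same .cc file.
static void CheckScaledMadExample() {
  const std::vector<double> values{1, 2, 3, 4, 5, 6, 7, 8, 9};  // sorted
  const std::pair<double, double> med_mad =
      ScaledMedianAbsoluteDeviation(values);
  assert(std::abs(med_mad.first - 5.0) < 1e-9);      // median
  assert(std::abs(med_mad.second - 2.9652) < 1e-9);  // scaled MAD
}
```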
RobustStats::RobustStats(const std::vector<double> &values)
: RobustStats(std::vector<double>(values)) {}
RobustStats::RobustStats(std::vector<double> &&values) {
std::sort(values.begin(), values.end());
lo_ = values[0];
hi_ = values.back();
HuberMAD(values);
}
// Computes an updated mean using Huber's weighting function (values beyond
// the margin are weighted by margin / abs(value - mean)).
double UpdateHuberMean(const std::vector<double> &sorted_values, double mean,
double margin) {
int num_within = 0;
double sum = 0.0;
for (double d : sorted_values) {
if (d < mean - margin) {
sum -= margin;
} else if (d > mean + margin) {
sum += margin;
} else {
sum += d;
++num_within;
}
}
// It is possible, for a set with an interquartile distance of 0, i.e., with
// more than half of the values at the median, to encounter the case where
// the Huber mean drifts slightly off the median and there are no values
// within the margin. In that case, just return the old mean, and the caller
// will quit.
if (num_within > 0) {
return sum / num_within;
} else {
return mean;
}
}
// Given a list of values, this approximates the stddev using the MAD and then
// uses it to compute a Huber robust mean (sandwich mean). A margin of
// c*stddev is defined around the current mean, and values are weighted by
// margin / abs(value - mean) if outside the margin, or 1 if inside. This
// computes the mean iteratively because, each time the mean changes, the
// margin shifts a bit. It typically settles very quickly, but it's possible
// for it to be unstable. We limit it to 10 iterations.
//
void RobustStats::HuberMAD(const std::vector<double> &sorted_values) {
const std::pair<double, double> median_mad =
ScaledMedianAbsoluteDeviation(sorted_values);
mean_ = median_mad.first;
stddev_ = median_mad.second;
// c = 1.345 is the commonly used cutoff with 95% efficiency at the normal.
// We're using c = 1.5 to be a little more conservative, and because that's
// the default in S-plus.
// TODO(dehnert): Specialize Stats for integral types so we don't implement
// methods that don't make sense.
const double c = 1.5;
const double margin = c * stddev_;
// Iterate 10 times, or until the Huber mean stabilizes.
// If the margin is zero, we don't want mean to drift from the median.
if (margin > 0.0) {
for (int k = 0; k < 10; ++k) {
double old_mean = mean_;
mean_ = UpdateHuberMean(sorted_values, mean_, margin);
if (mean_ == old_mean) break;
}
}
}
} // namespace grappler
} // namespace tensorflow

View File

@ -0,0 +1,42 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_
#define TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_
#include <vector>
namespace tensorflow {
namespace grappler {
class RobustStats {
public:
RobustStats(const std::vector<double>& values);
RobustStats(std::vector<double>&& values);
double lo() const { return lo_; }
double hi() const { return hi_; }
double mean() const { return mean_; }
private:
void HuberMAD(const std::vector<double>& values);
double lo_;
double hi_;
double mean_;
double stddev_;
};
} // namespace grappler
} // namespace tensorflow
#endif // TENSORFLOW_GRAPPLER_COSTS_ROBUST_STATS_H_
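A small usage sketch of the public interface (the sample values are illustrative): with one extreme outlier, the Huber mean stays near the bulk of the data while `hi()` still reports the raw maximum.

```cpp
#include <vector>

#include "tensorflow/core/grappler/costs/robust_stats.h"

void RobustStatsDemo() {
  // Four measurements near 10 microseconds plus one 1000us outlier.
  std::vector<double> samples{9.0, 10.0, 10.0, 11.0, 1000.0};
  tensorflow::grappler::RobustStats stats(samples);
  // stats.mean() converges to roughly 10.6 rather than the raw mean of 208;
  // stats.lo() == 9.0 and stats.hi() == 1000.0 are the raw extremes.
}
```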

View File

@ -0,0 +1,63 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/grappler/costs/robust_stats.h"
#include "tensorflow/core/platform/test.h"
namespace tensorflow {
namespace grappler {
namespace {
class RobustStatsTest : public ::testing::Test {
public:
void SetUp() override {
for (double d = 1.0; d <= 5.0; d += 1.0) {
values1_.push_back(5.0 - d);
values1_.push_back(5.0 + d);
values2_.push_back(25.0 - 2 * d);
values2_.push_back(25.0 + 2 * d);
values3_.push_back(-3.0 - d);
values3_.push_back(-3.0 + d);
}
values1_.push_back(5.0); // Odd # elements, mean is 5.0
values3_.push_back(197.0);
values3_.push_back(-203.0); // Even # elements, mean is -3.0
}
std::vector<double> values1_;
std::vector<double> values2_;
std::vector<double> values3_;
};
TEST_F(RobustStatsTest, Simple) {
RobustStats s1(values1_);
EXPECT_EQ(5.0, s1.mean());
EXPECT_EQ(0.0, s1.lo());
EXPECT_EQ(10.0, s1.hi());
RobustStats s2(values2_);
EXPECT_EQ(25.0, s2.mean());
EXPECT_EQ(15.0, s2.lo());
EXPECT_EQ(35.0, s2.hi());
RobustStats s3(values3_);
EXPECT_EQ(-3.0, s3.mean());
EXPECT_EQ(-203.0, s3.lo());
EXPECT_EQ(197.0, s3.hi());
}
} // namespace
} // namespace grappler
} // namespace tensorflow

View File

@ -0,0 +1,215 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/core/grappler/costs/virtual_scheduler.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/grappler/utils.h"
namespace tensorflow {
namespace grappler {
namespace {
Costs CombineCosts(const Costs& left, const Costs& right) {
CHECK_NE(left.max_memory, kMemoryUnknown);
CHECK_NE(left.max_per_op_buffers, kMemoryUnknown);
CHECK_NE(left.max_per_op_streaming, kMemoryUnknown);
Costs result = left;
result.execution_time += right.execution_time;
if (right.max_memory != kMemoryUnknown) {
result.max_memory += right.max_memory;
}
if (right.max_per_op_buffers != kMemoryUnknown) {
result.max_per_op_buffers =
std::max(left.max_per_op_buffers, right.max_per_op_buffers);
}
if (right.max_per_op_streaming != kMemoryUnknown) {
result.max_per_op_streaming =
std::max(left.max_per_op_streaming, right.max_per_op_streaming);
}
VLOG(2) << "costs execution_time=" << result.execution_time.count()
<< " max_memory=" << result.max_memory
<< " max_per_op_buffers=" << result.max_per_op_buffers
<< " max_per_op_streaming=" << result.max_per_op_streaming;
return result;
}
} // namespace
VirtualScheduler::VirtualScheduler(const GraphDef& graph,
const std::vector<string>& fetch_nodes)
: graph_costs_(Costs::ZeroCosts()),
// TODO(dyoon): Use a better way than FIFO.
ready_nodes_(new FIFOManager()) {
// First, get the nodes that would run to output fetch_nodes.
std::vector<const NodeDef*> nodes =
ComputeTransitiveFanin(graph, fetch_nodes);
// TODO(dyoon): this is a bit inefficient as name_to_node is already built in
// ComputeTransitiveFanin().
std::unordered_map<string, const NodeDef*> name_to_node;
for (const auto& node : graph.node()) {
name_to_node[node.name()] = &node;
}
// Build node_map.
for (const auto* node : nodes) {
auto& node_state = GetNodeStateOrCreateIt(node);
// TODO(dyoon): add SendRecv considering devices and control dependency.
for (const string& input : node->input()) {
const NodeDef* in = name_to_node[NodeName(input)];
CHECK(in);
node_state.inputs.push_back(in);
auto& input_node_state = GetNodeStateOrCreateIt(in);
input_node_state.outputs.push_back(node);
}
if (node->input().empty()) {
node_state.time_ready =
Costs::Duration(); // Node without input: ready at time 0.
ready_nodes_->AddNode(node);
}
}
}
const NodeDef* VirtualScheduler::GetCurrNode() const {
return ready_nodes_->GetCurrNode();
}
NodeState& VirtualScheduler::GetNodeStateOrCreateIt(const NodeDef* node) {
auto it = node_map_.find(node);
if (it == node_map_.end()) {
it = node_map_.emplace(node, NodeState()).first;
}
return it->second;
}
bool VirtualScheduler::MarkCurrNodeExecuted(const Costs& node_costs) {
// Update graph_costs_ and per-op costs.
graph_costs_ = CombineCosts(graph_costs_, node_costs);
const auto* node = GetCurrNode();
const auto& op_name = node->op();
auto it = op_to_cost_.find(op_name);
if (it == op_to_cost_.end()) {
it = op_to_cost_.emplace(op_name, Costs::ZeroCosts()).first;
}
auto& op_cost = it->second;
op_cost = CombineCosts(op_cost, node_costs);
// Update node and device states.
auto& node_state = node_map_[node];
auto& device = device_[node->device()];
device.nodes_executed.push_back(node);
// Node is scheduled when the device is available AND all the inputs are
// ready; hence, time_scheduled is time_ready if time_ready > device curr
// time.
node_state.time_scheduled =
std::max(device.GetCurrTime(), node_state.time_ready);
// Override device curr time with the time_scheduled.
device.device_costs.execution_time = node_state.time_scheduled;
device.device_costs = CombineCosts(device.device_costs, node_costs);
auto curr_time = device.GetCurrTime();
node_state.time_finished = curr_time;
// Update device's per-op cost.
{
auto it = device.op_to_cost.find(op_name);
if (it == device.op_to_cost.end()) {
it = device.op_to_cost.emplace(op_name, Costs::ZeroCosts()).first;
}
auto& op_cost = it->second;
op_cost = CombineCosts(op_cost, node_costs);
VLOG(2) << "Op scheduled -- name: " << node->name()
<< ", op: " << node->op() << ", device: " << node->device()
<< ", ready: " << node_state.time_ready.count()
<< ", scheduled: " << node_state.time_scheduled.count()
<< ", finished: " << node_state.time_finished.count();
// Increment num_inputs_ready of the output nodes.
for (auto* output : node_state.outputs) {
auto& output_state = node_map_[output];
output_state.num_inputs_ready++;
if (output_state.num_inputs_ready == output_state.inputs.size()) {
// This output node is now ready.
output_state.time_ready = curr_time;
ready_nodes_->AddNode(output);
}
}
// Increment num_outputs_executed of the input nodes.
for (auto* input : node_state.inputs) {
auto& input_state = node_map_[input];
input_state.num_outputs_executed++;
if (input_state.num_outputs_executed == input_state.outputs.size()) {
// All the outputs are executed; no more references to this input node.
input_state.time_no_reference = curr_time;
// TODO(dyoon): collect device memory usage; note that this input node
// use device memory between time_scheduled and time_no_reference.
}
}
}
// Remove the current node; assume FIFO.
ready_nodes_->RemoveCurrNode();
return !ready_nodes_->Empty(); // True if not empty.
}
Costs VirtualScheduler::Summary() const {
// Print out basic execution summary.
VLOG(1) << "Expected execution time: " << graph_costs_.execution_time.count();
VLOG(1) << "Expected max memory: " << graph_costs_.max_memory;
VLOG(1) << "Expected max per-op buffers: " << graph_costs_.max_per_op_buffers;
VLOG(1) << "Expected max per-op streaming buffers: "
<< graph_costs_.max_per_op_streaming;
VLOG(1) << "Per-op execution time:";
for (const auto& op_cost_pair : op_to_cost_) {
const auto& op = op_cost_pair.first;
const auto& cost = op_cost_pair.second.execution_time.count();
if (cost) { // Skip printing out zero-cost ops.
VLOG(1) << " + " << op << " : " << cost;
}
}
// Print per device summary
VLOG(1) << "Devices:";
Costs critical_path_costs = Costs::ZeroCosts();
for (const auto& device : device_) {
const auto& name = device.first;
const auto& state = device.second;
VLOG(1) << "Device = " << name
<< ", num_nodes = " << state.nodes_executed.size()
<< ", execution_time = " << state.GetCurrTime().count();
VLOG(1) << "Per-op execution time:";
for (const auto& op_cost_pair : state.op_to_cost) {
const auto& op = op_cost_pair.first;
const auto& cost = op_cost_pair.second.execution_time.count();
if (cost) { // Skip printing out zero-cost ops.
VLOG(1) << " + " << op << " : " << cost;
}
}
if (critical_path_costs.execution_time <= state.GetCurrTime()) {
critical_path_costs = state.device_costs;
}
}
VLOG(1) << "Critical path execution time: "
<< critical_path_costs.execution_time.count();
return critical_path_costs;
}
} // end namespace grappler
} // end namespace tensorflow

View File

@ -0,0 +1,116 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_
#define THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_
#include <list>
#include <memory>
#include <unordered_map>
#include "tensorflow/core/grappler/costs/cost_estimator.h"
#include "tensorflow/core/grappler/grappler_item.h"
namespace tensorflow {
namespace grappler {
namespace {
struct NodeState {
std::vector<const NodeDef*> inputs;
std::vector<const NodeDef*> outputs;
int num_inputs_ready;
int num_outputs_executed;
Costs::Duration time_ready;
Costs::Duration time_scheduled;
Costs::Duration time_finished;
Costs::Duration time_no_reference;
// Node will be ready to be executed at time_ready, scheduled at
// time_scheduled, and finishes execution at time_finished.
// Between time_scheduled and time_no_reference, the node's output tensor
// needs to be on the device, using up device memory.
NodeState() {
num_inputs_ready = 0;
num_outputs_executed = 0;
time_ready = Costs::Duration::max();
time_scheduled = Costs::Duration::max();
time_finished = Costs::Duration::max();
time_no_reference = Costs::Duration::max();
}
};
struct DeviceState {
std::vector<const NodeDef*> nodes_executed;
Costs device_costs;
std::map<string, Costs> op_to_cost; // Per-op cost.
DeviceState() { device_costs = Costs::ZeroCosts(); }
Costs::Duration GetCurrTime() const { return device_costs.execution_time; }
};
// ReadyNodeManager (abstract class):
// Keeps ready nodes and picks the best one to be scheduled.
class ReadyNodeManager {
public:
ReadyNodeManager() {}
virtual ~ReadyNodeManager() {}
virtual void AddNode(const NodeDef* node) = 0;
virtual const NodeDef* GetCurrNode() const = 0;
virtual void RemoveCurrNode() = 0;
virtual bool Empty() const = 0;
};
class FIFOManager : public ReadyNodeManager {
public:
FIFOManager() : ReadyNodeManager() {}
~FIFOManager() override {}
void AddNode(const NodeDef* node) override { nodes_.push_back(node); }
const NodeDef* GetCurrNode() const override { return nodes_.front(); }
void RemoveCurrNode() override { nodes_.pop_front(); }
bool Empty() const override { return nodes_.empty(); }
private:
std::list<const NodeDef*> nodes_;
};
} // namespace
// The virtual scheduler emulates execution of nodes in a graph, considering
// dependencies, devices, etc.
class VirtualScheduler {
public:
VirtualScheduler(const GraphDef& graph,
const std::vector<string>& fetch_nodes);
const NodeDef* GetCurrNode() const;
bool MarkCurrNodeExecuted(const Costs& node_costs);
Costs Summary() const;
private:
NodeState& GetNodeStateOrCreateIt(const NodeDef* node);
Costs graph_costs_; // Graph cost.
std::map<string, Costs> op_to_cost_; // Per-op cost.
std::unique_ptr<ReadyNodeManager> ready_nodes_;
std::unordered_map<const NodeDef*, NodeState> node_map_;
std::unordered_map<string, DeviceState> device_;
};
} // namespace grappler
} // end namespace tensorflow
#endif // THIRD_PARTY_TENSORFLOW_CORE_GRAPPLER_COSTS_VIRTUAL_SCHEDULER_H_
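A minimal driving-loop sketch for the scheduler, assuming the graph has at least one fetchable node (the uniform 100-microsecond per-node cost is a placeholder; a real client would derive costs from the node returned by `GetCurrNode()`):

```cpp
#include <string>
#include <vector>

#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/grappler/costs/virtual_scheduler.h"

// Runs the simulated execution to completion and returns the critical-path
// costs reported by Summary().
tensorflow::grappler::Costs Simulate(const tensorflow::GraphDef& graph,
                                     const std::vector<std::string>& fetch) {
  tensorflow::grappler::VirtualScheduler scheduler(graph, fetch);
  bool more_nodes = true;
  while (more_nodes) {
    const tensorflow::NodeDef* node = scheduler.GetCurrNode();
    (void)node;  // A real client would look up per-op costs for `node` here.
    tensorflow::grappler::Costs node_costs =
        tensorflow::grappler::Costs::ZeroCosts();
    node_costs.execution_time = tensorflow::grappler::Costs::Duration(100);
    more_nodes = scheduler.MarkCurrNodeExecuted(node_costs);
  }
  return scheduler.Summary();
}
```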

View File

@ -2109,7 +2109,9 @@ tf_kernel_library(
tf_kernel_library(
name = "matrix_triangular_solve_op",
prefix = "matrix_triangular_solve_op",
deps = LINALG_DEPS,
deps = LINALG_DEPS + if_cuda([
"//tensorflow/core/platform/default/build_config:cublas_plugin",
]),
)
tf_kernel_library(
@ -2350,6 +2352,8 @@ tf_kernel_library(
"//conditions:default": [],
}) + if_mkl([
"//third_party/mkl:intel_binary_blob",
]) + if_cuda([
"//tensorflow/core/platform/default/build_config:cublas_plugin",
]),
)
@ -2630,6 +2634,7 @@ tf_kernel_library(
],
"//conditions:default": [],
}) + if_cuda([
"//tensorflow/core/platform/default/build_config:cublas_plugin",
"//tensorflow/core/platform/default/build_config:cudnn_plugin",
]),
)

View File

@ -328,12 +328,16 @@ The downside is that all the weights read are from the previous training step.
So it is a different algorithm from SGD. But it is possible to improve its
convergence by adjusting learning rate and other hyperparameters.
### Executing the script
## Executing the script
This section lists the core command line arguments and a few basic examples for
executing the main script
([tf_cnn_benchmarks.py](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py)).
> Note: `tf_cnn_benchmarks.py` uses the config `force_gpu_compatible`,
> which was introduced after TensorFlow 1.1. Until TensorFlow 1.2 is released,
> building from source is advised.
#### Base command line arguments
* **`model`**: Model to use, e.g. `resnet50`, `inception3`, `vgg16`, and

View File

@ -139,6 +139,82 @@ from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.util import nest
def make_input_layer(features,
feature_columns,
weight_collections=None,
trainable=True):
"""Returns a dense `Tensor` as input layer based on given `feature_columns`.
Generally a single example in training data is described with FeatureColumns.
At the first layer of the model, this column-oriented data should be converted
to a single `Tensor`.
Example:
```python
price = numeric_column('price')
keywords_embedded = embedding_column(
categorical_column_with_hash_bucket("keywords", 10K), dimensions=16)
all_feature_columns = [price, keywords_embedded, ...]
dense_tensor = make_input_layer(features, all_feature_columns)
for units in [128, 64, 32]:
dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
prediction = tf.layers.dense(dense_tensor, 1)
```
Args:
features: A mapping from key to tensors. `FeatureColumn`s look up via these
keys. For example `numeric_column('price')` will look at 'price' key in
this dict. Values can be a `SparseTensor` or a `Tensor` depending on the
corresponding `FeatureColumn`.
feature_columns: An iterable containing all the `FeatureColumn`s. All items
should be instances of classes derived from `_DenseColumn` such as
`numeric_column`, `embedding_column`, `bucketized_column`,
`indicator_column`. If you have categorical features, you can wrap them
with an `embedding_column` or `indicator_column`.
weight_collections: A list of collection names to which the Variable will be
added. Note that variables will also be added to collections
`tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`.
trainable: If `True` also add the variable to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`).
Returns:
A `Tensor` which represents the input layer of a model. Its shape
is (batch_size, first_layer_dimension) and its dtype is `float32`.
first_layer_dimension is determined based on the given `feature_columns`.
Raises:
ValueError: if an item in `feature_columns` is not a `_DenseColumn`.
"""
_check_feature_columns(feature_columns)
for column in feature_columns:
if not isinstance(column, _DenseColumn):
raise ValueError(
'Items of feature_columns must be a _DenseColumn. '
'You can wrap a categorical column with an '
'embedding_column or indicator_column. Given: {}'.format(column))
weight_collections = list(weight_collections or [])
if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
with variable_scope.variable_scope(
None, default_name='make_input_layer', values=features.values()):
builder = _LazyBuilder(features)
output_tensors = []
for column in sorted(feature_columns, key=lambda x: x.name):
with variable_scope.variable_scope(None, default_name=column.name):
tensor = column._get_dense_tensor( # pylint: disable=protected-access
builder,
weight_collections=weight_collections,
trainable=trainable)
num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access
batch_size = array_ops.shape(tensor)[0]
tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
output_tensors.append(tensor)
return array_ops.concat(output_tensors, 1)
def make_linear_model(features,
feature_columns,
units=1,
@ -156,10 +232,21 @@ def make_linear_model(features,
while `make_input_layer` explicitly requires wrapping each of them with an
`embedding_column` or an `indicator_column`.
Example:
```python
price = numeric_column('price')
price_buckets = bucketized_column(price, boundaries=[0., 10., 100., 1000.])
keywords = categorical_column_with_hash_bucket("keywords", 10K)
all_feature_columns = [price_buckets, keywords, ...]
prediction = make_linear_model(features, all_feature_columns)
```
Args:
features: A mapping from key to tensors. 'string' key means a base feature.
It can have `_FeatureColumn` as a key too. That means that FeatureColumn
is already transformed by the input pipeline.
features: A mapping from key to tensors. `FeatureColumn`s look up via these
keys. For example `numeric_column('price')` will look at 'price' key in
this dict. Values are `Tensor` or `SparseTensor` depending on
corresponding `FeatureColumn`.
feature_columns: An iterable containing all the FeatureColumns. All items
should be instances of classes derived from FeatureColumn.
units: An integer, dimensionality of the output space. Default
@ -191,9 +278,10 @@ def make_linear_model(features,
raise ValueError('Items of feature_columns must be either a _DenseColumn '
'or _CategoricalColumn. Given: {}'.format(column))
weight_collections = list(weight_collections or [])
weight_collections += [
ops.GraphKeys.GLOBAL_VARIABLES, ops.GraphKeys.MODEL_VARIABLES
]
if ops.GraphKeys.GLOBAL_VARIABLES not in weight_collections:
weight_collections.append(ops.GraphKeys.GLOBAL_VARIABLES)
if ops.GraphKeys.MODEL_VARIABLES not in weight_collections:
weight_collections.append(ops.GraphKeys.MODEL_VARIABLES)
with variable_scope.variable_scope(
None, default_name='make_linear_model', values=features.values()):
weighted_sums = []
@ -228,7 +316,8 @@ def numeric_column(key,
normalizer_fn=None):
"""Represents real valued or numerical features.
An example:
Example:
```python
price = numeric_column('price')
all_feature_columns = [price, ...]
@ -298,7 +387,8 @@ def bucketized_column(source_column, boundaries):
`boundaries=[0., 1., 2.]` generates buckets `(-inf, 0.)`, `[0., 1.)`,
`[1., 2.)`, and `[2., +inf)`.
An example:
Example:
```python
price = numeric_column('price')
bucketized_price = bucketized_column(price, boundaries=[...])
@ -349,7 +439,8 @@ def categorical_column_with_hash_bucket(key,
want to distribute your inputs into a finite number of buckets by hashing.
output_id = Hash(input_feature_string) % bucket_size
An example:
Example:
```python
keywords = categorical_column_with_hash_bucket("keywords", 10K)
all_feature_columns = [keywords, ...]
@ -471,7 +562,7 @@ class _DenseColumn(_FeatureColumn):
@abc.abstractproperty
def _variable_shape(self):
"""Returns shape of variable which is compatible with _get_dense_tensor."""
"""Returns a `TensorShape` of variable compatible with _get_dense_tensor."""
pass
@abc.abstractmethod
@ -480,6 +571,7 @@ class _DenseColumn(_FeatureColumn):
The output of this function will be used by model-builder functions. For
example, the pseudocode of `make_input_layer` is like this:
```python
def make_input_layer(features, feature_columns, ...):
outputs = [fc._get_dense_tensor(...) for fc in feature_columns]
@ -503,7 +595,7 @@ def _create_dense_column_weighted_sum(
builder,
weight_collections=weight_collections,
trainable=trainable)
num_elements = tensor_shape.TensorShape(column._variable_shape).num_elements() # pylint: disable=protected-access
num_elements = column._variable_shape.num_elements() # pylint: disable=protected-access
batch_size = array_ops.shape(tensor)[0]
tensor = array_ops.reshape(tensor, shape=(batch_size, num_elements))
weight = variable_scope.get_variable(
@ -615,12 +707,15 @@ class _LazyBuilder(object):
"""Creates a `_LazyBuilder`.
Args:
features: A mapping from feature column to tensors. A `string` key
features: A mapping from feature column to objects that are `Tensor` or
`SparseTensor`, or can be converted to same via
`sparse_tensor.convert_to_tensor_or_sparse_tensor`. A `string` key
signifies a base feature (not-transformed). A `FeatureColumn` key
means that this `Tensor` is the output of an existing `FeatureColumn`
which can be reused.
"""
self._columns_to_tensors = features.copy()
self._features = features.copy()
self._feature_tensors = {}
def get(self, key):
"""Returns a `Tensor` for the given key.
@ -640,9 +735,16 @@ class _LazyBuilder(object):
ValueError: if key is not found or a transformed `Tensor` cannot be
computed.
"""
if key in self._columns_to_tensors:
# Feature_column is already transformed or it's a raw feature.
return self._columns_to_tensors[key]
if key in self._feature_tensors:
# FeatureColumn is already transformed or converted.
return self._feature_tensors[key]
if key in self._features:
# FeatureColumn is a raw feature.
feature_tensor = sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(
self._features[key])
self._feature_tensors[key] = feature_tensor
return feature_tensor
if not isinstance(key, (str, _FeatureColumn)):
raise TypeError('"key" must be either a "str" or "_FeatureColumn". '
@ -653,11 +755,13 @@ class _LazyBuilder(object):
column = key
logging.debug('Transforming feature_column %s.', column)
transformed = column._transform_feature(self) # pylint: disable=protected-access
# pylint: disable=protected-access
transformed = column._transform_feature(self)
# pylint: enable=protected-access
if transformed is None:
raise ValueError('Column {} is not supported.'.format(column.name))
self._columns_to_tensors[column] = transformed
return self._columns_to_tensors[column]
self._feature_tensors[column] = transformed
return transformed
def _check_feature_columns(feature_columns):
@ -709,7 +813,7 @@ class _NumericColumn(_DenseColumn,
@property
def _variable_shape(self):
return self.shape
return tensor_shape.TensorShape(self.shape)
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
del weight_collections
@ -738,7 +842,8 @@ class _BucketizedColumn(_DenseColumn, _CategoricalColumn,
@property
def _variable_shape(self):
return tuple(self.source_column.shape) + (len(self.boundaries) + 1,)
return tensor_shape.TensorShape(
tuple(self.source_column.shape) + (len(self.boundaries) + 1,))
def _get_dense_tensor(self, inputs, weight_collections=None, trainable=None):
del weight_collections

View File

@ -65,7 +65,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
column = TransformCounter()
self.assertEqual(0, column.num_transform)
builder.get(column)
@ -88,7 +88,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
column = Transformer()
self.assertEqual('Output', builder.get(column))
self.assertEqual('Output', builder.get(column))
@ -108,13 +108,13 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass
features = {'a': constant_op.constant([[2], [3.]])}
features = {'a': [[2], [3.]]}
builder = fc._LazyBuilder(features=features)
builder.get(Transformer())
self.assertEqual(['a'], list(features.keys()))
def test_error_if_feature_is_not_found(self):
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(ValueError,
'bbb is not in features dictionary'):
builder.get('bbb')
@ -135,7 +135,7 @@ class LazyColumnTest(test.TestCase):
def _parse_example_config(self):
pass
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(ValueError,
'NotAProperColumn is not supported'):
builder.get(NotAProperColumn())
@ -145,7 +145,7 @@ class LazyColumnTest(test.TestCase):
class NotAFeatureColumn(object):
pass
builder = fc._LazyBuilder(features={'a': constant_op.constant([[2], [3.]])})
builder = fc._LazyBuilder(features={'a': [[2], [3.]]})
with self.assertRaisesRegexp(
TypeError, '"key" must be either a "str" or "_FeatureColumn".'):
builder.get(NotAFeatureColumn())
@ -273,7 +273,7 @@ class NumericColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
builder = fc._LazyBuilder({
'price': constant_op.constant([[1., 2.], [5., 6.]])
'price': [[1., 2.], [5., 6.]]
})
output = builder.get(price)
with self.test_session():
@ -286,7 +286,7 @@ class NumericColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[2], normalizer_fn=_increment_two)
builder = fc._LazyBuilder({
'price': constant_op.constant([[1., 2.], [5., 6.]])
'price': [[1., 2.], [5., 6.]]
})
self.assertEqual(builder.get(price), price._get_dense_tensor(builder))
@ -315,7 +315,7 @@ class NumericColumnTest(test.TestCase):
def test_make_linear_model(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@ -402,7 +402,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
transformed_tensor = builder.get(bucketized_price)
with _initialized_session():
@ -414,7 +414,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1.], [1.], [5.], [6.]])
'price': [[-1.], [1.], [5.], [6.]]
})
with _initialized_session():
bucketized_price_tensor = bucketized_price._get_dense_tensor(builder)
@ -432,7 +432,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
with _initialized_session():
bucketized_price_tensor = bucketized_price._get_dense_tensor(builder)
@ -448,7 +448,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1.], [1.], [5.], [6.]])
'price': [[-1.], [1.], [5.], [6.]]
})
with _initialized_session() as sess:
id_weight_pair = bucketized_price._get_sparse_tensors(builder)
@ -465,7 +465,7 @@ class BucketizedColumnTest(test.TestCase):
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
builder = fc._LazyBuilder({
'price': constant_op.constant([[-1., 1.], [5., 6.]])
'price': [[-1., 1.], [5., 6.]]
})
with _initialized_session() as sess:
id_weight_pair = bucketized_price._get_sparse_tensors(builder)
@ -502,7 +502,7 @@ class BucketizedColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[1])
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[-1.], [1.], [5.], [6.]])}
features = {'price': [[-1.], [1.], [5.], [6.]]}
predictions = fc.make_linear_model(features, [bucketized_price])
bias = get_linear_model_bias()
bucketized_price_var = get_linear_model_column_var(bucketized_price)
@ -527,7 +527,7 @@ class BucketizedColumnTest(test.TestCase):
price = fc.numeric_column('price', shape=[2])
bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[-1., 1.], [5., 6.]])}
features = {'price': [[-1., 1.], [5., 6.]]}
predictions = fc.make_linear_model(features, [bucketized_price])
bias = get_linear_model_bias()
bucketized_price_var = get_linear_model_column_var(bucketized_price)
@ -621,15 +621,15 @@ class SparseColumnHashedTest(test.TestCase):
float_fc = fc.categorical_column_with_hash_bucket(
'a_float', 10, dtype=dtypes.string)
int_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant([101]),
values=[101],
indices=[[0, 0]],
dense_shape=[1, 1])
string_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant(['101']),
values=['101'],
indices=[[0, 0]],
dense_shape=[1, 1])
float_tensor = sparse_tensor.SparseTensor(
values=constant_op.constant([101.]),
values=[101.],
indices=[[0, 0]],
dense_shape=[1, 1])
builder = fc._LazyBuilder({
@ -745,7 +745,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_bias(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@ -848,7 +848,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_output(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price], units=3)
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@ -885,7 +885,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_dimension(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1., 2.], [5., 6.]])}
features = {'price': [[1., 2.], [5., 6.]]}
predictions = fc.make_linear_model(features, [price])
price_var = get_linear_model_column_var(price)
with _initialized_session() as sess:
@ -913,7 +913,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_multi_dimension_multi_output(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1., 2.], [5., 6.]])}
features = {'price': [[1., 2.], [5., 6.]]}
predictions = fc.make_linear_model(features, [price], units=3)
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@ -928,7 +928,7 @@ class MakeLinearModelTest(test.TestCase):
def test_raises_if_shape_mismatch(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
predictions = fc.make_linear_model(features, [price])
with _initialized_session():
with self.assertRaisesRegexp(Exception, 'requested shape has 4'):
@@ -937,7 +937,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_reshaping(self):
price = fc.numeric_column('price', shape=[1, 2])
with ops.Graph().as_default():
features = {'price': constant_op.constant([[[1., 2.]], [[5., 6.]]])}
features = {'price': [[[1., 2.]], [[5., 6.]]]}
predictions = fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@@ -953,8 +953,8 @@ class MakeLinearModelTest(test.TestCase):
price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': constant_op.constant([[1., 2.], [5., 6.]]),
'price2': constant_op.constant([[3.], [4.]])
'price1': [[1., 2.], [5., 6.]],
'price2': [[3.], [4.]]
}
predictions = fc.make_linear_model(features, [price1, price2])
bias = get_linear_model_bias()
@@ -973,7 +973,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_collection(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price], weight_collections=['my-vars'])
my_vars = g.get_collection('my-vars')
bias = get_linear_model_bias()
@@ -998,7 +998,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_trainable_default(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price])
bias = get_linear_model_bias()
price_var = get_linear_model_column_var(price)
@@ -1022,7 +1022,7 @@ class MakeLinearModelTest(test.TestCase):
def test_dense_trainable_false(self):
price = fc.numeric_column('price')
with ops.Graph().as_default() as g:
features = {'price': constant_op.constant([[1.], [5.]])}
features = {'price': [[1.], [5.]]}
fc.make_linear_model(features, [price], trainable=False)
trainable_vars = g.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)
self.assertEqual([], trainable_vars)
@@ -1074,5 +1074,89 @@ class MakeLinearModelTest(test.TestCase):
self.assertIn('wire_cast', my_vars[2].name)
class MakeInputLayerTest(test.TestCase):
def test_should_be_dense_column(self):
with self.assertRaisesRegexp(ValueError, 'must be a _DenseColumn'):
fc.make_input_layer(
features={'a': [[0]]},
feature_columns=[
fc.categorical_column_with_hash_bucket('wire_cast', 4)
])
def test_does_not_support_dict_columns(self):
with self.assertRaisesRegexp(
ValueError, 'Expected feature_columns to be iterable, found dict.'):
fc.make_input_layer(
features={'a': [[0]]}, feature_columns={'a': fc.numeric_column('a')})
def test_raises_if_duplicate_name(self):
with self.assertRaisesRegexp(
ValueError, 'Duplicate feature column name found for columns'):
fc.make_input_layer(
features={'a': [[0]]},
feature_columns=[fc.numeric_column('a'),
fc.numeric_column('a')])
def test_one_column(self):
price = fc.numeric_column('price')
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1.], [5.]], net.eval())
def test_multi_dimension(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1., 2.], [5., 6.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
def test_raises_if_shape_mismatch(self):
price = fc.numeric_column('price', shape=2)
with ops.Graph().as_default():
features = {'price': [[1.], [5.]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
with self.assertRaisesRegexp(Exception, 'requested shape has 4'):
net.eval()
def test_reshaping(self):
price = fc.numeric_column('price', shape=[1, 2])
with ops.Graph().as_default():
features = {'price': [[[1., 2.]], [[5., 6.]]]}
net = fc.make_input_layer(features, [price])
with _initialized_session():
self.assertAllClose([[1., 2.], [5., 6.]], net.eval())
def test_multi_column(self):
price1 = fc.numeric_column('price1', shape=2)
price2 = fc.numeric_column('price2')
with ops.Graph().as_default():
features = {
'price1': [[1., 2.], [5., 6.]],
'price2': [[3.], [4.]]
}
net = fc.make_input_layer(features, [price1, price2])
with _initialized_session():
self.assertAllClose([[1., 2., 3.], [5., 6., 4.]], net.eval())
def test_column_order(self):
price_a = fc.numeric_column('price_a')
price_b = fc.numeric_column('price_b')
with ops.Graph().as_default():
features = {
'price_a': [[1.]],
'price_b': [[3.]],
}
net1 = fc.make_input_layer(features, [price_a, price_b])
net2 = fc.make_input_layer(features, [price_b, price_a])
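      # Both nets should be identical: make_input_layer orders output
      # slices by column name, not by the order of the feature_columns list.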
with _initialized_session():
self.assertAllClose([[1., 3.]], net1.eval())
self.assertAllClose([[1., 3.]], net2.eval())
if __name__ == '__main__':
test.main()

View File

@@ -774,6 +774,11 @@ class VariableScopeTest(test.TestCase):
self.assertEqual([v.name
for v in scope.global_variables()], ["foo/b:0"])
def testGetVariableWithRefDtype(self):
v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
# Ensure it is possible to do get_variable with a _ref dtype passed in.
_ = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)
def axis0_into1_partitioner(shape=None, **unused_kwargs):
part = [1] * len(shape)

View File

@@ -280,6 +280,17 @@ class _VariableStore(object):
raise ValueError(
"Passed a custom_getter which is not callable: %s" % custom_getter)
# If a *_ref type is passed in, an error would be triggered further down the
# stack. We prevent this by using base_dtype to get a non-ref version of the
# type before doing anything else. When _ref types are removed in favour of
# resources, this line can be removed.
try:
dtype = dtype.base_dtype
except AttributeError:
# If .base_dtype does not exist, we fall back to the raw dtype that was
# passed in; this might be a NumPy type, which is valid.
pass
# This is the main logic of get_variable. However, custom_getter
# may override this logic. So we save it as a callable and pass
# it to custom_getter.
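A minimal sketch (assuming the TF 1.x ref-variable semantics this fix targets, and the variable_scope/dtypes modules used in the test above) of the round trip that base_dtype enables:

v = variable_scope.get_variable("v", shape=[3, 4], dtype=dtypes.float32)
print(v.dtype)             # <dtype: 'float32_ref'>
print(v.dtype.base_dtype)  # <dtype: 'float32'>
# With the base_dtype conversion above, passing the _ref dtype straight
# back into get_variable now works instead of failing further down the stack.
w = variable_scope.get_variable("w", shape=[5, 6], dtype=v.dtype)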

View File

@@ -994,7 +994,7 @@ class SVSummaryThread(coordinator.LooperThread):
summary_strs = self._sess.run(self._sv.summary_op)
global_step = None
if self._sv.summary_writer:
logging.info("Recording summary at step %d.", global_step)
logging.info("Recording summary at step %s.", global_step)
self._sv.summary_writer.add_summary(summary_strs, global_step)
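The one-character fix matters because global_step is None on this path: %-formatting None with %d raises TypeError, while %s renders it as "None". A self-contained illustration using plain string formatting:

global_step = None
print("Recording summary at step %s." % global_step)
# -> Recording summary at step None.
try:
  print("Recording summary at step %d." % global_step)
except TypeError as e:
  print(e)  # %d format: a number is required, not NoneType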

View File

@@ -227,7 +227,7 @@ string ToString(CUresult result) {
// created by StreamExecutor (to ensure that the CUDA runtime didn't create a
// context behind our backs).
CUcontext CurrentContext() {
CUcontext current = CUDADriver::CurrentContextOrDie();
CUcontext current = CUDADriver::CurrentContextOrDie();
if (current != nullptr && !CreatedContexts::Has(current)) {
LOG(FATAL) << "current context was not created by the StreamExecutor "
"cuda_driver API: "
@@ -480,27 +480,56 @@ bool DeviceOptionsToContextFlags(DeviceOptions device_options, int *flags) {
CUdevice device, DeviceOptions device_options, CudaContext** context) {
*context = nullptr;
CUcontext former_context = CurrentContext();
if (former_context != nullptr) {
LOG(WARNING) << "creating context when one is currently active; existing: "
<< former_context;
}
int flags = 0;
if (!DeviceOptionsToContextFlags(device_options, &flags)) {
LOG(WARNING) << "could not convert all device options into context flags";
}
CUresult res;
CUcontext former_context;
CUcontext new_context;
{
// TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
// context creation: see http://b/13248943
#if CUDA_VERSION >= 7000
res = cuDevicePrimaryCtxSetFlags(device, flags);
{
unsigned int former_primary_context_flags;
int former_primary_context_is_active;
CHECK_EQ(CUDA_SUCCESS,
cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
&former_primary_context_is_active));
if (former_primary_context_flags != flags) {
if (former_primary_context_is_active) {
LOG(ERROR)
<< "The primary context is active and has a different flag set ("
<< former_primary_context_flags << ") than the desired flag set ("
<< flags << ").";
} else {
CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
}
}
}
former_context = CUDADriver::CurrentContextOrDie();
res = cuDevicePrimaryCtxRetain(&new_context, device);
if (former_context != nullptr) {
if (former_context == new_context) {
VLOG(2) << "The primary context " << former_context
<< " exists before initializing the StreamExecutor.";
} else {
LOG(WARNING) << "A non-primary context " << former_context
<< " exists before initializing the StreamExecutor. We "
"haven't verified StreamExecutor works with that.";
}
}
#else
former_context = CurrentContext();
if (former_context != nullptr) {
LOG(WARNING)
<< "creating context when one is currently active; existing: "
<< former_context;
}
res = cuCtxCreate(&new_context, flags, device);
#endif
}
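A hypothetical Python rendering of the CUDA >= 7.0 flag handling above; the real code calls cuDevicePrimaryCtxGetState/SetFlags/Retain, and these stand-in callables only model the decision logic, not the driver API:

def ensure_primary_context_flags(get_state, set_flags, device, desired):
  former_flags, is_active = get_state(device)
  if former_flags != desired:
    if is_active:
      # Mirrors the LOG(ERROR) branch: flags on an active primary
      # context cannot be changed safely.
      print("ERROR: active primary context has flags %d, wanted %d"
            % (former_flags, desired))
    else:
      set_flags(device, desired)

state = {"flags": 0, "active": False}
ensure_primary_context_flags(
    get_state=lambda d: (state["flags"], state["active"]),
    set_flags=lambda d, f: state.update(flags=f),
    device=0, desired=4)
print(state["flags"])  # 4: the inactive context's flags were updated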