diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 20e5c573c6b..5b6a18b6a69 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -296,6 +296,7 @@ filegroup( "//tensorflow/contrib/ffmpeg/default:all_files", "//tensorflow/contrib/framework:all_files", "//tensorflow/contrib/fused_conv:all_files", + "//tensorflow/contrib/gan:all_files", "//tensorflow/contrib/graph_editor:all_files", "//tensorflow/contrib/grid_rnn:all_files", "//tensorflow/contrib/hooks:all_files", @@ -323,6 +324,7 @@ filegroup( "//tensorflow/contrib/nn:all_files", "//tensorflow/contrib/opt:all_files", "//tensorflow/contrib/predictor:all_files", + "//tensorflow/contrib/receptive_field:all_files", "//tensorflow/contrib/reduce_slice_ops:all_files", "//tensorflow/contrib/remote_fused_graph/pylib:all_files", "//tensorflow/contrib/resampler:all_files", @@ -342,6 +344,7 @@ filegroup( "//tensorflow/contrib/staging:all_files", "//tensorflow/contrib/stat_summarizer:all_files", "//tensorflow/contrib/stateless:all_files", + "//tensorflow/contrib/summary:all_files", "//tensorflow/contrib/tensor_forest:all_files", "//tensorflow/contrib/tensor_forest/hybrid:all_files", "//tensorflow/contrib/tensor_forest/kernels/v4:all_files", diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 604dfab148b..1822e235eba 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -45,8 +45,13 @@ tf_cuda_library( tf_cuda_library( name = "c_api", - srcs = ["c_api.cc"], - hdrs = ["c_api.h"], + srcs = [ + "c_api.cc", + "c_api_function.cc", + ], + hdrs = [ + "c_api.h", + ], copts = tf_copts(), visibility = ["//visibility:public"], deps = select({ @@ -157,6 +162,21 @@ tf_cc_test( ], ) +tf_cc_test( + name = "c_api_function_test", + size = "small", + srcs = ["c_api_function_test.cc"], + deps = [ + ":c_api", + ":c_test_util", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + tf_cc_test( name = "while_loop_test", size = "small", diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 07c8277a6f2..c454c94249b 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -165,22 +165,6 @@ void deallocate_buffer(void* data, size_t len, void* arg) { tensorflow::cpu_allocator()->DeallocateRaw(data); } -Status MessageToBuffer(const tensorflow::protobuf::Message& in, - TF_Buffer* out) { - if (out->data != nullptr) { - return InvalidArgument("Passing non-empty TF_Buffer is invalid."); - } - const auto proto_size = in.ByteSizeLong(); - void* buf = tensorflow::port::Malloc(proto_size); - in.SerializeToArray(buf, proto_size); - out->data = buf; - out->length = proto_size; - out->data_deallocator = [](void* data, size_t length) { - tensorflow::port::Free(data); - }; - return Status::OK(); -} - } // namespace TF_Tensor::~TF_Tensor() { buffer->Unref(); } @@ -559,6 +543,27 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src, dimvec.size(), base, size, DeleteArray, base); } +Status MessageToBuffer(const tensorflow::protobuf::Message& in, + TF_Buffer* out) { + if (out->data != nullptr) { + return InvalidArgument("Passing non-empty TF_Buffer is invalid."); + } + const size_t proto_size = in.ByteSizeLong(); + void* buf = tensorflow::port::Malloc(proto_size); + if (buf == nullptr) { + return tensorflow::errors::ResourceExhausted( + "Failed to allocate memory to serialize message of type '", + in.GetTypeName(), "' and size ", proto_size); + } + in.SerializeToArray(buf, proto_size); + out->data = buf; + out->length = proto_size; + 
out->data_deallocator = [](void* data, size_t length) { + tensorflow::port::Free(data); + }; + return Status::OK(); +} + // Helpers for loading a TensorFlow plugin (a .so file). Status LoadLibrary(const char* library_filename, void** result, const void** buf, size_t* len); diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index 43b50780137..ee110d88cea 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -357,6 +357,14 @@ typedef struct TF_Output { int index; // The index of the output within oper. } TF_Output; +// TF_Function is a grouping of operations with defined inputs and outputs. +// Once created and added to graphs, functions can be invoked by creating an +// operation whose operation type matches the function name. +typedef struct TF_Function TF_Function; + +// Function definition options. TODO(iga): Define and implement +typedef struct TF_FunctionOptions TF_FunctionOptions; + // Sets the shape of the Tensor referenced by `output` in `graph` to // the shape described by `dims` and `num_dims`. // @@ -914,6 +922,15 @@ TF_CAPI_EXPORT extern void TF_GraphImportGraphDef( TF_Graph* graph, const TF_Buffer* graph_def, const TF_ImportGraphDefOptions* options, TF_Status* status); +// Add `function` to graph `g`. Once `function` is added to `g`, +// it can be called by creating an operation using the function's name. +// +// If successful, status is set to OK and function is added to g +// Otherwise, status is set to the encountered error and g is unmodified +TF_CAPI_EXPORT extern void TF_GraphAddFunction(TF_Graph* g, + const TF_Function* function, + TF_Status* status); + // Note: The following function may fail on very large protos in the future. TF_CAPI_EXPORT extern void TF_OperationToNodeDef(TF_Operation* oper, @@ -1001,6 +1018,105 @@ TF_CAPI_EXPORT void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx, TF_Output* dx, TF_Status* status, TF_Output* dy); +// Create a TF_Function from a TF_Graph +// +// Params: +// fn_body - the graph whose operations (or subset of whose operations) will be +// converted to TF_Function. +// fn_name - the name of the new TF_Function. Should match the operation +// name (OpDef.name) regexp [A-Z][A-Za-z0-9_.\\-/]* and be distinct +// from other operation names (at least those registered in graphs +// where this function will be used). +// TODO(iga): Allow null in here and have C API come up with +// a unique name with high probability (similarly to +// _create_hash_str in function.py) +// num_opers - `num_opers` contains the number of elements in the `opers` array +// or a special value of -1 meaning that no array is given. +// The distinction between an empty array of operations and no +// array of operations is necessary to distinguish the case of +// creating a function with no body (e.g. identity or permutation) +// and the case of creating a function whose body contains all +// the nodes in the graph (except for the automatic skipping, see +// below). +// opers - Array of operations to become the body of the function or null. +// - If no array is given (`num_opers` = -1), all the +// operations in `fn_body` will become part of the function +// except operations referenced in `inputs`. These operations +// must have a single output (these operations are typically +// placeholders created for the sole purpose of representing +// an input. We can relax this constraint if there are +// compelling use cases). +// - If an array is given (`num_opers` >= 0), all operations +// in it will become part of the function. 
In particular, no
+//             automatic skipping of dummy input operations is performed.
+//  ninputs - number of elements in `inputs` array
+//  inputs - array of TF_Outputs that specify the inputs to the function.
+//           If `ninputs` is zero (the function takes no inputs), `inputs`
+//           can be null. The names used for function inputs are normalized
+//           names of the operations (usually placeholders) pointed to by
+//           `inputs`. These operation names should start with a letter.
+//           Normalization will convert all letters to lowercase and
+//           non-alphanumeric characters to '_' to make resulting names match
+//           the "[a-z][a-z0-9_]*" pattern for operation argument names.
+//           `inputs` cannot contain the same tensor twice.
+//  noutputs - number of elements in `outputs` array
+//  outputs - array of TF_Outputs that specify the outputs of the function.
+//            If `noutputs` is zero (the function returns no outputs), `outputs`
+//            can be null. `outputs` can contain the same tensor more than once.
+//  output_names - The names of the function's outputs. `output_names` array
+//                 must either have the same length as `outputs`
+//                 (i.e. `noutputs`) or be null. In the former case,
+//                 the names should match the regular expression for ArgDef
+//                 names - "[a-z][a-z0-9_]*". In the latter case,
+//                 names for outputs will be generated automatically.
+//  opts - various options for the function, e.g. XLA's inlining control.
+//  status - Set to OK on success and an appropriate error on failure.
+//
+// Note that when the same TF_Output is listed as both an input and an output,
+// the corresponding function's output will be equal to this input,
+// instead of the original node's output.
+//
+// Callers must also satisfy the following constraints:
+// - `inputs` cannot refer to TF_Outputs within a control flow context. For
+//   example, one cannot use the output of "switch" node as input.
+// - No TF_Output of a function (inside any of `inputs`, `outputs`, `fn_body`)
+//   is allowed to have a reference type. Reference types are not exposed
+//   through C API and are being deprecated.
+// - Every node in the function's body must have all of its inputs (including
+//   control inputs). In other words, for every node in the body, each input
+//   must be either listed in `inputs` or must come from another node in
+//   the body. In particular, it is an error to have a control edge going from
+//   a node outside of the body into a node in the body. This applies to control
+//   edges going from nodes referenced in `inputs` to nodes in the body when
+//   the former nodes are not in the body (automatically skipped or not
+//   included in explicitly specified body).
+//
+// Returns:
+//  On success, a newly created TF_Function instance. It must be deleted by
+//  calling TF_DeleteFunction.
+//
+//  On failure, null.
+//
+// TODO(iga): Add input_names argument and get output_names working (they are
+// currently ignored)
+TF_CAPI_EXPORT extern TF_Function* TF_GraphToFunction(
+    const TF_Graph* fn_body, const char* fn_name, int num_opers,
+    const TF_Operation* const* opers, int ninputs, const TF_Output* inputs,
+    int noutputs, const TF_Output* outputs, const char* const* output_names,
+    const TF_FunctionOptions* opts, TF_Status* status);
+
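Read together with the tests added in c_api_function_test.cc below, the declarations here suggest the following call sequence. This is a minimal sketch rather than part of the patch: the names, the int32 Placeholder/Neg body, and the omitted error checks are illustrative only.

```c
#include "tensorflow/c/c_api.h"

void ExampleDefineAndCallFunction(void) {
  TF_Status* s = TF_NewStatus();
  TF_Graph* fn_body = TF_NewGraph(); /* graph holding the future body */
  TF_Graph* host = TF_NewGraph();    /* graph that will call the function */

  /* Body: a Placeholder feeding a Neg op. */
  TF_OperationDescription* d = TF_NewOperation(fn_body, "Placeholder", "feed");
  TF_SetAttrType(d, "dtype", TF_INT32);
  TF_Operation* feed = TF_FinishOperation(d, s);

  d = TF_NewOperation(fn_body, "Neg", "neg");
  TF_Output neg_in = {feed, 0};
  TF_AddInput(d, neg_in);
  TF_Operation* neg = TF_FinishOperation(d, s);

  /* Turn the graph (minus the input placeholder) into a function. */
  TF_Output inputs[1] = {{feed, 0}};
  TF_Output outputs[1] = {{neg, 0}};
  TF_Function* func = TF_GraphToFunction(
      fn_body, "MyFunc", /*num_opers=*/-1, /*opers=*/NULL,
      1, inputs, 1, outputs, /*output_names=*/NULL, /*opts=*/NULL, s);

  /* Make "MyFunc" callable from `host`, then call it like any other op. */
  TF_GraphAddFunction(host, func, s);
  d = TF_NewOperation(host, "Placeholder", "x");
  TF_SetAttrType(d, "dtype", TF_INT32);
  TF_Operation* x = TF_FinishOperation(d, s);
  d = TF_NewOperation(host, "MyFunc", "MyFunc_0");
  TF_Output call_in = {x, 0};
  TF_AddInput(d, call_in);
  TF_Operation* call = TF_FinishOperation(d, s);
  (void)call; /* would be fed and fetched through a TF_Session */

  /* Optionally inspect the generated FunctionDef. */
  TF_Buffer* fdef = TF_NewBuffer();
  TF_FunctionToFunctionDef(func, fdef, s);
  TF_DeleteBuffer(fdef);

  TF_DeleteFunction(func);
  TF_DeleteGraph(host);
  TF_DeleteGraph(fn_body);
  TF_DeleteStatus(s);
}
```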
+// Write out a serialized representation of `func` (as a FunctionDef protocol
+// message) to `output_func_def` (allocated by TF_NewBuffer()).
+// `output_func_def`'s underlying buffer will be freed when TF_DeleteBuffer()
+// is called.
+//
+// May fail on very large graphs in the future.
+TF_CAPI_EXPORT extern void TF_FunctionToFunctionDef(TF_Function* func,
+                                                    TF_Buffer* output_func_def,
+                                                    TF_Status* status);
+
+TF_CAPI_EXPORT extern void TF_DeleteFunction(TF_Function*);
+
 // TODO(josh11b): Register OpDef, available to all operations added
 // to this graph.
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
new file mode 100644
index 00000000000..b4c6397d0b4
--- /dev/null
+++ b/tensorflow/c/c_api_function.cc
@@ -0,0 +1,496 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/c/c_api_internal.h"
+
+#include <algorithm>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "tensorflow/core/framework/attr_value_util.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_util.h"
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/graph/graph.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+
+namespace tensorflow {
+namespace {
+
+// Class that maintains a one-to-one original node name -> new node name
+// mapping. We normalize the names used as input and output arguments to match
+// regexp "[a-z][a-z0-9_]*" specified in definition of ArgDef.name.
+// Once we rename them, we risk creating a name collision with the other
+// node names, so if necessary we add a suffix to make
+// names unique. If we have an input named "A" and a node in the function
+// body named "a", they will be renamed to "a" and "a_0".
+class NodeNameMapping {
+ public:
+  NodeNameMapping() = default;
+
+  // Normalize the input/output name and make it unique.
+  string GetIOName(const string& name);
+
+  // Make the node name unique.
+  string Uniquify(const string& name);
+
+  // Look up how a node name was previously normalized/uniquified.
+  // Returns empty if name was never seen.
+  string Lookup(const string& name) const;
+
+ private:
+  string UniquifyHelper(const string& name) const;
+  static string Normalize(string name);
+
+  // The normalized/uniquified names already used as
+  // input names (in signature), output names (in signature), and node names
+  // (in node_def).
+  // This is a superset of values in name_mapping_.
+  std::unordered_set<string> used_names_;
+  // Mapping from original node name from the graph to the normalized
+  // and uniquified version of it.
+  std::unordered_map<string, string> name_mapping_;
+};
+
+string NodeNameMapping::Normalize(string name) {
+  // Convert letters to lowercase and non-alphanumeric characters to '_'.
+  if (name.empty()) return "unknown";
+  const int n = name.size();
+  for (int i = 0; i < n; ++i) {
+    char c = name[i];
+    if (isalnum(c)) {
+      if (isupper(c)) {
+        name[i] = tolower(c);
+      }
+    } else {
+      name[i] = '_';
+    }
+  }
+
+  // Find the first letter and start with it.
+  int i = 0;
+  for (; i < n; ++i) {
+    if (isalpha(name[i])) break;
+  }
+
+  // Return "unknown" if none of the name's chars were letters.
+  return i == n ?
"unknown" : name.substr(i); +} + +string NodeNameMapping::UniquifyHelper(const string& name) const { + // If the name hasn't been used yet, use it as-is. + if (used_names_.find(name) == used_names_.end()) return name; + // Add a suffix to name to make it unique. + for (int i = 0;; ++i) { + const string candidate = strings::StrCat(name, "_", i); + if (used_names_.find(candidate) == used_names_.end()) return candidate; + } +} + +string NodeNameMapping::GetIOName(const string& name) { + const string& input_name = UniquifyHelper(Normalize(name)); + // Record that we used this name, but don't add it to name_mapping_ + // since this name is not for a node. + used_names_.insert(input_name); + return input_name; +} + +string NodeNameMapping::Uniquify(const string& name) { + const string uniqued = UniquifyHelper(name); + name_mapping_[name] = uniqued; + used_names_.insert(uniqued); + return uniqued; +} + +string NodeNameMapping::Lookup(const string& name) const { + const auto iter = name_mapping_.find(name); + if (iter == name_mapping_.end()) return string(); + return iter->second; +} + +Status ValidateNoRefOutputs(const Node* node) { + for (int i = 0; i < node->num_outputs(); ++i) { + const DataType& dt = node->output_type(i); + if (IsRefType(dt)) { + return errors::InvalidArgument("Output ", i, " of node '", node->name(), + "' has a reference " + "type ", + DataTypeString(dt)); + } + } + return Status::OK(); +} + +Status FillFunctionBody( + const string& fn_name, const NodeNameMapping& node_names, + const std::vector& body_nodes, + const std::unordered_map& tensor_renaming, + FunctionDef* fdef) { + std::vector in_edges; + std::vector control_edges; + for (const Node* node : body_nodes) { + NodeDef* node_def = fdef->add_node_def(); + // First, copy the node_def as is. We will patch it next. + *node_def = node->def(); + if (!node->assigned_device_name().empty()) { + node_def->set_device(node->assigned_device_name()); + } + node_def->set_name(node_names.Lookup(node->name())); + + // Input names must be set based on nested names in tensor_renaming. + // Clear the flat input names we got from the original node_def + // from the graph. + node_def->clear_input(); + + // Collect regular and control inputs. Regular inputs are indexed + // by the index at which they come into the `node`. Control inputs + // don't follow any order. + in_edges.clear(); + in_edges.resize(node->num_inputs(), nullptr); + control_edges.clear(); + for (const Edge* edge : node->in_edges()) { + if (edge->src()->IsSource()) continue; + if (edge->IsControlEdge()) { + control_edges.push_back(edge); + } else { + in_edges[edge->dst_input()] = edge; + } + } + + // Add regular inputs. + for (size_t i = 0; i < in_edges.size(); ++i) { + const Edge* edge = in_edges[i]; + string original_input_name; + if (edge == nullptr) { + // A backedge might not appear as a regular Edge, but be only present + // in the node_def. Such edges are referred to as requested_inputs(). + if (i >= node->requested_inputs().size()) { + return errors::InvalidArgument( + "Graph to be converted to function appears to be malformed. 
", + "Node ", node->name(), " is missing input edge ", i); + } + original_input_name = + ParseTensorName(node->requested_inputs()[i]).ToString(); + } else { + original_input_name = + strings::StrCat(edge->src()->name(), ":", edge->src_output()); + } + + const auto iter = tensor_renaming.find(original_input_name); + if (iter == tensor_renaming.end()) { + return errors::InvalidArgument( + "Input ", i, ", '", original_input_name, "', of node '", + node->name(), "' in function '", fn_name, + "' is not available. You might need to include it in inputs " + "or include its source node in the body"); + } + node_def->add_input(iter->second); + } + + // Add control inputs. + for (const Edge* edge : control_edges) { + // Add this control input only if the src node is in the body. + const string normalized = node_names.Lookup(edge->src()->name()); + // If we did not find a name for the source of control edge, this + // source must be outside of the body. Raise an error. + if (normalized.empty()) { + return errors::InvalidArgument( + "The source of control edge ", edge->DebugString(), + " is not in the body. Encountered while creating function '", + fn_name, "'"); + } + node_def->add_input(strings::StrCat("^", normalized)); + } + } + return Status::OK(); +} + +// Graph to FunctionDef conversion. This code is closely modeled on the Python +// code in third_party/tensorflow/python/framework/function.py. +Status GraphToFunctionDef(const Graph& fn_body, const string& fn_name, + const std::vector& body_nodes, + const std::vector& inputs, + const std::vector& outputs, + const std::vector& output_names, + FunctionDef* fdef) { + fdef->mutable_signature()->set_name(fn_name); + + // Keep track of names we used and how we normalized them. + NodeNameMapping node_names; + + // Mapping from original names of tensors (i.e. ":") to the + // name we used in the function: + // - For input tensors: + // {flat_tensor_name -> normalized_name_of_src_node} + // e.g. {In:3 -> in} + // - For tensors produced by nodes in function's body: + // {flat_tensor_name -> nested_tensor_name} + // e.g. {Add:3 -> add_0:z:1} + std::unordered_map tensor_renaming; + + // Fill inputs in function's signature. + for (size_t i = 0; i < inputs.size(); ++i) { + const Node* node = inputs[i].node; + int idx = inputs[i].index; + OpDef::ArgDef* argdef = fdef->mutable_signature()->add_input_arg(); + argdef->set_type(node->output_type(idx)); + const string& input_name = node_names.GetIOName(node->name()); + argdef->set_name(input_name); + tensor_renaming[strings::StrCat(node->name(), ":", idx)] = input_name; + } + + // Fill outputs in function's signature. + for (size_t i = 0; i < outputs.size(); ++i) { + const Node* node = outputs[i].node; + int idx = outputs[i].index; + OpDef::ArgDef* argdef = fdef->mutable_signature()->add_output_arg(); + argdef->set_type(node->output_type(idx)); + argdef->set_name(node_names.GetIOName(node->name())); + } + + // Populate tensor_renaming and node_names. + // Generate the new output names for every node in the function. + // The NodeDefs in FunctionDefs use a different naming scheme for + // their inputs than the NodeDefs in a graph (see the comment for + // FunctionDef.node_def in function.proto). We do the + // graph tensor name -> function tensor name conversion for every + // possible input (i.e. every node's outputs) and store the result + // in tensor_renaming. + for (const Node* node : body_nodes) { + // Make sure node_name does not collide with an input or output name. 
+ const string& node_name = node_names.Uniquify(node->name()); + // For each output_arg in the op_def, the output_ranges + // map will have [start, end] range of indices that this arg produces + // among all the output tensors of this op. + NameRangeMap output_ranges; + TF_RETURN_IF_ERROR( + NameRangesForNode(*node, node->op_def(), nullptr, &output_ranges)); + for (const auto& output : output_ranges) { + const string& output_name = output.first; + int index_start = output.second.first; + int index_end = output.second.second; + for (int i = index_start; i < index_end; ++i) { + const string& original_name = strings::StrCat(node->name(), ":", i); + const string& new_name = + strings::StrCat(node_name, ":", output_name, ":", i - index_start); + // Record the mapping if this tensor is not already mapped. + // Tensor can be already mapped if it is used as an input. + if (tensor_renaming.find(original_name) == tensor_renaming.end()) { + tensor_renaming[original_name] = new_name; + } + } + } + } + + TF_RETURN_IF_ERROR( + FillFunctionBody(fn_name, node_names, body_nodes, tensor_renaming, fdef)); + + // Remap return values. + for (int r = 0; r < fdef->signature().output_arg_size(); ++r) { + const string& ret_name = fdef->signature().output_arg(r).name(); + + // We convert this flat tensor name to the nested value + // (e.g. `add:z:1`) that we stored in tensor_renaming. + const string& return_value = + strings::StrCat(outputs[r].node->name(), ":", outputs[r].index); + const auto iter = tensor_renaming.find(return_value); + if (iter == tensor_renaming.end()) { + return errors::InvalidArgument( + "TF_Output ", return_value, " is neither in the function body ", + "nor among function inputs. Encountered while creating function '", + fn_name, "'"); + } + (*fdef->mutable_ret())[ret_name] = iter->second; + } + + return Status::OK(); +} + +// Converts `ninputs` and `inputs` into `inputs_tensors` and `input_nodes` and +// does various checks while doing so. `input_nodes` will contain the same +// information as input_tensors just in a different structure to make +// following processing easier. TODO(iga): Simplify this nested structure. +Status ProcessInputs( + const TF_Graph* fn_body, const char* fn_name, int ninputs, + const TF_Output* inputs, std::vector* input_tensors, + std::unordered_map>* input_nodes) + EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) { + input_tensors->reserve(ninputs); + for (int i = 0; i < ninputs; ++i) { + const Node& node = inputs[i].oper->node; + int idx = inputs[i].index; + + TF_RETURN_WITH_CONTEXT_IF_ERROR( + fn_body->graph.IsValidOutputTensor(&node, idx), + "Encountered while processing input ", i, " into function '", fn_name, + "'"); + TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(&node), + "Encountered while processing input ", i, + " into function '", fn_name, "'"); + + input_tensors->emplace_back(&node, idx); + + const auto& iter = input_nodes->find(&node); + if (iter == input_nodes->end()) { + input_nodes->insert({&node, {idx}}); + } else { + auto& indices = iter->second; + if (std::find(indices.begin(), indices.end(), idx) != indices.end()) { + return errors::InvalidArgument( + "TF_Output ", node.name(), ":", idx, + " appears more than once in the input list"); + } + indices.push_back(idx); + } + } + return Status::OK(); +} + +// Converts `noutputs` and `outputs` into `outputs_tensors` and does various +// checks while doing so. 
+Status ProcessOutputs(const TF_Graph* fn_body, const char* fn_name, + int noutputs, const TF_Output* outputs, + std::vector* output_tensors) + EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) { + output_tensors->reserve(noutputs); + for (int i = 0; i < noutputs; ++i) { + const Node& node = outputs[i].oper->node; + int idx = outputs[i].index; + TF_RETURN_WITH_CONTEXT_IF_ERROR( + fn_body->graph.IsValidOutputTensor(&node, idx), + "Encountered while processing output ", i, " from function '", fn_name, + "'"); + output_tensors->emplace_back(&node, idx); + } + return Status::OK(); +} + +// Populates `body_nodes` with the nodes that will become function's body. +// Performs various checks. +Status ComputeBodyNodes( + const TF_Graph* fn_body, const char* fn_name, int num_opers, + const TF_Operation* const* opers, + const std::unordered_map>& input_nodes, + std::vector* body_nodes) + EXCLUSIVE_LOCKS_REQUIRED(fn_body->mu) { + if (num_opers == -1) { + for (const Node* node : fn_body->graph.op_nodes()) { + const auto& iter = input_nodes.find(node); + if (iter == input_nodes.end()) { + // This node is not referenced in inputs. Add it to the body. + TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(node), + "Encountered while creating function '", + fn_name, "'"); + body_nodes->push_back(node); + } else { + // This node is referenced in inputs. Currently, we place an + // artificial restriction and require that when num_opers=-1, such + // nodes must have a single output. + if (node->num_outputs() != 1) { + return errors::InvalidArgument( + "When `num_opers` is set to -1, nodes referenced in `inputs` " + "must have a single output. Node ", + node->name(), " has ", node->num_outputs(), + " outputs. Encountered while creating function '", fn_name, "'"); + } + } + } + } else { + body_nodes->reserve(num_opers); + for (int i = 0; i < num_opers; ++i) { + const Node* node = &opers[i]->node; + TF_RETURN_WITH_CONTEXT_IF_ERROR(ValidateNoRefOutputs(node), + "Encountered while creating function '", + fn_name, "'"); + body_nodes->push_back(node); + } + } + return Status::OK(); +} + +} // anonymous namespace +} // namespace tensorflow + +using tensorflow::Node; +using tensorflow::string; + +TF_Function* TF_GraphToFunction(const TF_Graph* fn_body, const char* fn_name, + int num_opers, const TF_Operation* const* opers, + int ninputs, const TF_Output* inputs, + int noutputs, const TF_Output* outputs, + const char* const* output_names, + const TF_FunctionOptions* opts, + TF_Status* status) { + tensorflow::mutex_lock l(*const_cast(&fn_body->mu)); + + // Process inputs. + std::vector input_tensors; + std::unordered_map> input_nodes; + status->status = tensorflow::ProcessInputs(fn_body, fn_name, ninputs, inputs, + &input_tensors, &input_nodes); + if (!status->status.ok()) return nullptr; + + // Process outputs. + std::vector output_tensors; + status->status = tensorflow::ProcessOutputs(fn_body, fn_name, noutputs, + outputs, &output_tensors); + if (!status->status.ok()) return nullptr; + + // Process output names. + std::vector output_names_vec; + if (output_names) { + output_names_vec.reserve(noutputs); + for (int i = 0; i < noutputs; ++i) { + output_names_vec.push_back(string(output_names[i])); + } + } + + // Compute body nodes. + std::vector body_nodes; + status->status = tensorflow::ComputeBodyNodes( + fn_body, fn_name, num_opers, opers, input_nodes, &body_nodes); + if (!status->status.ok()) return nullptr; + + // Do the actual function creation. 
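+  // The new TF_Function wraps a FunctionDefLibrary holding exactly one
+  // FunctionDef and no gradients (see TF_Function in c_api_internal.h);
+  // GraphToFunctionDef() below fills in that single FunctionDef.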
+ TF_Function* tf_function = new TF_Function(); + status->status = tensorflow::GraphToFunctionDef( + fn_body->graph, fn_name, body_nodes, input_tensors, output_tensors, + output_names_vec, tf_function->fdef_lib.add_function()); + if (!status->status.ok()) { + TF_DeleteFunction(tf_function); + return nullptr; + } + return tf_function; +} + +void TF_GraphAddFunction(TF_Graph* g, const TF_Function* function, + TF_Status* status) { + tensorflow::mutex_lock l(g->mu); + + // At the moment, we have only one function and no gradients in fdef_lib. + // This makes the following operation atomic. + // TODO(iga): Add an atomic version of AddFunctionLibrary when we support + // gradients + status->status = g->graph.AddFunctionLibrary(function->fdef_lib); +} + +void TF_FunctionToFunctionDef(TF_Function* func, TF_Buffer* output_func_def, + TF_Status* status) { + DCHECK_EQ(1, func->fdef_lib.function_size()); + status->status = MessageToBuffer(func->fdef_lib.function(0), output_func_def); +} + +void TF_DeleteFunction(TF_Function* function) { delete function; } diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc new file mode 100644 index 00000000000..c9dd38ea15f --- /dev/null +++ b/tensorflow/c/c_api_function_test.cc @@ -0,0 +1,1039 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/c/c_api.h" + +#include "tensorflow/c/c_test_util.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace { + +// Specification for expected input/output and its type. +// DataType value of DT_INVALID signifies that we don't want to +// check the data type. +typedef std::pair IOSpec; + +std::vector M(const std::initializer_list& names) { + std::vector v; + for (const string& name : names) { + v.push_back(IOSpec(name, DT_INVALID)); + } + return v; +} + +// Specification for an expected edge. 
+// src is either:
+//  - input name (as it appears in FunctionDef)
+//  - name of output tensor (in nested "add:z:0" format)
+// dst is either:
+//  - output name (as it appears in FunctionDef)
+//  - <node_name>:<input_index> (this looks the same as
+//      output tensor naming, but the index is actually an input index)
+struct EdgeSpec : public std::pair<string, string> {
+  typedef std::pair<string, string> Base;
+
+  // Inherit the set of constructors
+  using Base::pair;
+
+  string ToString() const { return strings::StrCat(first, "->", second); }
+};
+
+class CApiFunctionTest : public ::testing::Test {
+ protected:
+  CApiFunctionTest()
+      : s_(TF_NewStatus()),
+        func_graph_(TF_NewGraph()),
+        host_graph_(TF_NewGraph()),
+        func_(nullptr) {}
+
+  void SetUp() override {}
+
+  ~CApiFunctionTest() override {
+    TF_DeleteFunction(func_);
+    TF_DeleteGraph(host_graph_);
+    TF_DeleteGraph(func_graph_);
+    TF_DeleteStatus(s_);
+  }
+
+  void Run(const std::vector<std::pair<TF_Operation*, TF_Tensor*>>& inputs,
+           TF_Operation* output, int32_t expected_result) {
+    Run(inputs, {{output, 0}}, {expected_result});
+  }
+
+  // Run the host graph, which now contains a function, and check that the
+  // outputs are as expected.
+  // 'T' stands for 'tensor' since the outputs are tensors, not scalars.
+  void RunT(const std::vector<std::pair<TF_Operation*, TF_Tensor*>>& inputs,
+            std::initializer_list<TF_Output> outputs,
+            const std::vector<std::vector<int32_t>>& expected_results) {
+    // Create a session for this graph
+    CSession csession(host_graph_, s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    // Run
+    csession.SetInputs(inputs);
+    csession.SetOutputs(outputs);
+    csession.Run(s_);
+    ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_);
+
+    // Check results
+    for (int i = 0; i < expected_results.size(); ++i) {
+      TF_Tensor* out = csession.output_tensor(i);
+      ASSERT_TRUE(out != nullptr);
+      EXPECT_EQ(TF_INT32, TF_TensorType(out));
+      EXPECT_EQ(1, TF_NumDims(out));
+      CompareInt32Tensor(expected_results[i], out);
+    }
+  }
+
+  // Run the host graph, which now contains a function, and check that the
+  // outputs are as expected.
+  void Run(const std::vector<std::pair<TF_Operation*, TF_Tensor*>>& inputs,
+           std::initializer_list<TF_Output> outputs,
+           const std::vector<int32_t>& expected_results) {
+    // Create a session for this graph.
+ CSession csession(host_graph_, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + + csession.SetInputs(inputs); + csession.SetOutputs(outputs); + csession.Run(s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + + for (int i = 0; i < expected_results.size(); ++i) { + TF_Tensor* out = csession.output_tensor(i); + ASSERT_TRUE(out != nullptr); + EXPECT_EQ(TF_INT32, TF_TensorType(out)); + EXPECT_EQ(0, TF_NumDims(out)); // scalar + ASSERT_EQ(sizeof(int32_t), TF_TensorByteSize(out)); + int32_t* output_contents = static_cast(TF_TensorData(out)); + EXPECT_EQ(expected_results[i], *output_contents); + } + } + + void CompareInt32Tensor(const std::vector& expected, TF_Tensor* t) { + int32_t* data = static_cast(TF_TensorData(t)); + size_t size = TF_TensorByteSize(t); + ASSERT_EQ(expected.size() * sizeof(int32_t), size); + for (int i = 0; i < expected.size(); ++i) { + ASSERT_EQ(expected[i], data[i]) << "Different data at index " << i; + } + } + + std::vector ToOutput(const std::vector ops) { + std::vector out; + for (auto op : ops) { + out.push_back({op, 0}); + } + return out; + } + + void Define(int num_opers, const std::vector& opers, + const std::vector& inputs, + const std::vector& outputs, + const char** output_names, bool expect_failure = false) { + DefineT(num_opers, opers, ToOutput(inputs), ToOutput(outputs), output_names, + expect_failure); + } + + // An explicit `num_opers` is needed so that we can distinguish between the + // case of no operations specified (-1) and the case of an empty set of + // operations specified (0). + void DefineT(int num_opers, const std::vector& opers, + const std::vector& inputs, + const std::vector& outputs, const char** output_names, + bool expect_failure = false) { + ASSERT_EQ(func_, nullptr); + func_ = TF_GraphToFunction(func_graph_, func_name_, num_opers, + num_opers == -1 ? nullptr : opers.data(), + inputs.size(), inputs.data(), outputs.size(), + outputs.data(), output_names, + /*opts=*/nullptr, s_); + if (expect_failure) { + ASSERT_EQ(func_, nullptr); + return; + } + + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + ASSERT_NE(func_, nullptr); + TF_GraphAddFunction(host_graph_, func_, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + } + + TF_Operation* Use(const std::vector& inputs) { + return UseT(ToOutput(inputs)); + } + + TF_Operation* UseT(const std::vector& inputs) { + TF_Operation* op; + UseHelper(inputs, &op); + return op; + } + + // All the *Helper methods are used as a workaround for the restrictions that + // one cannot call ASSERT_* methods in non-void-returning functions (when + // exceptions are disabled during compilation) + void UseHelper(const std::vector& inputs, TF_Operation** op) { + TF_OperationDescription* desc = + TF_NewOperation(host_graph_, func_name_, func_node_name_); + for (auto input : inputs) { + TF_AddInput(desc, input); + } + // Set device to CPU because some ops inside the function might not be + // available on GPU. 
+ TF_SetDevice(desc, "/cpu:0"); + *op = TF_FinishOperation(desc, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + ASSERT_NE(*op, nullptr); + } + + FunctionDef fdef() { + tensorflow::FunctionDef fdef; + EXPECT_TRUE(GetFunctionDef(func_, &fdef)); + return fdef; + } + + // logging utility + template + string ToString(const Container& v) { + std::stringstream ss; + ss << "{"; + size_t i = 0; + for (const auto& e : v) { + if (i != 0) { + ss << ", "; + } + ss << e.ToString(); + ++i; + } + ss << "}"; + return ss.str(); + } + + void VerifyFDefNodes(const tensorflow::FunctionDef& fdef, + const std::unordered_set& nodes) { + ASSERT_EQ(nodes.size(), fdef.node_def_size()) + << "Got unexpected number of nodes. Expected: [" + << str_util::Join(nodes, ", ") + << "] Actual nodes in fdef: " << fdef.DebugString(); + for (const NodeDef& node_def : fdef.node_def()) { + ASSERT_TRUE(nodes.find(node_def.name()) != nodes.end()) + << "Got unexpected node: " << node_def.name() + << " in fdef: " << fdef.DebugString(); + } + } + + void VerifyFDefInputs(const tensorflow::FunctionDef& fdef, + const std::vector& inputs) { + const OpDef& signature = fdef.signature(); + ASSERT_EQ(inputs.size(), signature.input_arg_size()); + for (int i = 0; i < inputs.size(); ++i) { + const OpDef::ArgDef& arg = signature.input_arg(i); + const IOSpec& in = inputs[i]; + if (in.second != DT_INVALID) { + ASSERT_EQ(arg.type(), in.second) + << "Got unexpected type for input " << i + << ". fdef: " << fdef.DebugString(); + } + ASSERT_EQ(arg.name(), in.first) << "Got unexpected name for input " << i + << ". fdef: " << fdef.DebugString(); + } + } + + void VerifyFDefOutputs(const tensorflow::FunctionDef& fdef, + const std::vector& outputs) { + const OpDef& signature = fdef.signature(); + ASSERT_EQ(outputs.size(), signature.output_arg_size()); + for (int i = 0; i < outputs.size(); ++i) { + const OpDef::ArgDef& arg = signature.output_arg(i); + const IOSpec& out = outputs[i]; + if (out.second != DT_INVALID) { + ASSERT_EQ(arg.type(), out.second) + << "Got unexpected type for output " << i + << ". fdef: " << fdef.DebugString(); + } + ASSERT_EQ(arg.name(), out.first) << "Got unexpected name for output " << i + << ". fdef: " << fdef.DebugString(); + } + } + + void VerifyFDefEdges( + const tensorflow::FunctionDef& fdef, + const std::vector& e_edges, // expected edges + const std::vector& c_edges, // expected ctrl edges + bool is_exact_edges = true) { + // Build a set of edges from fdef + std::set a_edges; // actual edges + // Get edges from inputs to body nodes and between body nodes + for (const NodeDef& node_def : fdef.node_def()) { + for (int i = 0; i < node_def.input_size(); ++i) { + const string& in = node_def.input(i); + const auto& v = + a_edges.insert({in, strings::StrCat(node_def.name(), ":", i)}); + ASSERT_TRUE(v.second) << "Duplicate edge " << in << " -> " + << strings::StrCat(node_def.name(), ":", i) + << ". fdef: " << fdef.DebugString(); + } + } + // Get edges from body nodes to outputs and from inputs to outputs + for (const OpDef::ArgDef& arg : fdef.signature().output_arg()) { + const auto& iter = fdef.ret().find(arg.name()); + if (iter != fdef.ret().end()) { + const auto& v = a_edges.insert({iter->second, arg.name()}); + ASSERT_TRUE(v.second) << "Duplicate edge " << iter->second << " -> " + << arg.name() << ". fdef: " << fdef.DebugString(); + } else { + const auto& v = a_edges.insert({arg.name(), arg.name()}); + ASSERT_TRUE(v.second) << "Duplicate edge " << arg.name() << " -> " + << arg.name() << ". 
fdef: " << fdef.DebugString(); + } + } + + // Verify edges + for (const EdgeSpec& e : e_edges) { + ASSERT_TRUE(a_edges.find(e) != a_edges.end()) + << "Failed to find expected edge " << e.ToString() + << " in fdef: " << fdef.DebugString(); + } + + // If caller specified all edges, check that we have seen all + if (is_exact_edges) { + ASSERT_EQ(e_edges.size() + c_edges.size(), a_edges.size()) + << "Expected edges: " << ToString(e_edges) + << " Expected Control edges: " << ToString(c_edges) + << " Actual edges: " << ToString(a_edges) + << " in fdef: " << fdef.DebugString(); + } + } + + void VerifyFDef(const std::unordered_set& nodes, + const std::vector& inputs, + const std::vector& outputs, + const std::vector& e_edges, // expected edges + const std::vector& c_edges, // expected ctrl edges + bool is_exact_edges = true) { + tensorflow::FunctionDef fdef; + ASSERT_TRUE(GetFunctionDef(func_, &fdef)); + VerifyFDefNodes(fdef, nodes); + VerifyFDefInputs(fdef, inputs); + VerifyFDefOutputs(fdef, outputs); + VerifyFDefEdges(fdef, e_edges, c_edges, is_exact_edges); + } + + const char* func_name_ = "MyFunc"; + const char* func_node_name_ = "MyFunc_0"; + TF_Status* s_; + TF_Graph* func_graph_; + TF_Graph* host_graph_; + TF_Function* func_; + + // Workaround for not being able to initialize empty map using {} + std::unordered_set empty_; +}; + +TEST_F(CApiFunctionTest, OneOp_ZeroInputs_OneOutput) { + /* + * constant + * | + * v + */ + // Define + TF_Operation* c = ScalarConst(10, func_graph_, s_, "scalar10"); + Define(-1, {}, {}, {c}, nullptr); + + // Use, run, and verify + TF_Operation* func_op = Use({}); + Run({}, func_op, 10); + VerifyFDef({"scalar10_0"}, {}, {{"scalar10", DT_INT32}}, + {{"scalar10_0:output:0", "scalar10"}}, {}); +} + +TEST_F(CApiFunctionTest, OneOp_OneInput_OneOutput) { + /* + * | + * v + * negate + * | + * v + */ + // Define + TF_Operation* feed = Placeholder(func_graph_, s_); + TF_Operation* neg = Neg(feed, func_graph_, s_); + Define(-1, {}, {feed}, {neg}, nullptr); + + // Use, run, and verify + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, -3); + VerifyFDef({"neg_0"}, {{"feed", DT_INT32}}, {{"neg", DT_INT32}}, + {{"feed", "neg_0:0"}, {"neg_0:y:0", "neg"}}, {}); +} + +TEST_F(CApiFunctionTest, ZeroOps_Identity) { + /* + * | + * | + * | + * v + */ + // Define + TF_Operation* feed = Placeholder(func_graph_, s_); + Define(-1, {}, {feed}, {feed}, nullptr); + + // Use, run, and verify + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 3); + VerifyFDef(empty_, {{"feed", DT_INT32}}, {{"feed_0", DT_INT32}}, + {{"feed", "feed_0"}}, {}); +} + +TEST_F(CApiFunctionTest, ZeroOps_Permutation) { + /* + * | | + * \ / + * \/ + * x + * /\ + * / \ + * | | + * v v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + Define(-1, {}, {feed1, feed2}, {feed2, feed1}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {3, 2}); + VerifyFDef(empty_, M({{"feed1"}, {"feed2"}}), M({{"feed2_0"}, {"feed1_0"}}), + {{"feed1", "feed1_0"}, {"feed2", "feed2_0"}}, {}); +} + +TEST_F(CApiFunctionTest, OneOp_TwoInputs_OneOutput) 
{ + /* + * | | + * v v + * add + * | + * v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + Define(-1, {}, {feed1, feed2}, {add}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3); + VerifyFDef( + {"add_0"}, M({{"feed1"}, {"feed2"}}), M({{"add"}}), + {{"feed1", "add_0:0"}, {"feed2", "add_0:1"}, {"add_0:sum:0", "add"}}, {}); +} + +TEST_F(CApiFunctionTest, OneOp_TwoInputs_ZeroOutputs) { + /* + * | | + * v v + * add + * + * (output ignored) + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + Add(feed1, feed2, func_graph_, s_); + Define(-1, {}, {feed1, feed2}, {}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + Use({two, func_feed}); + VerifyFDef({"add"}, M({{"feed1"}, {"feed2"}}), {}, + {{"feed1", "add:0"}, {"feed2", "add:1"}}, {}); +} + +TEST_F(CApiFunctionTest, TwoOps_ThreeInputs_OneOutput) { + /* + * | | | + * v v / + * add1 / + * | | + * v v + * add2 + * | + * v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* feed3 = Placeholder(func_graph_, s_, "feed3"); + TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1"); + TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2"); + Define(-1, {}, {feed1, feed2, feed3}, {add2}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_, "two"); + TF_Operation* ten = ScalarConst(10, host_graph_, s_, "ten"); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, ten, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 10 + 3); + VerifyFDef({"add1", "add2_0"}, M({{"feed1"}, {"feed2"}, {"feed3"}}), + M({{"add2"}}), + {{"feed1", "add1:0"}, + {"feed2", "add1:1"}, + {"add1:sum:0", "add2_0:0"}, + {"feed3", "add2_0:1"}, + {"add2_0:sum:0", "add2"}}, + {}); +} + +TEST_F(CApiFunctionTest, OneOp_TwoInputs_TwoDuplicateOutputs) { + /* + * | | + * v v + * add + * | + * +-+-+ + * | | + * v v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + Define(-1, {}, {feed1, feed2}, {add, add}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {5, 5}); + VerifyFDef({"add_1"}, M({{"feed1"}, {"feed2"}}), M({{"add"}, {"add_0"}}), + {{"feed1", "add_1:0"}, + {"feed2", "add_1:1"}, + {"add_1:sum:0", "add"}, + {"add_1:sum:0", "add_0"}}, + {}); +} + +TEST_F(CApiFunctionTest, TwoOps_ThreeInputs_TwoOutputs) { + /* + * | | | + * v v / + * add / + * | | + * +-+ | + * | | | + * | v v + * | add + * | | + * v v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* feed3 = 
Placeholder(func_graph_, s_, "feed3"); + TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1"); + TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2"); + Define(-1, {}, {feed1, feed2, feed3}, {add1, add2}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_, "two"); + TF_Operation* ten = ScalarConst(10, host_graph_, s_, "ten"); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, ten, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, {{func_op, 0}, {func_op, 1}}, {12, 15}); + VerifyFDef({"add1_0", "add2_0"}, M({{"feed1"}, {"feed2"}, {"feed3"}}), + M({{"add1"}, {"add2"}}), + {{"feed1", "add1_0:0"}, + {"feed2", "add1_0:1"}, + {"add1_0:sum:0", "add2_0:0"}, + {"feed3", "add2_0:1"}, + {"add1_0:sum:0", "add1"}, + {"add2_0:sum:0", "add2"}}, + {}); +} + +TEST_F(CApiFunctionTest, FromSubsetOfOps) { + /* + * | | | + * v v / + * add / + * | | + * +---+--+---+ + * Ops used | | | | + * for func | v v | + * | | add | + * +-------> | | | + * | v | + * | | + * +----------+ + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* feed3 = Placeholder(func_graph_, s_, "feed3"); + TF_Operation* add1 = Add(feed1, feed2, func_graph_, s_, "add1"); + TF_Operation* add2 = Add(add1, feed3, func_graph_, s_, "add2"); + Define(1, {add2}, {add1, feed3}, {add2}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_, "two"); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3); + VerifyFDef( + {"add2_0"}, M({{"add1"}, {"feed3"}}), M({{"add2"}}), + {{"add1", "add2_0:0"}, {"feed3", "add2_0:1"}, {"add2_0:sum:0", "add2"}}, + {}); +} + +TEST_F(CApiFunctionTest, UsingOneOutputOfSplit) { + /* + * feed + * | + * +---------+---+ + * | const0 | | + * | | | | + * | v / | + * | split | + * | | | | | + * | v | v | + * | | | + * +------+------+ + * | + * v + * + * Only the second output from split is used as function output + */ + // Define + TF_Operation* feed = Placeholder(func_graph_, s_); + TF_Operation* split = Split3(feed, func_graph_, s_); + DefineT(-1, {}, {{feed, 0}}, {{split, 1}}, nullptr); + + // Use, run, and verify + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({func_feed}); + RunT({{func_feed, Int32Tensor({1, 2, 3, 4, 5, 6})}}, {{func_op, 0}}, + {{3, 4}}); + VerifyFDef({"split3_const0", "split3_0"}, M({{"feed"}}), M({{"split3"}}), + {{"split3_const0:output:0", "split3_0:0"}, + {"feed", "split3_0:1"}, + {"split3_0:output:1", "split3"}}, + {}); +} + +TEST_F(CApiFunctionTest, UsingTwoOutputsOfSplit) { + /* + * feed + * | + * +---------+---+ + * | const0 | | + * | | | | + * | v / | + * | split | + * | | | | | + * | | v | | + * | | | | + * +---+-----+---+ + * | | + * v v + * + * Second output from split is not used as function output + */ + // Define + TF_Operation* feed = Placeholder(func_graph_, s_); + TF_Operation* split = Split3(feed, func_graph_, s_); + DefineT(-1, {}, {{feed, 0}}, {{split, 0}, {split, 2}}, nullptr); + + // Use, run, and verify + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({func_feed}); + RunT({{func_feed, Int32Tensor({1, 2, 3, 4, 5, 6})}}, + {{func_op, 0}, {func_op, 1}}, {{1, 2}, {5, 6}}); + VerifyFDef({"split3_const0", "split3_1"}, M({{"feed"}}), + M({{"split3"}, {"split3_0"}}), + 
{{"split3_const0:output:0", "split3_1:0"}, + {"feed", "split3_1:1"}, + {"split3_1:output:0", "split3"}, + {"split3_1:output:2", "split3_0"}}, + {}); +} + +TEST_F(CApiFunctionTest, UsingTwoOutputsOfSplitAsInputs) { + /* + * | + * v + * split + * | | | + * | v | + * | | + * +---+-----+---+ + * | | | | + * | v v | + * | add | + * | | | + * | | | + * +------+------+ + * | + * v + */ + // Define + TF_Operation* feed = Placeholder(func_graph_, s_); + TF_Operation* split = Split3(feed, func_graph_, s_); + TF_Operation* add = Add({split, 0}, {split, 2}, func_graph_, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + DefineT(1, {add}, {{split, 0}, {split, 2}}, {{add, 0}}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_, "two"); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3); + VerifyFDef( + {"add_0"}, M({{"split3"}, {"split3_0"}}), M({{"add"}}), + {{"split3", "add_0:0"}, {"split3_0", "add_0:1"}, {"add_0:sum:0", "add"}}, + {}); +} + +TEST_F(CApiFunctionTest, NodesUsedInInputsMustHaveSingleOutput) { + /* + * | + * v + * split + * | | | + * | v | + * | | + * input --->| |<--- input + * | | + * v v + * add + * | + * | + * v + */ + // Define + TF_Tensor* tensor_123 = Int32Tensor({1, 2, 3}); + TF_Operation* c = Const(tensor_123, func_graph_, s_, "const_array"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + TF_Operation* split = Split3(c, func_graph_, s_); + TF_Operation* add = Add({split, 0}, {split, 2}, func_graph_, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + DefineT(-1, {}, {{split, 0}, {split, 2}}, {{add, 0}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("When `num_opers` is set to -1, nodes referenced in " + "`inputs` must have a single output. Node split3 has " + "3 outputs. 
Encountered while creating function 'MyFunc'"), + string(TF_Message(s_))); + + TF_DeleteTensor(tensor_123); +} + +TEST_F(CApiFunctionTest, FunctionWithWhileLoop) { + // Inputs to the while loop and the function as a whole + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + + // Outputs of the while loop corresponding to the two inputs above + // The first one will the function's output + std::vector outputs; + + // Add while loop to func_graph_ + { + // The inputs to the while loop + std::vector inputs = {{feed1, 0}, {feed2, 0}}; + std::unique_ptr params(new TF_WhileParams( + TF_NewWhile(func_graph_, &inputs[0], inputs.size(), s_))); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + params->name = "test_loop"; + + // Initialize outputs so we can easily detect errors/bugs + outputs.resize(2, {nullptr, -1}); + + // Create loop: while (input1 < input2) input1 += input2 + 1 + TF_Operation* less_than = LessThan( + params->cond_inputs[0], params->cond_inputs[1], params->cond_graph, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + params->cond_output = {less_than, 0}; + + TF_Operation* add1 = Add(params->body_inputs[0], params->body_inputs[1], + params->body_graph, s_, "add1"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + TF_Operation* one = ScalarConst(1, params->body_graph, s_); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + TF_Operation* add2 = Add(add1, one, params->body_graph, s_, "add2"); + ASSERT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + params->body_outputs[0] = {add2, 0}; + params->body_outputs[1] = params->body_inputs[1]; + + // Finalize while loop + TF_FinishWhile(params.get(), s_, &outputs[0]); + EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + } + + // Define function, use it in graph, and run + DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {outputs[0]}, nullptr); + TF_Operation* five = ScalarConst(5, host_graph_, s_, "five"); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({func_feed, five}); + Run({{func_feed, Int32Tensor(2)}}, func_op, 2 /*+=*/ + 5 + 1); + + // Verify input, output, and subset of edges in fdef. + // The subset of edges we verify is a chain between feed1 and output to + // make sure that the correct output is picked. + tensorflow::FunctionDef fdef; + ASSERT_TRUE(GetFunctionDef(func_, &fdef)); + VerifyFDefInputs(fdef, M({{"feed1"}, {"feed2"}})); + VerifyFDefOutputs(fdef, M({{"test_loop_exit"}})); + VerifyFDefEdges(fdef, + {{"feed1", "test_loop/Enter:0"}, + {"test_loop/Enter:output:0", "test_loop/Merge:0"}, + {"test_loop/Merge:output:0", "test_loop/Switch:0"}, + {"test_loop/Switch:output_false:0", "test_loop/Exit:0"}, + {"test_loop/Exit:output:0", "test_loop_exit"}}, + {}, false); +} + +TEST_F(CApiFunctionTest, ControlDependency) { + /* + * | | scalar + * | | . + * v v . 
<---- control dependency + * add < - + * | + * v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* five = ScalarConst(5, func_graph_, s_); + TF_Operation* add = + AddWithCtrlDependency(feed1, feed2, func_graph_, five, s_); + EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + Define(-1, {}, {feed1, feed2}, {add}, nullptr); + + // Use, run, and verify + TF_Operation* two = ScalarConst(2, host_graph_, s_); + TF_Operation* func_feed = Placeholder(host_graph_, s_); + TF_Operation* func_op = Use({two, func_feed}); + Run({{func_feed, Int32Tensor(3)}}, func_op, 2 + 3); + VerifyFDef( + {"add_0", "scalar"}, M({{"feed1"}, {"feed2"}}), M({{"add"}}), + {{"feed1", "add_0:0"}, {"feed2", "add_0:1"}, {"add_0:sum:0", "add"}}, + {{"scalar", "add_0"}}); +} + +TEST_F(CApiFunctionTest, ControlDependencyOutsideOfBody) { + /* + * | | scalar + * | | . + * v v . <---- control dependency + * add < - + * | + * v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* five = ScalarConst(5, func_graph_, s_); + TF_Operation* add = + AddWithCtrlDependency(feed1, feed2, func_graph_, five, s_); + EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + Define(1, {add}, {feed1, feed2}, {add}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("The source of control edge [id=3 scalar:-1 -> add:-1] " + "is not in the body. Encountered while creating " + "function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, ControlDependencyOutsideOfBody_FromInputNode) { + /* + * | |. + * | | . + * | | . + * v v . <---- control dependency + * add < - + * | + * v + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = + AddWithCtrlDependency(feed1, feed2, func_graph_, feed1, s_); + EXPECT_EQ(TF_OK, TF_GetCode(s_)) << TF_Message(s_); + Define(-1, {}, {feed1, feed2}, {add}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("The source of control edge [id=3 feed1:-1 -> add:-1] " + "is not in the body. 
Encountered while creating " + "function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, DuplicateInputsAreNotAllowed) { + /* + * feed + * | + * +++ + * | | + * +---+-+---+ + * | | | | + * | v v | + * | add | + * | | | + * | | | + * +----+----+ + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* add = Add(feed1, feed1, func_graph_, s_); + Define(-1, {}, {feed1, feed1}, {add}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ( + string("TF_Output feed1:0 appears more than once in the input list"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, InvalidInputTensor_HighIndex) { + /* + * | | + * v v + * add + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + DefineT(-1, {}, {{feed1, 0}, {feed2, 2}}, {{add, 0}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("Node 'feed2' (type: 'Placeholder', num of outputs: 1) does " + "not have output 2\n\tEncountered while processing " + "input 1 into function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, InvalidInputTensor_BadNodePtr) { + /* + * | | + * v v + * add + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + DefineT(-1, {}, {{feed1, 0}, {nullptr, 0}}, {{add, 0}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("Node is null\n\tEncountered while processing input 1 " + "into function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, InvalidOutputTensor_HighIndex) { + /* + * | | + * v v + * add + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {{add, 3}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("Node 'add' (type: 'AddN', num of outputs: 1) does " + "not have output 3\n\tEncountered while processing " + "output 0 from function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, InvalidOutputTensor_BadNodePtr) { + /* + * | | + * v v + * add + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + Add(feed1, feed2, func_graph_, s_); + DefineT(-1, {}, {{feed1, 0}, {feed2, 0}}, {{nullptr, 3}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("Node is null\n\tEncountered while processing output 0 " + "from function 'MyFunc'"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, NodeMissingInput) { + /* + * input---> | | <----missing input + * v v + * body----> add + * | + * v + */ + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + DefineT(1, {add}, {{feed1, 0}}, {{add, 0}}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("Input 1, 'feed2:0', of node 'add' in function 'MyFunc' " + "is not available. 
You might need to include it in inputs " + "or include its source node in the body"), + string(TF_Message(s_))); +} + +TEST_F(CApiFunctionTest, OutputOpNotInBody) { + /* + * | | + * v v + * add scalar (scalar not included in body) + * | | + * v v (function has two outputs) + */ + // Define + TF_Operation* feed1 = Placeholder(func_graph_, s_, "feed1"); + TF_Operation* feed2 = Placeholder(func_graph_, s_, "feed2"); + TF_Operation* scalar = ScalarConst(2, func_graph_, s_); + TF_Operation* add = Add(feed1, feed2, func_graph_, s_); + Define(1, {add}, {feed1, feed2}, {add, scalar}, nullptr, true); + EXPECT_EQ(TF_INVALID_ARGUMENT, TF_GetCode(s_)); + EXPECT_EQ(string("TF_Output scalar:0 is neither in the function body nor " + "among function inputs. Encountered while creating " + "function 'MyFunc'"), + string(TF_Message(s_))); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index f7d25dce8f5..6e44a72e2b9 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -130,6 +130,11 @@ struct TF_DeviceList { std::vector response; }; +struct TF_Function { + // Currently contains a single function and no gradients + tensorflow::FunctionDefLibrary fdef_lib; +}; + namespace tensorflow { class TensorCApi { @@ -142,6 +147,9 @@ class TensorCApi { }; TF_Tensor* TF_TensorFromTensor(const Tensor& src, TF_Status* status); + +Status MessageToBuffer(const tensorflow::protobuf::Message& in, TF_Buffer* out); + } // end namespace tensorflow #endif // TENSORFLOW_C_C_API_INTERNAL_H_ diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc index 0aa60fb45dd..c4420290099 100644 --- a/tensorflow/c/c_api_test.cc +++ b/tensorflow/c/c_api_test.cc @@ -829,7 +829,7 @@ TEST(CAPI, ShapeInferenceError) { TF_Operation* vec3 = Const(vec3_tensor.get(), graph, status, "vec3"); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - TF_Operation* add = Add(vec2, vec3, graph, status); + TF_Operation* add = AddNoCheck(vec2, vec3, graph, status); ASSERT_NE(TF_OK, TF_GetCode(status)); ASSERT_TRUE(add == nullptr); diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 21603c1a07c..9cd978c97ea 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -15,7 +15,9 @@ limitations under the License. 
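// ---------------------------------------------------------------------------
// [Editorial note, not part of the patch] The MessageToBuffer helper that the
// c_api_internal.h change above now exports serializes any protobuf message
// into a caller-owned TF_Buffer. A minimal usage sketch; the FunctionDef value
// and the error handling here are illustrative assumptions, not code from this
// change:
//
//   tensorflow::FunctionDef fdef;            // some proto to serialize
//   TF_Buffer* buf = TF_NewBuffer();         // must start out empty
//   tensorflow::Status s = tensorflow::MessageToBuffer(fdef, buf);
//   if (s.ok()) {
//     // buf->data / buf->length now hold the serialized bytes, and
//     // buf->data_deallocator will release them when the buffer is deleted.
//   }
//   TF_DeleteBuffer(buf);
//
// This is the same round-trip pattern the GetFunctionDef test helper below
// uses with TF_FunctionToFunctionDef to parse a TF_Function back into a
// tensorflow::FunctionDef.
// ---------------------------------------------------------------------------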
#include "tensorflow/c/c_test_util.h" +#include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" using tensorflow::GraphDef; @@ -36,6 +38,23 @@ TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values) { return t; } +TF_Tensor* Int32Tensor(const int64_t* dims, int num_dims, + const int32_t* values) { + int64_t num_values = 1; + for (int i = 0; i < num_dims; ++i) { + num_values *= dims[i]; + } + TF_Tensor* t = + TF_AllocateTensor(TF_INT32, dims, num_dims, sizeof(int32_t) * num_values); + memcpy(TF_TensorData(t), values, sizeof(int32_t) * num_values); + return t; +} + +TF_Tensor* Int32Tensor(const std::vector& values) { + int64_t dims = values.size(); + return Int32Tensor(&dims, 1, values.data()); +} + TF_Tensor* Int32Tensor(int32_t v) { const int num_bytes = sizeof(int32_t); int32_t* values = new int32_t[1]; @@ -44,19 +63,40 @@ TF_Tensor* Int32Tensor(int32_t v) { &Int32Deallocator, nullptr); } -TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) { +// All the *Helper methods are used as a workaround for the restrictions that +// one cannot call ASSERT_* methods in non-void-returning functions (when +// exceptions are disabled during compilation) +void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, + TF_Operation** op) { TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name); TF_SetAttrType(desc, "dtype", TF_INT32); - return TF_FinishOperation(desc, s); + *op = TF_FinishOperation(desc, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + ASSERT_NE(*op, nullptr); +} + +TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) { + TF_Operation* op; + PlaceholderHelper(graph, s, name, &op); + return op; +} + +void ConstHelper(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name, + TF_Operation** op) { + TF_OperationDescription* desc = TF_NewOperation(graph, "Const", name); + TF_SetAttrTensor(desc, "value", t, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + TF_SetAttrType(desc, "dtype", TF_TensorType(t)); + *op = TF_FinishOperation(desc, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + ASSERT_NE(*op, nullptr); } TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name) { - TF_OperationDescription* desc = TF_NewOperation(graph, "Const", name); - TF_SetAttrTensor(desc, "value", t, s); - if (TF_GetCode(s) != TF_OK) return nullptr; - TF_SetAttrType(desc, "dtype", TF_TensorType(t)); - return TF_FinishOperation(desc, s); + TF_Operation* op; + ConstHelper(t, graph, s, name, &op); + return op; } TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s, @@ -65,11 +105,39 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s, return Const(tensor.get(), graph, s, name); } -TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, - TF_Status* s, const char* name) { +void AddHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, + const char* name, TF_Operation** op, bool check) { TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name); TF_Output add_inputs[2] = {{l, 0}, {r, 0}}; TF_AddInputList(desc, add_inputs, 2); + *op = TF_FinishOperation(desc, s); + if (check) { + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + ASSERT_NE(*op, nullptr); + } +} + +TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + TF_Status* s, const char* 
name) { + TF_Operation* op; + AddHelper(l, r, graph, s, name, &op, true); + return op; +} + +TF_Operation* AddNoCheck(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + TF_Status* s, const char* name) { + TF_Operation* op; + AddHelper(l, r, graph, s, name, &op, false); + return op; +} + +TF_Operation* AddWithCtrlDependency(TF_Operation* l, TF_Operation* r, + TF_Graph* graph, TF_Operation* ctrl_op, + TF_Status* s, const char* name) { + TF_OperationDescription* desc = TF_NewOperation(graph, "AddN", name); + TF_Output add_inputs[2] = {{l, 0}, {r, 0}}; + TF_AddInputList(desc, add_inputs, 2); + TF_AddControlInput(desc, ctrl_op); return TF_FinishOperation(desc, s); } @@ -81,11 +149,20 @@ TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s, return TF_FinishOperation(desc, s); } -TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s) { +void NegHelper(TF_Operation* n, TF_Graph* graph, TF_Status* s, + TF_Operation** op) { TF_OperationDescription* desc = TF_NewOperation(graph, "Neg", "neg"); TF_Output neg_input = {n, 0}; TF_AddInput(desc, neg_input); - return TF_FinishOperation(desc, s); + *op = TF_FinishOperation(desc, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + ASSERT_NE(*op, nullptr); +} + +TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s) { + TF_Operation* op; + NegHelper(n, graph, s, &op); + return op; } TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, @@ -96,6 +173,32 @@ TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, return TF_FinishOperation(desc, s); } +void Split3Helper(TF_Operation* input, TF_Graph* graph, TF_Status* s, + const char* name, TF_Operation** op) { + TF_Operation* zero = ScalarConst( + 0, graph, s, ::tensorflow::strings::StrCat(name, "_const0").c_str()); + TF_OperationDescription* desc = TF_NewOperation(graph, "Split", name); + TF_AddInput(desc, {zero, 0}); + TF_AddInput(desc, {input, 0}); + TF_SetAttrInt(desc, "num_split", 3); + TF_SetAttrType(desc, "T", TF_INT32); + // Set device to CPU since there is no version of split for int32 on GPU + // TODO(iga): Convert all these helpers and tests to use floats because + // they are usually available on GPUs. 
After doing this, remove TF_SetDevice + // call in c_api_function_test.cc + TF_SetDevice(desc, "/cpu:0"); + *op = TF_FinishOperation(desc, s); + ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + ASSERT_NE(*op, nullptr); +} + +TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s, + const char* name) { + TF_Operation* op; + Split3Helper(input, graph, s, name, &op); + return op; +} + bool IsPlaceholder(const tensorflow::NodeDef& node_def) { if (node_def.op() != "Placeholder" || node_def.name() != "feed") { return false; @@ -196,6 +299,18 @@ bool GetNodeDef(TF_Operation* oper, tensorflow::NodeDef* node_def) { return ret; } +bool GetFunctionDef(TF_Function* func, tensorflow::FunctionDef* func_def) { + TF_Status* s = TF_NewStatus(); + TF_Buffer* buffer = TF_NewBuffer(); + TF_FunctionToFunctionDef(func, buffer, s); + bool ret = TF_GetCode(s) == TF_OK; + EXPECT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); + if (ret) ret = func_def->ParseFromArray(buffer->data, buffer->length); + TF_DeleteBuffer(buffer); + TF_DeleteStatus(s); + return ret; +} + bool GetAttrValue(TF_Operation* oper, const char* attr_name, tensorflow::AttrValue* attr_value, TF_Status* s) { TF_Buffer* buffer = TF_NewBuffer(); diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index 0c0ba667bd0..a927739d462 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -33,6 +33,13 @@ typedef std::unique_ptr // Create a tensor with values of type TF_INT8 provided by `values`. TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values); +// Create a tensor with values of type TF_INT32 provided by `values`. +TF_Tensor* Int32Tensor(const int64_t* dims, int num_dims, + const int32_t* values); + +// Create 1 dimensional tensor with values from `values` +TF_Tensor* Int32Tensor(const std::vector& values); + TF_Tensor* Int32Tensor(int32_t v); TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, @@ -47,6 +54,13 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s, TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name = "add"); +TF_Operation* AddNoCheck(TF_Operation* l, TF_Operation* r, TF_Graph* graph, + TF_Status* s, const char* name = "add"); + +TF_Operation* AddWithCtrlDependency(TF_Operation* l, TF_Operation* r, + TF_Graph* graph, TF_Operation* ctrl_op, + TF_Status* s, const char* name = "add"); + TF_Operation* Add(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s, const char* name = "add"); @@ -54,6 +68,10 @@ TF_Operation* Neg(TF_Operation* n, TF_Graph* graph, TF_Status* s); TF_Operation* LessThan(TF_Output l, TF_Output r, TF_Graph* graph, TF_Status* s); +// Split `input` along the first dimention into 3 tensors +TF_Operation* Split3(TF_Operation* input, TF_Graph* graph, TF_Status* s, + const char* name = "split3"); + bool IsPlaceholder(const tensorflow::NodeDef& node_def); bool IsScalarConst(const tensorflow::NodeDef& node_def, int v); @@ -66,6 +84,8 @@ bool GetGraphDef(TF_Graph* graph, tensorflow::GraphDef* graph_def); bool GetNodeDef(TF_Operation* oper, tensorflow::NodeDef* node_def); +bool GetFunctionDef(TF_Function* func, tensorflow::FunctionDef* func_def); + bool GetAttrValue(TF_Operation* oper, const char* attr_name, tensorflow::AttrValue* attr_value, TF_Status* s); diff --git a/tensorflow/cc/gradients/math_grad.cc b/tensorflow/cc/gradients/math_grad.cc index 09a15fbe5f1..d90654f2e9a 100644 --- a/tensorflow/cc/gradients/math_grad.cc +++ b/tensorflow/cc/gradients/math_grad.cc @@ -687,6 +687,72 @@ 
Status MeanGrad(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("Mean", MeanGrad); +Status MinOrMaxGrad(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + // The partial derivative for any input along a "reduced" dimension + // is 1 when it is the min (or max) and 0 everywhere else. So the + // gradient calculation is identical for both operators. + // + // There's a special case for propagating gradients when there are + // multiple minima (or maxima) - we choose to divide the gradient + // equally among all matching inputs. + // + // Please note this comment + // https://github.com/tensorflow/tensorflow/issues/4886#issuecomment-256836063 + // for details. + + // Running example: + // input: [[5, 5, 5], + // [1, 2, -3]] + // reduction_indices: [1] + auto input = op.input(0); + auto reduction_indices = op.input(1); + + // [2, 3] + auto input_shape = Shape(scope, input); + + // [2, 1] + auto output_shape_kept_dims = + ReducedShapeHelper(scope, input_shape, reduction_indices); + + // for op=min (say) + // output = [5, -3] + // y = [[5], + // [-3]] + auto y = Reshape(scope, op.output(0), output_shape_kept_dims); + + // reshape([g1, g2], [2, 1]) = [[g1], + // [g2]] + auto grad = Reshape(scope, grad_inputs[0], output_shape_kept_dims); + + // indicators = equal(y, input) + // = equal([[5], [[5, 5, 5], + // [-3]], [1, 2, -3]]) + // = [[1, 1, 1], + // [0, 0, 1]] + auto indicators = Cast(scope, Equal(scope, y, input), grad_inputs[0].type()); + + // [[3], + // [1]] + auto num_selected = Reshape(scope, Sum(scope, indicators, reduction_indices), + output_shape_kept_dims); + + // [[1/3, 1/3, 1/3], + // [0, 0, 1]] + auto scale = Div(scope, indicators, num_selected); + + // [[g1/3, g1/3, g1/3], + // [0, 0, g2]] + grad_outputs->push_back(Mul(scope, scale, grad)); + + // Stop propagation along reduction_indices + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("Min", MinOrMaxGrad); +REGISTER_GRADIENT_OP("Max", MinOrMaxGrad); + // MatMulGrad helper function used to compute two MatMul operations // based on input matrix transposition combinations. Status MatMulGradHelper(const Scope& scope, const bool is_batch, diff --git a/tensorflow/cc/gradients/math_grad_test.cc b/tensorflow/cc/gradients/math_grad_test.cc index 62b59b25c7a..5b1558dd820 100644 --- a/tensorflow/cc/gradients/math_grad_test.cc +++ b/tensorflow/cc/gradients/math_grad_test.cc @@ -955,6 +955,55 @@ TEST_F(NaryGradTest, Mean) { RunTest({x}, {x_shape}, {y}, {y_shape}); } +TEST_F(NaryGradTest, Min) { + TensorShape x_shape({2, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Min(scope_, x, {-1}); + // y's shape is the result of reducing x along axes -1 (= 1) + TensorShape y_shape({2}); + Tensor x_init_value = + test::AsTensor({0.5f, 0.7f, 0.2f, 1.0f, 1.5f, -2.8f}, x_shape); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NaryGradTest, Max) { + TensorShape x_shape({2, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Max(scope_, x, {-1}); + // y's shape is the result of reducing x along axes -1 (= 1) + TensorShape y_shape({2}); + Tensor x_init_value = + test::AsTensor({0.5f, 0.7f, 0.2f, 1.0f, 1.5f, -2.8f}, x_shape); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NaryGradTest, MinMulti) { + // Test gradient when there are multiple minima. 
+ // Note that we cannot directly use a test Tensor with multiple + // minima, as the numeric estimator will calculate incorrect + // gradients when perturbing each entry in the Tensor (which then + // changes how many minima exist.) + // Instead, we use a single input that broadcast-multiplies a larger + // tensor with equal values, and apply reduce_min to the multiplied + // result. + TensorShape x_shape({1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto all_same = Mul(scope_, Const(scope_, {1.f, 1.f, 1.f}), x); + auto y = Min(scope_, all_same, {0}); + // y is a [3] shaped tensor reduced along dimension 0, so it is [1] shaped + TensorShape y_shape({1}); + RunTest({x}, {x_shape}, {y}, {y_shape}); +} + +TEST_F(NaryGradTest, MaxMulti) { + TensorShape x_shape({1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto all_same = Mul(scope_, Const(scope_, {1.f, 1.f, 1.f}), x); + auto y = Max(scope_, all_same, {0}); + TensorShape y_shape({1}); + RunTest({x}, {x_shape}, {y}, {y_shape}); +} + TEST_F(NaryGradTest, AddN) { TensorShape shape({3, 2, 5}); std::vector xs; diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index e349aefd4cb..e6862f0d9dd 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -52,6 +52,12 @@ class BinaryOpsTest(XLATestCase): def testFloatOps(self): for dtype in self.float_types: + self._testBinary( + lambda x, y: math_ops.approximate_equal(x, y, tolerance=0.0001), + np.array([[[[-1, 2.00009999], [-3, 4.01]]]], dtype=dtype), + np.array([[[[-1.001, 2], [-3.00009, 4]]]], dtype=dtype), + expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) + self._testBinary( gen_math_ops._real_div, np.array([3, 3, -1.5, -8, 44], dtype=dtype), @@ -82,6 +88,12 @@ class BinaryOpsTest(XLATestCase): dtype(4), expected=np.array([[16], [81]], dtype=dtype)) + self._testBinary( + gen_math_ops._reciprocal_grad, + np.array([4, -3, -2, 1], dtype=dtype), + np.array([5, -6, 7, -8], dtype=dtype), + expected=np.array([-80, 54, -28, 8], dtype=dtype)) + self._testBinary( gen_math_ops._sigmoid_grad, np.array([4, 3, 2, 1], dtype=dtype), @@ -107,6 +119,13 @@ class BinaryOpsTest(XLATestCase): expected=np.array( [3.97322869, 2.99258232, 1.99817801, 0.99966466], dtype=dtype)) + self._testBinary( + gen_nn_ops._softsign_grad, + np.array([4, 3, 2, 1], dtype=dtype), + np.array([5, 6, 7, 8], dtype=dtype), + expected=np.array( + [0.11111111, 0.06122449, 0.03125, 0.01234568], dtype=dtype)) + self._testBinary( gen_math_ops._tanh_grad, np.array([4, 3, 2, 1], dtype=dtype), diff --git a/tensorflow/compiler/tests/randomized_tests.cc b/tensorflow/compiler/tests/randomized_tests.cc index a342e37e0ee..49c1699b6ed 100644 --- a/tensorflow/compiler/tests/randomized_tests.cc +++ b/tensorflow/compiler/tests/randomized_tests.cc @@ -888,6 +888,16 @@ TEST_F(OpTest, Any) { }); } +TEST_F(OpTest, ApproximateEqual) { + Repeatedly([this]() { + auto dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("ApproximateEqual") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, Asinh) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -1662,11 +1672,9 @@ TEST_F(OpTest, GreaterEqual) { TEST_F(OpTest, L2Loss) { Repeatedly([this]() { - DataType type = Choose({DT_INT32, DT_FLOAT}); - // TODO(b/31644876): scalars currently crash. 
- return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("L2Loss") - .RandomInput(type, RandomDims(1)) - .Attr("T", type)); + DataType type = DT_FLOAT; + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("L2Loss").RandomInput(type).Attr("T", type)); }); } @@ -2165,6 +2173,15 @@ TEST_F(OpTest, Reciprocal) { }); } +TEST_F(OpTest, ReciprocalGrad) { + Repeatedly([this]() { + std::vector dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("ReciprocalGrad") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} TEST_F(OpTest, Relu) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -2250,6 +2267,13 @@ TEST_F(OpTest, ReverseV2) { }); } +TEST_F(OpTest, Rint) { + Repeatedly([this]() { + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("Rint").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, Round) { Repeatedly([this]() { return ExpectTfAndXlaOutputsAreClose( @@ -2402,6 +2426,23 @@ TEST_F(OpTest, SoftplusGrad) { }); } +TEST_F(OpTest, Softsign) { + Repeatedly([this]() { + return ExpectTfAndXlaOutputsAreClose( + OpTestBuilder("Softsign").RandomInput(DT_FLOAT).Attr("T", DT_FLOAT)); + }); +} + +TEST_F(OpTest, SoftsignGrad) { + Repeatedly([this]() { + std::vector dims = RandomDims(); + return ExpectTfAndXlaOutputsAreClose(OpTestBuilder("SoftsignGrad") + .RandomInput(DT_FLOAT, dims) + .RandomInput(DT_FLOAT, dims) + .Attr("T", DT_FLOAT)); + }); +} + TEST_F(OpTest, SpaceToBatch) { Repeatedly([this]() { std::vector block_dims = RandomDims(4, 4, 0, 5); diff --git a/tensorflow/compiler/tests/unary_ops_test.py b/tensorflow/compiler/tests/unary_ops_test.py index ca2a438005f..b21f1998a5d 100644 --- a/tensorflow/compiler/tests/unary_ops_test.py +++ b/tensorflow/compiler/tests/unary_ops_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import unittest + import numpy as np from six.moves import xrange # pylint: disable=redefined-builtin @@ -161,12 +163,17 @@ class UnaryOpsTest(XLATestCase): np.array([[-1.7, 1.2]], dtype=dtype), expected=np.array([[-2, 1]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + math_ops.is_finite, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[0, 1, 1, 1, 1, 1, 1, 0, 0]], dtype=np.bool)) + # Tests for tf.nn ops. self._assertOpOutputMatchesExpected( nn_ops.l2_loss, np.array([[[]]], dtype=dtype), expected=dtype(0)) - # TODO(b/31644876): enable this test case when fixed. 
- # self._assertOpOutputMatchesExpected(tf.nn.l2_loss, dtype(4), dtype(10)) + self._assertOpOutputMatchesExpected(nn_ops.l2_loss, dtype(4), dtype(8)) self._assertOpOutputMatchesExpected( nn_ops.l2_loss, np.array([[-2, 4]], dtype=dtype), expected=dtype(10)) @@ -198,6 +205,12 @@ class UnaryOpsTest(XLATestCase): np.array([[1e-14, 1e-15, 0.6]], dtype=dtype), expected=np.log1p(np.array([[1e-14, 1e-15, 0.6]], dtype=dtype))) + self._assertOpOutputMatchesExpected( + math_ops.rint, + np.array([[-1.7, 1.2, 4.0, 0.0], [-3.5, -2.5, -1.5, -0.5], + [0.5, 1.5, 2.5, 3.5]], dtype=dtype), + expected=np.array([[-2, 1, 4, 0], [-4, -2, -2, 0], [0, 2, 2, 4]], + dtype=dtype)) self._assertOpOutputMatchesExpected( math_ops.round, np.array([[-1.7, 1.2, 4.0, 0.0], [-3.5, -2.5, -1.5, -0.5], @@ -301,6 +314,12 @@ class UnaryOpsTest(XLATestCase): np.array([[-2, 0, 8]], dtype=dtype), expected=np.array([[0.126928, 0.6931472, 8.0003354]], dtype=dtype)) + self._assertOpOutputMatchesExpected( + nn_ops.softsign, + np.array([[-2, -1, 0, 1, 2]], dtype=dtype), + expected=np.array([[-0.66666669, -0.5, 0, 0.5, 0.66666669]], + dtype=dtype)) + self._assertOpOutputMatchesExpected( math_ops.is_finite, np.array( @@ -335,6 +354,23 @@ class UnaryOpsTest(XLATestCase): np.array([[4, 3], [2, 1]], dtype=dtype), expected=np.array([[1, 1], [1, 1]], dtype=dtype)) + # TODO(phawkins): these tests fail unless fastmath optimizations + # are disabled. Use more robust IsInf/IsNaN detection and enable these + # tests. + @unittest.skip("test case fails in fast-math mode") + def testIsInfAndIsNan(self): + for dtype in self.float_types: + self._assertOpOutputMatchesExpected( + math_ops.is_inf, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[1, 0, 0, 0, 0, 0, 0, 1, 0]], dtype=np.bool)) + self._assertOpOutputMatchesExpected( + math_ops.is_nan, + np.array([[np.NINF, -2, -1, 0, 0.5, 1, 2, np.inf, np.nan]], + dtype=dtype), + expected=np.array([[0, 0, 0, 0, 0, 0, 0, 0, 1]], dtype=np.bool)) + def testLogicalOps(self): self._assertOpOutputMatchesExpected( math_ops.logical_not, diff --git a/tensorflow/compiler/tf2xla/kernels/BUILD b/tensorflow/compiler/tf2xla/kernels/BUILD index d09e721c936..6e6c5dc17f5 100644 --- a/tensorflow/compiler/tf2xla/kernels/BUILD +++ b/tensorflow/compiler/tf2xla/kernels/BUILD @@ -31,7 +31,6 @@ tf_kernel_library( "function_ops.cc", "gather_op.cc", "identity_op.cc", - "is_finite_op.cc", "l2loss_op.cc", "lrn_ops.cc", "matmul_op.cc", diff --git a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc index f9bb1e2fb1d..58538b45137 100644 --- a/tensorflow/compiler/tf2xla/kernels/binary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/binary_ops.cc @@ -102,6 +102,7 @@ XLA_MAKE_BINARY(Mod, b->Rem(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Maximum, b->Max(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(Minimum, b->Min(lhs, rhs, extend_dimensions)); XLA_MAKE_BINARY(RealDiv, b->Div(lhs, rhs, extend_dimensions)); +XLA_MAKE_BINARY(ReciprocalGrad, b->Neg(b->Mul(rhs, b->Mul(lhs, lhs)))); XLA_MAKE_BINARY( RsqrtGrad, b->Mul(b->Pow(lhs, XlaHelpers::IntegerLiteral(b, input_type(0), 3)), @@ -140,6 +141,11 @@ XLA_MAKE_BINARY(SoftplusGrad, b->Div(lhs, b->Add(b->Exp(b->Neg(rhs)), XlaHelpers::One(b, input_type(1))))); +// softsigngrad(gradients, features) = gradients / (1 + abs(features)) ** 2 +XLA_MAKE_BINARY(SoftsignGrad, + b->Div(lhs, Square(b, b->Add(XlaHelpers::One(b, input_type(0)), + b->Abs(rhs))))); + XLA_MAKE_BINARY(TanhGrad, b->Mul(rhs, 
b->Sub(XlaHelpers::One(b, input_type(0)), b->Mul(lhs, lhs)))); @@ -147,5 +153,24 @@ XLA_MAKE_BINARY(Pow, b->Pow(lhs, rhs, extend_dimensions)); #undef XLA_MAKE_BINARY +class ApproximateEqualOp : public XlaOpKernel { + public: + explicit ApproximateEqualOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("tolerance", &tolerance_)); + } + + // Computes the max of the scalar input x and 0. + void Compile(XlaOpKernelContext* ctx) override { + xla::ComputationBuilder* b = ctx->builder(); + auto result = b->Lt(b->Abs(b->Sub(ctx->Input(0), ctx->Input(1))), + XlaHelpers::FloatLiteral(b, input_type(0), tolerance_)); + ctx->SetOutput(0, result); + } + + private: + float tolerance_; +}; +REGISTER_XLA_OP(Name("ApproximateEqual"), ApproximateEqualOp); + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc b/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc deleted file mode 100644 index 788dcee5443..00000000000 --- a/tensorflow/compiler/tf2xla/kernels/is_finite_op.cc +++ /dev/null @@ -1,43 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/compiler/tf2xla/xla_helpers.h" -#include "tensorflow/compiler/tf2xla/xla_op_kernel.h" -#include "tensorflow/compiler/tf2xla/xla_op_registry.h" -#include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/bcast.h" - -namespace tensorflow { -namespace { - -class IsFiniteOp : public XlaOpKernel { - public: - explicit IsFiniteOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {} - - void Compile(XlaOpKernelContext* ctx) override { - xla::ComputationDataHandle input = ctx->Input(0); - ctx->SetOutput(0, ctx->builder()->IsFinite(input)); - } - - private: - TF_DISALLOW_COPY_AND_ASSIGN(IsFiniteOp); -}; - -REGISTER_XLA_OP(Name("IsFinite"), IsFiniteOp); - -} // anonymous namespace -} // namespace tensorflow diff --git a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc index 7b39f0533b4..6b8f5ec7b33 100644 --- a/tensorflow/compiler/tf2xla/kernels/unary_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/unary_ops.cc @@ -73,8 +73,12 @@ XLAJIT_MAKE_UNARY(Exp, b->Exp(x)); XLAJIT_MAKE_UNARY(Expm1, b->Sub(b->Exp(x), XlaHelpers::One(b, input_type(0)))); XLAJIT_MAKE_UNARY(Floor, b->Floor(x)); -// Returns 0 if x is 0, -1 if x < 0 and 1 if x > 0. 
-XLAJIT_MAKE_UNARY(Sign, b->Sign(x)); +XLAJIT_MAKE_UNARY(IsFinite, b->IsFinite(x)); +XLAJIT_MAKE_UNARY(IsInf, b->Eq(b->Abs(x), + XlaHelpers::FloatLiteral( + b, input_type(0), + std::numeric_limits::infinity()))); +XLAJIT_MAKE_UNARY(IsNan, b->Ne(x, x)); // Return 1/x XLAJIT_MAKE_UNARY(Inv, b->Div(XlaHelpers::One(b, input_type(0)), x)); XLAJIT_MAKE_UNARY(Reciprocal, b->Div(XlaHelpers::One(b, input_type(0)), x)); @@ -105,6 +109,12 @@ static xla::ComputationDataHandle Round(xla::ComputationBuilder* b, b->Add(round_val, one), round_val); } +XLAJIT_MAKE_UNARY(Rint, Round(b, input_type(0), x)); +XLAJIT_MAKE_UNARY(Round, Round(b, input_type(0), x)); + +XLAJIT_MAKE_UNARY(Rsqrt, + b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5))); + // Expresses sigmoid as a rescaled tanh: sigmoid(x) == (tanh(x/2) + 1) / 2. static xla::ComputationDataHandle Sigmoid(xla::ComputationBuilder* b, DataType dtype, @@ -112,16 +122,19 @@ static xla::ComputationDataHandle Sigmoid(xla::ComputationBuilder* b, auto half = XlaHelpers::FloatLiteral(b, dtype, 0.5); return b->Add(half, b->Mul(half, b->Tanh(b->Mul(half, x)))); } - -XLAJIT_MAKE_UNARY(Round, Round(b, input_type(0), x)); -XLAJIT_MAKE_UNARY(Rsqrt, - b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), -0.5))); XLAJIT_MAKE_UNARY(Sigmoid, Sigmoid(b, input_type(0), x)); + +// Returns 0 if x is 0, -1 if x < 0 and 1 if x > 0. +XLAJIT_MAKE_UNARY(Sign, b->Sign(x)); XLAJIT_MAKE_UNARY(Sinh, b->Mul(b->Sub(b->Exp(x), b->Exp(b->Neg(x))), XlaHelpers::FloatLiteral(b, input_type(0), 0.5))); XLAJIT_MAKE_UNARY(Softplus, b->Log(b->Add(b->Exp(x), XlaHelpers::One(b, input_type(0))))); +// softsign(x) = x / (abs(x) + 1) +XLAJIT_MAKE_UNARY(Softsign, + b->Div(x, + b->Add(b->Abs(x), XlaHelpers::One(b, input_type(0))))); XLAJIT_MAKE_UNARY(Sqrt, b->Pow(x, XlaHelpers::FloatLiteral(b, input_type(0), 0.5))); XLAJIT_MAKE_UNARY(Square, b->Mul(x, x)); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 7e59b70abe0..98cc3401c14 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -847,6 +847,7 @@ cc_test( srcs = ["hlo_ordering_test.cc"], deps = [ ":hlo", + ":hlo_dataflow_analysis", ":hlo_ordering", ":hlo_scheduling", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc index bef4ecd480d..40fa3a67bde 100644 --- a/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc +++ b/tensorflow/compiler/xla/service/cpu/parallel_cpu_executable.cc @@ -241,7 +241,7 @@ Status Executor::Run() { completion_queue_.pop_front(); break; } - } while (1); + } while (true); TF_ASSIGN_OR_RETURN(const BufferAllocation::Slice result_slice, assignment_->GetUniqueTopLevelSlice(instruction)); void* result_buffer = diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc index 669ebb55bec..6efd0bcee58 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.cc @@ -24,16 +24,14 @@ limitations under the License. 
namespace xla { -Status DfsHloVisitor::HandleElementwiseUnary(HloInstruction* hlo, - HloOpcode opcode) { +Status DfsHloVisitor::HandleElementwiseUnary(HloInstruction* hlo) { return Unimplemented("DfsHloVisitor::HandleElementwiseUnary: %s", - HloOpcodeString(opcode).c_str()); + HloOpcodeString(hlo->opcode()).c_str()); } -Status DfsHloVisitor::HandleElementwiseBinary(HloInstruction* hlo, - HloOpcode opcode) { +Status DfsHloVisitor::HandleElementwiseBinary(HloInstruction* hlo) { return Unimplemented("DfsHloVisitor::HandleElementwiseBinary: %s", - HloOpcodeString(opcode).c_str()); + HloOpcodeString(hlo->opcode()).c_str()); } DfsHloVisitor::VisitState DfsHloVisitor::GetVisitState( diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h index a1a3a882c7a..2f21043a1d3 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor.h @@ -63,37 +63,37 @@ class DfsHloVisitor { // These routines are self-descriptive, see class comment for usage // information. - virtual Status HandleElementwiseUnary(HloInstruction* hlo, HloOpcode opcode); - virtual Status HandleElementwiseBinary(HloInstruction* hlo, HloOpcode opcode); + virtual Status HandleElementwiseUnary(HloInstruction* hlo); + virtual Status HandleElementwiseBinary(HloInstruction* hlo); virtual Status HandleClamp(HloInstruction* clamp, HloInstruction* min, HloInstruction* arg, HloInstruction* max) = 0; virtual Status HandleSelect(HloInstruction* select, HloInstruction* pred, HloInstruction* on_true, HloInstruction* on_false) = 0; virtual Status HandleMaximum(HloInstruction* maximum) { - return HandleElementwiseBinary(maximum, HloOpcode::kMaximum); + return HandleElementwiseBinary(maximum); } virtual Status HandleMinimum(HloInstruction* minimum) { - return HandleElementwiseBinary(minimum, HloOpcode::kMinimum); + return HandleElementwiseBinary(minimum); } virtual Status HandleConcatenate( HloInstruction* concatenate, tensorflow::gtl::ArraySlice operands) = 0; virtual Status HandleConvert(HloInstruction* convert) { - return HandleElementwiseUnary(convert, HloOpcode::kConvert); + return HandleElementwiseUnary(convert); } virtual Status HandleCopy(HloInstruction* copy) { - return HandleElementwiseUnary(copy, HloOpcode::kCopy); + return HandleElementwiseUnary(copy); } virtual Status HandleMultiply(HloInstruction* multiply, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(multiply, HloOpcode::kMultiply); + return HandleElementwiseBinary(multiply); } virtual Status HandleDot(HloInstruction* dot, HloInstruction* lhs, HloInstruction* rhs) = 0; virtual Status HandlePower(HloInstruction* power, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(power, HloOpcode::kPower); + return HandleElementwiseBinary(power); } virtual Status HandleConvolution(HloInstruction* convolution, HloInstruction* lhs, HloInstruction* rhs, @@ -101,73 +101,72 @@ class DfsHloVisitor { virtual Status HandleCrossReplicaSum(HloInstruction* crs) = 0; virtual Status HandleCompare(HloInstruction* compare, HloOpcode opcode, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(compare, opcode); + return HandleElementwiseBinary(compare); } virtual Status HandleAdd(HloInstruction* add, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(add, HloOpcode::kAdd); + return HandleElementwiseBinary(add); } virtual Status HandleDivide(HloInstruction* divide, HloInstruction* lhs, 
HloInstruction* rhs) { - return HandleElementwiseBinary(divide, HloOpcode::kDivide); + return HandleElementwiseBinary(divide); } virtual Status HandleRemainder(HloInstruction* remainder, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(remainder, HloOpcode::kRemainder); + return HandleElementwiseBinary(remainder); } virtual Status HandleSubtract(HloInstruction* subtract, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(subtract, HloOpcode::kSubtract); + return HandleElementwiseBinary(subtract); } virtual Status HandleAbs(HloInstruction* abs, HloInstruction* operand) { - return HandleElementwiseUnary(abs, HloOpcode::kAbs); + return HandleElementwiseUnary(abs); } virtual Status HandleSign(HloInstruction* sign, HloInstruction* operand) { - return HandleElementwiseUnary(sign, HloOpcode::kSign); + return HandleElementwiseUnary(sign); } virtual Status HandleNegate(HloInstruction* negate, HloInstruction* operand) { - return HandleElementwiseUnary(negate, HloOpcode::kNegate); + return HandleElementwiseUnary(negate); } virtual Status HandleExp(HloInstruction* exp, HloInstruction* operand) { - return HandleElementwiseUnary(exp, HloOpcode::kExp); + return HandleElementwiseUnary(exp); } virtual Status HandleFloor(HloInstruction* floor, HloInstruction* operand) { - return HandleElementwiseUnary(floor, HloOpcode::kFloor); + return HandleElementwiseUnary(floor); } virtual Status HandleCeil(HloInstruction* ceil, HloInstruction* operand) { - return HandleElementwiseUnary(ceil, HloOpcode::kCeil); + return HandleElementwiseUnary(ceil); } virtual Status HandleLog(HloInstruction* log, HloInstruction* operand) { - return HandleElementwiseUnary(log, HloOpcode::kLog); + return HandleElementwiseUnary(log); } virtual Status HandleCos(HloInstruction* cos, HloInstruction* operand) { - return HandleElementwiseUnary(cos, HloOpcode::kCos); + return HandleElementwiseUnary(cos); } virtual Status HandleSin(HloInstruction* sin, HloInstruction* operand) { - return HandleElementwiseUnary(sin, HloOpcode::kSin); + return HandleElementwiseUnary(sin); } virtual Status HandleTanh(HloInstruction* tanh, HloInstruction* operand) { - return HandleElementwiseUnary(tanh, HloOpcode::kTanh); + return HandleElementwiseUnary(tanh); } virtual Status HandleIsFinite(HloInstruction* is_finite, HloInstruction* operand) { - return HandleElementwiseUnary(is_finite, HloOpcode::kIsFinite); + return HandleElementwiseUnary(is_finite); } virtual Status HandleLogicalAnd(HloInstruction* logical_and, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(logical_and, HloOpcode::kLogicalAnd); + return HandleElementwiseBinary(logical_and); } virtual Status HandleLogicalNot(HloInstruction* logical_not, HloInstruction* operand) { - return HandleElementwiseUnary(logical_not, HloOpcode::kLogicalNot); + return HandleElementwiseUnary(logical_not); } virtual Status HandleLogicalOr(HloInstruction* logical_or, HloInstruction* lhs, HloInstruction* rhs) { - return HandleElementwiseBinary(logical_or, HloOpcode::kLogicalOr); + return HandleElementwiseBinary(logical_or); } virtual Status HandleReducePrecision(HloInstruction* reduce_precision) { - return HandleElementwiseUnary(reduce_precision, - HloOpcode::kReducePrecision); + return HandleElementwiseUnary(reduce_precision); } virtual Status HandleInfeed(HloInstruction* infeed) = 0; diff --git a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h index 
10f8ae9b044..a5fe1205984 100644 --- a/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h +++ b/tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h @@ -41,12 +41,10 @@ class DfsHloVisitorWithDefault : public DfsHloVisitor { // Default action performed on HloInstruction. virtual Status DefaultAction(HloInstruction* hlo_instruction) = 0; - Status HandleElementwiseUnary(HloInstruction* hlo, - HloOpcode opcode) override { + Status HandleElementwiseUnary(HloInstruction* hlo) override { return DefaultAction(hlo); } - Status HandleElementwiseBinary(HloInstruction* hlo, - HloOpcode opcode) override { + Status HandleElementwiseBinary(HloInstruction* hlo) override { return DefaultAction(hlo); } diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index b02138325ed..350dbc321fb 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -709,7 +709,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeRngElementGenerator( } else { auto r = ir_builder_->CreateSub(q, p); auto leading_zeros = llvm_ir::EmitCallToIntrinsic( - llvm::Intrinsic::ctlz, {r, ir_builder_->getInt1(1)}, + llvm::Intrinsic::ctlz, {r, ir_builder_->getInt1(true)}, {param_ir_type}, ir_builder_); auto in_block = ir_builder_->GetInsertBlock(); diff --git a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc index d044462f9a7..5edaaba3ebe 100644 --- a/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/elemental_ir_emitter.cc @@ -334,7 +334,7 @@ llvm_ir::ElementGenerator GpuElementalIrEmitter::MakeElementGenerator( SetToFirstInsertPoint(loops.GetInnerLoopBodyBasicBlock(), ir_builder_); IrArray::Index input_index(index.size()); - llvm::Value* in_bounds = ir_builder_->getInt1(1); + llvm::Value* in_bounds = ir_builder_->getInt1(true); for (size_t i = 0; i < index.size(); ++i) { llvm::Value* stridden_index = ir_builder_->CreateNSWMul( index[i], ir_builder_->getInt64(window.dimensions(i).stride())); diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc index 2a999f52f01..2e7765c4c61 100644 --- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc +++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc @@ -389,7 +389,7 @@ StatusOr CompileModuleToPtx(llvm::Module* module, // Loop unrolling exposes more opportunities for SROA. Therefore, we run SROA // again after the standard optimization passes [http://b/13329423]. - // TODO(jingyue): SROA may further expose more optimization opportunities, such + // TODO(jingyue): SROA may further expose more optimization opportunities such // as more precise alias analysis and more function inlining (SROA may change // the inlining cost of a function). For now, running SROA already emits good // enough code for the evaluated benchmarks. 
We may want to run more diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc index 0beea423798..3dd8ac6dc5f 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.cc @@ -37,6 +37,230 @@ namespace xla { using ::tensorflow::strings::StrAppend; using ::tensorflow::strings::StrCat; +// Data structure used to construct the alias analysis. Thrown away after alias +// analysis is complete. This data structure keeps track of which sets of +// HloValues must be in the same HloBuffer. This is maintained as a map from a +// buffer identifier (BufferNumber) to set of HLoValues. +// +// Initially each value is its own buffer. In MergeAliasedBuffers, sets of +// values which must share the same buffer are merged together. The end result +// is a partitioning of all HloValues into sets where each set needs its own +// HloBuffer. By performing this analysis without constructing HloBuffers on the +// fly, we can after-the-fact construct a vector of contiguously numbered +// HloBuffers after the buffer requirement has been determined. +class BufferValueMap { + public: + // A unique identifier for a set of colocated values which must share the same + // buffer. This is not necessarily the same as the HloBuffer::Id which will + // ultimately contain the values. The reason is that HloBuffer::Id's are + // contiguous, while BufferNumbers may not be. BufferNumbers may not be + // dense because buffers may be created and destroyed during the analysis + // construction process. + using BufferNumber = int64; + + explicit BufferValueMap(const HloDataflowAnalysis& dataflow) + : dataflow_(dataflow) { + buffers_.reserve(dataflow_.values().size()); + value_to_buffer_number_.reserve(dataflow_.values().size()); + for (const HloValue* value : dataflow_.values()) { + BufferNumber buffer_number = next_buffer_number_++; + buffers_[buffer_number].insert(value); + value_to_buffer_number_[value] = buffer_number; + } + } + + // Merge together sets of HloValues which must be in the same HloBuffer + // because of aliasing rules (eg, in-place kWhile instruction). + void MergeAliasedBuffers() { + for (const HloValue* value : dataflow_.values()) { + VLOG(3) << "Merging colocated values, value: " << value->ToShortString(); + + // Gather the set of buffers with aliasing rules (eg, kWhile) which this + // value must be contained in. + std::vector aliased_buffers = ComputeAliasedBuffers(*value); + + BufferNumber current_buffer = value_to_buffer_number_.at(value); + if (aliased_buffers.empty()) { + // The buffer containing 'value' aliases no other buffers. If the buffer + // containing 'value' already only contains 'value', then no change is + // necessary. If the buffer containing 'value' does contain other + // values, then remove 'value' from the buffer and create a new buffer + // containing only 'value' + if (buffers_.at(current_buffer).size() == 1) { + CHECK_EQ(*buffers_.at(current_buffer).begin(), value); + } else { + MoveValueToNewBuffer(*value); + } + } else { + // If multiple buffers are aliased merge these buffers together into a + // single buffer (arbitrarily chosen as the first buffer in the vector). 
+ if (aliased_buffers.size() > 1) { + for (int64 i = 1; i < aliased_buffers.size(); ++i) { + MergeBuffers(/*from=*/aliased_buffers[i], + /*to=*/aliased_buffers[0]); + } + } + BufferNumber new_buffer = aliased_buffers[0]; + if (current_buffer != new_buffer) { + MoveValueToBuffer(*value, new_buffer); + } + } + } + } + + // Compute and return a sorted vector of all BufferNumbers. Can be used to + // iterate through all buffers stabily. + std::vector ComputeSortedBufferNumbers() const { + std::vector buffer_numbers; + for (const auto& pair : buffers_) { + buffer_numbers.push_back(pair.first); + } + std::sort(buffer_numbers.begin(), buffer_numbers.end()); + return buffer_numbers; + } + + // Return a set of all the values in the given buffer. + const tensorflow::gtl::FlatSet& GetValuesInBuffer( + BufferNumber buffer_number) const { + return buffers_.at(buffer_number); + } + + private: + // Create a new buffer. + void NewBuffer(const HloValue& value) { + BufferNumber buffer_number = next_buffer_number_++; + buffers_[buffer_number].insert(&value); + value_to_buffer_number_[&value] = buffer_number; + } + + // Move the given value into a new buffer containing only the value. + void MoveValueToNewBuffer(const HloValue& value) { + BufferNumber new_buffer_number = next_buffer_number_++; + buffers_[new_buffer_number]; + MoveValueToBuffer(value, new_buffer_number); + } + + // Move the given value into the given buffer. + void MoveValueToBuffer(const HloValue& value, BufferNumber buffer_number) { + BufferNumber old_buffer_number = value_to_buffer_number_.at(&value); + buffers_.at(old_buffer_number).erase(&value); + if (buffers_.at(old_buffer_number).empty()) { + buffers_.erase(old_buffer_number); + } + + buffers_.at(buffer_number).insert(&value); + value_to_buffer_number_.at(&value) = buffer_number; + } + + // Merge the buffer 'from' into the buffer 'to'. + void MergeBuffers(BufferNumber from, BufferNumber to) { + auto& from_value_set = buffers_.at(from); + buffers_.at(to).insert(from_value_set.begin(), from_value_set.end()); + // NOTE: using a union-find algorithm to hold the colocated values might be + // faster. + for (const HloValue* value : from_value_set) { + value_to_buffer_number_.at(value) = to; + } + buffers_.erase(from); + } + + BufferNumber GetBufferForValue(const HloValue& value) { + return value_to_buffer_number_.at(&value); + } + + // Compute and return a vector of buffers that the given value must be + // contained in due to HLO aliasing rules. + std::vector ComputeAliasedBuffers(const HloValue& value) { + // Value is init of a while (use is while). + std::vector aliased_buffers; + for (const HloUse& use : value.uses()) { + VLOG(1) << "use of value " << value.ToShortString() << ": " << use; + if (use.instruction->opcode() == HloOpcode::kWhile) { + // Determine the while value that this shares a buffer with. + const HloValue& while_value = + dataflow_.GetUniqueValueAt(use.instruction, use.operand_index); + aliased_buffers.push_back(GetBufferForValue(while_value)); + VLOG(3) << " value is init value to a while; must share buffer with " + "while value " + << while_value.ToShortString(); + } + } + + // Value is a parameter of a while body/condition. 
+ if (value.defining_instruction()->opcode() == HloOpcode::kParameter) { + const HloComputation* computation = + value.defining_instruction()->parent(); + const CallGraphNode& call_graph_node = + dataflow_.call_graph().GetNode(computation); + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + if (callsite.instruction()->opcode() == HloOpcode::kWhile) { + // Call graph must have been flattened. + CHECK_EQ(call_graph_node.caller_callsites().size(), 1); + + const HloValue& while_value = dataflow_.GetUniqueValueAt( + callsite.instruction(), value.defining_index()); + VLOG(3) << " value is parameter value of the body or condition of a " + "while; must share buffer with while value " + << while_value.ToShortString(); + aliased_buffers.push_back(GetBufferForValue(while_value)); + } + } + } + + // Value is the root of a while body. + for (const HloPosition& position : value.positions()) { + const HloComputation* computation = position.instruction->parent(); + const CallGraphNode& call_graph_node = + dataflow_.call_graph().GetNode(computation); + if (position.instruction == computation->root_instruction()) { + for (const CallSite& callsite : call_graph_node.caller_callsites()) { + if (callsite.instruction()->opcode() == HloOpcode::kWhile && + callsite.instruction()->while_body() == computation) { + // Call graph must have been flattened. + CHECK_EQ(call_graph_node.caller_callsites().size(), 1); + + const HloValue& while_value = dataflow_.GetUniqueValueAt( + callsite.instruction(), position.index); + VLOG(3) << " value is root the body computation of a while; must " + "share buffer with while value " + << while_value.ToShortString(); + aliased_buffers.push_back(GetBufferForValue(while_value)); + } + } + } + } + + // Value is the output of the while instruction itself. + if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { + VLOG(3) << " value is output of a while instruction"; + aliased_buffers.push_back(GetBufferForValue(value)); + } + + // Uniquify aliased buffers. + std::sort(aliased_buffers.begin(), aliased_buffers.end()); + aliased_buffers.erase( + std::unique(aliased_buffers.begin(), aliased_buffers.end()), + aliased_buffers.end()); + + return aliased_buffers; + } + + // Dataflow analysis used to construct the buffer map. + const HloDataflowAnalysis& dataflow_; + + // A map containing the set of values contained in each buffer. + tensorflow::gtl::FlatMap> + buffers_; + + // A map indicating which buffer each value is contained in. + tensorflow::gtl::FlatMap + value_to_buffer_number_; + + // The buffer number of the next buffer to be created. + BufferNumber next_buffer_number_ = 0; +}; + HloAliasAnalysis::HloAliasAnalysis(HloModule* module) : module_(module) {} const HloBuffer& HloAliasAnalysis::GetUniqueBufferAt( @@ -99,10 +323,11 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( } } else { // It's possible for multiple values at this index to have the same - // HloBuffer. This does not result in non-distictness. To account for this - // case, add all of the buffers at this index after checking whether each - // buffer exists at an earlier index. This is a corner case, however, as - // the number of values at an index is almost always one. + // HloBuffer. This does not result in non-distictness. To account for + // this case, add all of the buffers at this index after checking + // whether each buffer exists at an earlier index. This is a corner + // case, however, as the number of values at an index is almost always + // one. 
std::vector buffers_at_this_index; for (const HloValue* value : value_set.values()) { const HloBuffer* buffer = &GetBufferContainingValue(*value); @@ -118,15 +343,6 @@ bool HloAliasAnalysis::InstructionBuffersAreDistinct( return true; } -void HloAliasAnalysis::InitializeBufferSets() { - // Initially define a buffer for every HloValue in the module. - for (const HloValue& value : dataflow_analysis_->values()) { - HloBuffer& buffer = NewHloBuffer(); - buffer.AddValue(value); - value_to_buffer_[&value] = &buffer; - } -} - Status HloAliasAnalysis::Verify() const { // Verify consistency between the value_to_buffer_ map and // HloBuffer::values(). @@ -137,9 +353,8 @@ Status HloAliasAnalysis::Verify() const { value) != buffer.values().end()); } - for (const auto& pair : buffers_) { - const HloBuffer::Id id = pair.first; - const HloBuffer& buffer = pair.second; + for (HloBuffer::Id id = 0; id < buffers_.size(); ++id) { + const HloBuffer& buffer = buffers_[id]; TF_RET_CHECK(buffer.id() == id); HloValue::Id last_value_id = -1; @@ -152,116 +367,9 @@ Status HloAliasAnalysis::Verify() const { } } - if (!buffers_vector_.empty()) { - // buffers_vector_ should be a vector of all HloBuffers sorted by id. - std::vector buffers; - for (const auto& id_buffer : buffers_) { - buffers.push_back(&id_buffer.second); - } - std::sort(buffers.begin(), buffers.end(), HloBuffer::IdLessThan); - TF_RET_CHECK(buffers_vector_ == buffers); - } - return Status::OK(); } -Status HloAliasAnalysis::VerifyAgainstReference() const { - TF_RETURN_IF_ERROR(Verify()); - - TF_ASSIGN_OR_RETURN(std::unique_ptr reference, - Run(module_)); - TF_RETURN_IF_ERROR(reference->Verify()); - - VLOG(2) << "This analysis:"; - XLA_VLOG_LINES(2, ToString()); - VLOG(2) << "Reference:"; - XLA_VLOG_LINES(2, reference->ToString()); - - // Create map from HloValue in the reference analysis to HloValue in this - // analysis and vice versa. - tensorflow::gtl::FlatMap reference_to_this; - tensorflow::gtl::FlatMap this_to_reference; - for (const HloValue& value : dataflow_analysis().values()) { - const HloValue& reference_value = - reference->dataflow_analysis().GetValueDefinedAt( - value.defining_instruction(), value.defining_index()); - reference_to_this[&reference_value] = &value; - this_to_reference[&value] = &reference_value; - } - - TF_RET_CHECK(buffers_.size() == reference->buffers_.size()) - << "Different number of buffers (" << buffers_.size() - << " != " << reference->buffers_.size() << ")"; - for (const auto& pair : reference->buffers_) { - const HloBuffer& reference_buffer = pair.second; - - // Find the corresponding buffer in the reference by taking the first value - // in the buffer, finding the corresponding value in the reference, and then - // finding the buffer holding that value. - TF_RET_CHECK(!reference_buffer.values().empty()); - const HloValue* reference_value = reference_buffer.values()[0]; - const HloValue* value = reference_to_this.at(reference_value); - const HloBuffer& buffer = GetBufferContainingValue(*value); - - // The buffer and the reference should have the exact same values. To make - // comparison easy, sort the values in the reference buffer identically to - // the values in the non-reference buffer (ie, by the corresponding id of - // the non-reference value). 
- std::vector reference_values = reference_buffer.values(); - std::sort(reference_values.begin(), reference_values.end(), - [&reference_to_this](const HloValue* a, const HloValue* b) { - return reference_to_this.at(a)->id() < - reference_to_this.at(b)->id(); - }); - TF_RET_CHECK(reference_values.size() == buffer.values().size()); - for (int i = 0; i < buffer.values().size(); ++i) { - TF_RET_CHECK(*reference_values[i] == *buffer.values()[i]) - << "Buffer:\n " << buffer - << "\ndoes not have the same values as reference buffer:\n " - << reference_buffer; - } - } - - return Status::OK(); -} - -HloBuffer& HloAliasAnalysis::NewHloBuffer() { - HloBuffer::Id buffer_id = next_buffer_id_++; - auto emplaced = buffers_.emplace(std::piecewise_construct, - std::forward_as_tuple(buffer_id), - std::forward_as_tuple(buffer_id)); - CHECK(emplaced.second); - - buffers_vector_.clear(); - - return emplaced.first->second; -} - -void HloAliasAnalysis::MoveValueToNewBuffer(const HloValue& value) { - HloBuffer& new_buffer = NewHloBuffer(); - MoveValueToBuffer(value, &new_buffer); - - VLOG(3) << "Moved value " << value.ToShortString() << " into new buffer " - << new_buffer.id(); -} - -void HloAliasAnalysis::MoveValueToBuffer(const HloValue& value, - HloBuffer* buffer) { - HloBuffer& old_buffer = GetBufferContainingValue(value); - CHECK_NE(buffer, &old_buffer); - VLOG(3) << "Moved value " << value.ToShortString() << " from buffer " - << old_buffer.id() << " into buffer " << buffer->id(); - old_buffer.RemoveValue(value); - if (old_buffer.values().empty()) { - VLOG(3) << "Buffer " << old_buffer.id() << " now empty. Removing."; - buffers_.erase(old_buffer.id()); - buffers_vector_.clear(); - } - - buffer->AddValue(value); - value_to_buffer_[&value] = buffer; -} - string HloAliasAnalysis::ToString() const { string out = StrCat("HloAliasAnalysis, module ", module_->name(), "\n"); StrAppend(&out, " Buffers at each position:\n"); @@ -290,10 +398,10 @@ string HloAliasAnalysis::ToString() const { } StrAppend(&out, " Buffers:\n"); - for (const HloBuffer* buffer : buffers()) { - StrAppend(&out, " ", buffer->ToString(), "\n"); + for (const HloBuffer& buffer : buffers()) { + StrAppend(&out, " ", buffer.ToString(), "\n"); StrAppend(&out, " positions:\n"); - for (const HloPosition& position : buffer->ComputePositions()) { + for (const HloPosition& position : buffer.ComputePositions()) { StrAppend(&out, " ", position.ToString(), "\n"); } } @@ -301,217 +409,6 @@ string HloAliasAnalysis::ToString() const { return out; } -const std::vector& HloAliasAnalysis::buffers() const { - if (buffers_vector_.empty()) { - // Lazily construct vector of buffers. 
- buffers_vector_.reserve(buffers_.size()); - for (auto& pair : buffers_) { - buffers_vector_.push_back(&pair.second); - } - std::sort(buffers_vector_.begin(), buffers_vector_.end(), - HloBuffer::IdLessThan); - } else { - CHECK_EQ(buffers_vector_.size(), buffers_.size()); - for (const HloBuffer* buffer : buffers_vector_) { - DCHECK(ContainsKey(buffers_, buffer->id())); - DCHECK(&GetBuffer(buffer->id()) == buffer); - } - } - return buffers_vector_; -} - -void HloAliasAnalysis::UpdateAtInstructions( - tensorflow::gtl::ArraySlice instructions) { - VLOG(4) << "Updated HLO module:"; - XLA_VLOG_LINES(4, module_->ToString()); - - VLOG(3) << "Before update:"; - XLA_VLOG_LINES(3, ToString()); - - std::vector values_to_update; - for (const HloInstruction* instruction : instructions) { - for (auto& pair : dataflow_analysis().GetInstructionValueSet(instruction)) { - for (const HloValue* value : pair.second.values()) { - values_to_update.push_back(value); - } - } - } - - UpdateBuffersForValues(values_to_update); - - VLOG(3) << "After update:"; - XLA_VLOG_LINES(3, ToString()); -} - -void HloAliasAnalysis::UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand) { - VLOG(1) << "UpdateAfterChangingOperand(" << instruction->name() << ", " - << old_operand->name() << " => " << new_operand->name() << ")"; - - dataflow_analysis_->UpdateAfterChangingOperand(instruction, old_operand, - new_operand); - TF_DCHECK_OK(dataflow_analysis_->VerifyAgainstReference()); - - VLOG(4) << "Updated dataflow:"; - XLA_VLOG_LINES(4, dataflow_analysis_->ToString()); - - UpdateAtInstructions({instruction, old_operand, new_operand}); -} - -void HloAliasAnalysis::UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root) { - VLOG(1) << "UpdateAfterChangingRoot(" << old_root->name() << " => " - << new_root->name() << ")"; - - dataflow_analysis_->UpdateAfterChangingRoot(old_root, new_root); - TF_DCHECK_OK(dataflow_analysis_->VerifyAgainstReference()); - - VLOG(4) << "Updated dataflow:"; - XLA_VLOG_LINES(4, dataflow_analysis_->ToString()); - - UpdateAtInstructions({old_root, new_root}); -} - -std::vector HloAliasAnalysis::ComputeAliasedBuffers( - const HloValue& value) { - std::vector aliased_buffers; - - // Value is init of a while (use is while). - for (const HloUse& use : value.uses()) { - VLOG(1) << "use of value " << value.ToShortString() << ": " << use; - if (use.instruction->opcode() == HloOpcode::kWhile) { - // Determine the while value that this shares a buffer with. - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - use.instruction, use.operand_index); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - VLOG(3) << " value is init value to a while; must share buffer with " - "while value " - << while_value.ToShortString(); - } - } - - // Value is a parameter of a while body/condition. - if (value.defining_instruction()->opcode() == HloOpcode::kParameter) { - const HloComputation* computation = value.defining_instruction()->parent(); - const CallGraphNode& call_graph_node = - dataflow_analysis().call_graph().GetNode(computation); - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kWhile) { - // Call graph must have been flattened. 
- CHECK_EQ(call_graph_node.caller_callsites().size(), 1); - - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - callsite.instruction(), value.defining_index()); - VLOG(3) << " value is parameter value of the body or condition of a " - "while; must share buffer with while value " - << while_value.ToShortString(); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - } - } - } - - // Value is the root of a while body. - for (const HloPosition& position : value.positions()) { - const HloComputation* computation = position.instruction->parent(); - const CallGraphNode& call_graph_node = - dataflow_analysis().call_graph().GetNode(computation); - if (position.instruction == computation->root_instruction()) { - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kWhile && - callsite.instruction()->while_body() == computation) { - // Call graph must have been flattened. - CHECK_EQ(call_graph_node.caller_callsites().size(), 1); - - // If the value appears in the root of a while body, then - // necessarily the value is defined in the body as well. - CHECK_EQ(value.defining_instruction()->parent(), computation); - - const HloValue& while_value = dataflow_analysis().GetUniqueValueAt( - callsite.instruction(), position.index); - VLOG(3) << " value is root the body computation of a while; must " - "share buffer with while value " - << while_value.ToShortString(); - aliased_buffers.push_back(&GetBufferContainingValue(while_value)); - } - } - } - } - - // Value is in the while instruction itself. - if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { - VLOG(3) << " value is output of a while instruction"; - aliased_buffers.push_back(&GetUniqueBufferAt(value.defining_instruction(), - value.defining_index())); - } - - // Uniquify aliased buffers. - std::sort(aliased_buffers.begin(), aliased_buffers.end(), - HloBuffer::IdLessThan); - aliased_buffers.erase( - std::unique(aliased_buffers.begin(), aliased_buffers.end()), - aliased_buffers.end()); - - return aliased_buffers; -} - -// This method recomputes the HloBuffer for each of the given HloValues. The -// method does not necessarily update the HloBuffer of values which share a -// buffer with the given values, but are not explicitly passed in -// 'values'. Therefore, the caller must pass in all values which may require an -// update according to the kind of HLO graph change which occurred: operand -// changed (UpdateAfterChangingOperand), or root of computation changed -// (UpdateAfterChangingRoot). -void HloAliasAnalysis::UpdateBuffersForValues( - tensorflow::gtl::ArraySlice values) { - for (const HloValue* value : values) { - VLOG(3) << "Updating buffer for value: " << value->ToShortString(); - - // Gather the set of buffer with aliasing rules (eg, kWhile) which this - // value must be contained in due. - std::vector aliased_buffers = ComputeAliasedBuffers(*value); - - HloBuffer& current_buffer = GetBufferContainingValue(*value); - if (aliased_buffers.empty()) { - // The buffer containing 'value' aliases no other buffers. If the buffer - // containing 'value' already only contains 'value', then no change is - // necessary. 
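The removed ComputeAliasedBuffers above encodes one aliasing rule in several places: the while's init operand, the body and condition parameters, the body root, and the while's own output must all end up in a single buffer. The BufferValueMap::MergeAliasedBuffers used by the new Run (next hunk) expresses the same constraint as a merge of value sets, and a union-find over value ids is the simplest way to picture it. The sketch below is a standalone illustration with made-up value ids, not the XLA data structures:

#include <cassert>
#include <numeric>
#include <vector>

// Tiny union-find; each set corresponds to one buffer.
struct BufferSets {
  std::vector<int> parent;
  explicit BufferSets(int n) : parent(n) {
    std::iota(parent.begin(), parent.end(), 0);
  }
  int Find(int v) { return parent[v] == v ? v : parent[v] = Find(parent[v]); }
  void Merge(int a, int b) { parent[Find(a)] = Find(b); }
};

int main() {
  // Hypothetical value ids: 0 = while init, 1 = body parameter,
  // 2 = condition parameter, 3 = body root, 4 = while result.
  BufferSets sets(5);
  sets.Merge(1, 0);  // body parameter aliases the init value
  sets.Merge(2, 0);  // condition parameter aliases the init value
  sets.Merge(3, 0);  // body root aliases the init value
  sets.Merge(4, 0);  // while result aliases the init value
  // All five values now share a single buffer.
  for (int v = 1; v < 5; ++v) assert(sets.Find(v) == sets.Find(0));
}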
If the buffer containing 'value' does contain other values, - // then remove 'value' from the buffer and create a new buffer containing - // only 'value' - if (current_buffer.values().size() == 1) { - CHECK_EQ(current_buffer.values()[0], value); - } else { - MoveValueToNewBuffer(*value); - } - } else { - // If multiple buffers are aliased, merge these buffers together into a - // single buffer (arbitrarily chosen as the first buffer in the vector). - if (aliased_buffers.size() > 1) { - for (int64 i = 1; i < aliased_buffers.size(); ++i) { - // Make a copy of the values vector because MoveValueToBuffer invalidates - // the values iterator. This could be done more efficiently by moving - // all values at once. - std::vector<const HloValue*> values = aliased_buffers[i]->values(); - for (const HloValue* value : values) { - MoveValueToBuffer(*value, aliased_buffers[0]); - } - } - aliased_buffers.resize(1); - } - - CHECK_EQ(aliased_buffers.size(), 1); - HloBuffer* new_buffer = aliased_buffers[0]; - - if (&current_buffer != new_buffer) { - MoveValueToBuffer(*value, new_buffer); - } - } - - VLOG(4) << "Analysis after update:"; - XLA_VLOG_LINES(4, ToString()); - } -} - /* static */ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run( HloModule* module) { @@ -524,18 +421,28 @@ StatusOr<std::unique_ptr<HloAliasAnalysis>> HloAliasAnalysis::Run( HloDataflowAnalysis::Run(module, /*ssa_form=*/true, /*bitcast_defines_value=*/false)); - alias_analysis->InitializeBufferSets(); + BufferValueMap buffer_map(alias_analysis->dataflow_analysis()); + buffer_map.MergeAliasedBuffers(); - VLOG(3) << "After initialization:"; - XLA_VLOG_LINES(3, alias_analysis->ToString()); - - std::vector<const HloValue*> all_values; - for (const HloValue& value : alias_analysis->dataflow_analysis().values()) { - all_values.push_back(&value); + // Create a vector of HloBuffers, one for each set of values in the + // BufferValueMap. Create the HloBuffers as a vector of contiguously numbered + // buffers. + std::vector<BufferValueMap::BufferNumber> sorted_buffer_numbers = + buffer_map.ComputeSortedBufferNumbers(); + alias_analysis->buffers_.reserve(sorted_buffer_numbers.size()); + HloBuffer::Id next_id = 0; + for (BufferValueMap::BufferNumber buffer_number : sorted_buffer_numbers) { + auto& value_set = buffer_map.GetValuesInBuffer(buffer_number); + std::vector<const HloValue*> sorted_values(value_set.begin(), + value_set.end()); + std::sort(sorted_values.begin(), sorted_values.end(), HloValue::IdLessThan); + alias_analysis->buffers_.emplace_back(next_id++, sorted_values); + for (const HloValue* value : sorted_values) { + alias_analysis->value_to_buffer_[value] = + &alias_analysis->buffers_.back(); + } } - alias_analysis->UpdateBuffersForValues(all_values); - TF_DCHECK_OK(alias_analysis->Verify()); XLA_VLOG_LINES(1, alias_analysis->ToString()); diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis.h b/tensorflow/compiler/xla/service/hlo_alias_analysis.h index 1b538f6d1cf..39554e46648 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis.h @@ -74,7 +74,7 @@ class HloAliasAnalysis { // Return a vector of all HloBuffers stably sorted by HloBuffer::Id. This // vector is lazily computed. Mutating operations on HloAliasAnalysis may // invalidate the underlying vector requiring recomputation. - const std::vector<const HloBuffer*>& buffers() const; + const std::vector<HloBuffer>& buffers() const { return buffers_; } // Returns the underlying dataflow analysis used by this alias analysis. const HloDataflowAnalysis& dataflow_analysis() const { @@ -90,50 +90,13 @@ class HloAliasAnalysis { // output of the given instruction. 
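With the incremental machinery gone, the rewritten Run above builds the final buffers in one pass: ask the BufferValueMap for its value groups, sort each group's values by id, materialize one contiguously numbered HloBuffer per group, and point every value at its buffer. A compact standalone sketch of that construction (hypothetical Buffer struct and int value ids, not the real types):

#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <vector>

struct Buffer {
  int id;
  std::vector<int> values;  // sorted value ids
};

int main() {
  // Hypothetical output of a BufferValueMap: group number -> set of value ids.
  std::map<int, std::set<int>> groups = {{7, {3, 1}}, {2, {0}}, {9, {4, 2}}};

  std::vector<Buffer> buffers;
  std::map<int, const Buffer*> value_to_buffer;
  buffers.reserve(groups.size());
  int next_id = 0;
  for (const auto& group : groups) {
    std::vector<int> sorted_values(group.second.begin(), group.second.end());
    std::sort(sorted_values.begin(), sorted_values.end());
    buffers.push_back(Buffer{next_id++, sorted_values});
  }
  // Fill the value map only after the vector stops growing so the stored
  // pointers stay valid.
  for (const Buffer& buffer : buffers) {
    for (int value : buffer.values) value_to_buffer[value] = &buffer;
  }
  assert(buffers.size() == 3 && value_to_buffer.at(3)->id == 1);
}

Reserving the buffer vector up front matters in the real code because value_to_buffer_ stores pointers into it; the sketch defers the map fill for the same reason.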
bool InstructionBuffersAreDistinct(const HloInstruction* instruction) const; - // Updates the analysis after the operands of 'instruction' have changed or if - // 'instruction' has been made the root of a computation. Analysis update is - // not possible if instructions have been added or removed from the graph. - void UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand); - void UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root); - // Compare the dataflow analysis against a clean recomputation of the // analysis. Returns an error status if there is a mismatch. Useful for // verifying the correctness after updates to the analysis. Status VerifyAgainstReference() const; protected: - HloAliasAnalysis(HloModule* module); - - // Create a new empty HloBuffer. - HloBuffer& NewHloBuffer(); - - // Move the given value to the given buffer. The value is removed from it's - // current buffer. - void MoveValueToBuffer(const HloValue& value, HloBuffer* buffer); - - // Move the given value to a newly created buffer. The value is removed from - // it's current buffer. - void MoveValueToNewBuffer(const HloValue& value); - - // Construct the initial set of buffer sets where an HloBuffer is created for - // each HloValue in the module. - void InitializeBufferSets(); - - // Compute and return the buffers with aliasing rules (eg, kWhile) which the - // given value must be contained in. - std::vector ComputeAliasedBuffers(const HloValue& value); - - // Recompute the HloBuffers for the given values. - void UpdateBuffersForValues( - tensorflow::gtl::ArraySlice values); - - // Recompute the HloBuffers for all the values which appear in the output of - // the given instructions. - void UpdateAtInstructions( - tensorflow::gtl::ArraySlice instructions); + explicit HloAliasAnalysis(HloModule* module); // Verify various invariants of the alias analysis. Status Verify() const; @@ -143,20 +106,12 @@ class HloAliasAnalysis { // The underlying dataflow analysis used by this alias analysis. std::unique_ptr dataflow_analysis_; - // The map of all HloBuffers in the module. We pass around pointers to the - // mapped HloBuffers, so the underlying container must keep them valid despite - // mutations touching other map entries. - std::unordered_map buffers_; - // A map indicating which buffer a value is contained in. tensorflow::gtl::FlatMap value_to_buffer_; // A lazily constructed vector containing all HloBuffers sorted by // HloBuffer::Id. - mutable std::vector buffers_vector_; - - // The Id to use for the next HloBuffer. - int64 next_buffer_id_ = 0; + std::vector buffers_; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc index e2815d6e648..6e311e25fb9 100644 --- a/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_alias_analysis_test.cc @@ -87,14 +87,13 @@ class HloAliasAnalysisTest : public HloTestBase { // constructed. 
bool AnyValuesInSameBufferInterfere() { DependencyHloOrdering ordering(module_.get()); - for (const HloBuffer* buffer : analysis_->buffers()) { - for (const HloValue* value_a : buffer->values()) { - for (const HloValue* value_b : buffer->values()) { + for (const HloBuffer& buffer : analysis_->buffers()) { + for (const HloValue* value_a : buffer.values()) { + for (const HloValue* value_b : buffer.values()) { if (*value_a != *value_b && - analysis_->dataflow_analysis().MayInterfere(*value_a, *value_b, - ordering)) { + ordering.MayInterfere(*value_a, *value_b)) { VLOG(1) << *value_a << " interferes with " << *value_b - << " in buffer: " << *buffer; + << " in buffer: " << buffer; return true; } } @@ -384,10 +383,7 @@ TEST_F(HloAliasAnalysisTest, SingleWhile) { EXPECT_THAT( GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{0})), - UnorderedElementsAre(GetValueDefinedAt(xla_while, /*index=*/{0}), - GetValueDefinedAt(body_param, /*index=*/{0}), - GetValueDefinedAt(cond_param, /*index=*/{0}), - GetValueDefinedAt(constant1))); + UnorderedElementsAre(GetValueDefinedAt(constant1))); EXPECT_THAT( GetValuesInBuffer(analysis.GetUniqueBufferAt(xla_while, /*index=*/{1})), UnorderedElementsAre(GetValueDefinedAt(constant2), @@ -631,9 +627,9 @@ TEST_F(HloAliasAnalysisTest, SwizzlingWhile) { // HloBuffers. EXPECT_THAT( analysis.buffers(), - UnorderedElementsAre(&analysis.GetUniqueBufferAt(constant1), - &analysis.GetUniqueBufferAt(tuple, /*index=*/{}), - &analysis.GetUniqueBufferAt(cond_constant))); + UnorderedElementsAre(analysis.GetUniqueBufferAt(constant1), + analysis.GetUniqueBufferAt(tuple, /*index=*/{}), + analysis.GetUniqueBufferAt(cond_constant))); // The tuple elements of the while and the three constant inputs should all be // smooshed into the same buffer. @@ -820,127 +816,5 @@ TEST_F(HloAliasAnalysisTest, Bitcast) { analysis.GetUniqueBufferAt(bitcast)); } -TEST_F(HloAliasAnalysisTest, UpdateAnalysisForWhile) { - // Test updating alias analysis after modifying a module with an array shaped - // while: - // - // body(F32[] %param): - // %negate = Negate(%param) - // - // condition(F32[] %param): - // return Constant(false) - // - // entry: - // %constant = Constant(1.0) - // %exp = Exp(%constant) - // return While(%exp, body, condition) - // - auto body_builder = HloComputation::Builder("body"); - auto body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, body_param)); - HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); - - // Condition computation trivially returns a constant "false". 
- auto cond_builder = HloComputation::Builder("condition"); - auto cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - HloComputation* condition = - module_->AddEmbeddedComputation(cond_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kExp, constant)); - auto xla_while = builder.AddInstruction( - HloInstruction::CreateWhile(scalar_shape_, condition, body, exp)); - module_->AddEntryComputation(builder.Build()); - - HloAliasAnalysis& analysis = RunAnalysis(); - - // Sanity check some alias information. - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(negate)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(xla_while)); - - // Set the body root to the body_param. Previously it was Negate(body_param). - body->set_root_instruction(body_param); - - // Prior to updating, verify that the analysis is no longer valid. - Status verify_status = analysis.VerifyAgainstReference(); - EXPECT_FALSE(verify_status.ok()); - - analysis.UpdateAfterChangingRoot(/*old_root=*/negate, - /*new_root*/ body_param); - - // Analysis should be valid after the update. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - // The exponential should now pass through the body transparently. - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_NE(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(negate)); - EXPECT_EQ(analysis.GetUniqueBufferAt(exp), - analysis.GetUniqueBufferAt(xla_while)); - - // Now replace the operand of the while with %constant (was %exp). - TF_ASSERT_OK(exp->ReplaceUseWith(xla_while, constant)); - analysis.UpdateAfterChangingOperand(xla_while, /*old_operand=*/exp, - /*new_operand=*/constant); - - // Analysis should be valid after the update. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(xla_while)); - EXPECT_NE(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(exp)); - EXPECT_NE(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(negate)); - - // And finally make the negate the root of the body again. - body->set_root_instruction(negate); - analysis.UpdateAfterChangingRoot(/*old_root=*/body_param, - /*new_root*/ negate); - - // Analysis should be valid after the update. 
- TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(body_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(cond_param)); - EXPECT_EQ(analysis.GetUniqueBufferAt(negate), - analysis.GetUniqueBufferAt(xla_while)); - EXPECT_EQ(analysis.GetUniqueBufferAt(constant), - analysis.GetUniqueBufferAt(negate)); - - auto value_of = [&analysis](const HloInstruction* instruction) { - return &analysis.dataflow_analysis().GetValueDefinedAt(instruction); - }; - EXPECT_THAT(analysis.GetUniqueBufferAt(negate).values(), - UnorderedElementsAre(value_of(body_param), value_of(cond_param), - value_of(negate), value_of(constant), - value_of(xla_while))); -} - -// Test update tuple element. - } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_buffer.cc b/tensorflow/compiler/xla/service/hlo_buffer.cc index 2bfdd9156ad..e16413f361f 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.cc +++ b/tensorflow/compiler/xla/service/hlo_buffer.cc @@ -36,22 +36,6 @@ namespace xla { using ::tensorflow::str_util::Join; using ::tensorflow::strings::StrCat; -void HloBuffer::AddValue(const HloValue& value) { - values_.push_back(&value); - // Sort vector and remove duplicates. - std::sort(values_.begin(), values_.end(), HloValue::IdLessThan); - values_.erase(std::unique(values_.begin(), values_.end(), HloValue::IdEqual), - values_.end()); -} - -void HloBuffer::RemoveValue(const HloValue& value) { - // The values are sorted, so finding the value could be done in log(n) time - // with a binary search. - auto it = std::find(values_.begin(), values_.end(), &value); - CHECK(it != values_.end()); - values_.erase(it); -} - bool HloBuffer::operator==(const HloBuffer& other) const { bool equal = id() == other.id(); if (equal) { diff --git a/tensorflow/compiler/xla/service/hlo_buffer.h b/tensorflow/compiler/xla/service/hlo_buffer.h index cb961e1601c..4873463b2ea 100644 --- a/tensorflow/compiler/xla/service/hlo_buffer.h +++ b/tensorflow/compiler/xla/service/hlo_buffer.h @@ -84,22 +84,15 @@ class HloBuffer { return a->id() == b->id(); } - HloBuffer(Id id) : id_(id) {} + HloBuffer(Id id, tensorflow::gtl::ArraySlice values) + : id_(id), values_(values.begin(), values.end()) {} // Return the unique identifier for this HloBuffer. Id id() const { return id_; } - // Add a value to the set of values held by this buffer. Also adds the - // HloPositions of the value to the positions vector of the buffer. If the - // buffer already contains this value, then this method is a nop. - void AddValue(const HloValue& value); - void RemoveValue(const HloValue& value); - // Return all values contained in this buffer. const std::vector& values() const { return values_; } - std::vector ComputePositions() const; - // Return the unique HLO value in the buffer. CHECK fails if the buffer does // not contain exactly one value. const HloValue& GetUniqueValue() const { @@ -107,6 +100,8 @@ class HloBuffer { return *values_[0]; } + std::vector ComputePositions() const; + string ToString() const; bool operator==(const HloBuffer& other) const; @@ -118,7 +113,7 @@ class HloBuffer { // The set of values contained in this buffer. Vector contains no duplicates // and is sorted stably by HloValue::Id. 
- std::vector values_; + const std::vector values_; }; std::ostream& operator<<(std::ostream& out, const HloBuffer& buffer); diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc index 9dbde0ec243..f6b764732b4 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.cc @@ -118,13 +118,11 @@ Status HloCostAnalysis::HandleElementwiseOp(HloInstruction* hlo_instruction) { } } -Status HloCostAnalysis::HandleElementwiseUnary(HloInstruction* hlo, - HloOpcode opcode) { +Status HloCostAnalysis::HandleElementwiseUnary(HloInstruction* hlo) { return HandleElementwiseOp(hlo); } -Status HloCostAnalysis::HandleElementwiseBinary(HloInstruction* hlo, - HloOpcode opcode) { +Status HloCostAnalysis::HandleElementwiseBinary(HloInstruction* hlo) { return HandleElementwiseOp(hlo); } diff --git a/tensorflow/compiler/xla/service/hlo_cost_analysis.h b/tensorflow/compiler/xla/service/hlo_cost_analysis.h index 6d8fdfa64b5..eeb3d4edd1b 100644 --- a/tensorflow/compiler/xla/service/hlo_cost_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_cost_analysis.h @@ -49,9 +49,8 @@ class HloCostAnalysis : public DfsHloVisitor { using ShapeSizeFunction = std::function; explicit HloCostAnalysis(const ShapeSizeFunction& shape_size); - Status HandleElementwiseUnary(HloInstruction* hlo, HloOpcode opcode) override; - Status HandleElementwiseBinary(HloInstruction* hlo, - HloOpcode opcode) override; + Status HandleElementwiseUnary(HloInstruction* hlo) override; + Status HandleElementwiseBinary(HloInstruction* hlo) override; Status HandleConstant(HloInstruction* constant, const Literal& literal) override; Status HandleGetTupleElement(HloInstruction* get_tuple_element, diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc index ea8b239e100..2be1645f1b0 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.cc @@ -67,6 +67,22 @@ HloValue& HloDataflowAnalysis::GetValueDefinedAt( return GetUniqueValueAt(instruction, index); } +HloValue* HloDataflowAnalysis::NewHloValue(HloInstruction* instruction, + const ShapeIndex& index, + bool is_phi) { + const int64 value_id = next_value_id_++; + auto emplaced = values_.emplace( + std::piecewise_construct, std::forward_as_tuple(value_id), + std::forward_as_tuple(value_id, instruction, index, is_phi)); + CHECK(emplaced.second); + + return &emplaced.first->second; +} + +void HloDataflowAnalysis::DeleteHloValue(HloValue::Id value_id) { + values_.erase(value_id); +} + string HloDataflowAnalysis::ToString() const { string out = StrCat("HloDataflowAnalysis, module ", module_->name(), "\n"); StrAppend(&out, " Instruction value sets:\n"); @@ -99,22 +115,98 @@ string HloDataflowAnalysis::ToString() const { } } StrAppend(&out, " HloValues:\n"); - for (const HloValue& value : values()) { - StrAppend(&out, value.ToString(/*indent=*/4)); - } - StrAppend(&out, " Phi resolutions:\n"); - for (const HloValue& value : values()) { - if (value.is_phi()) { - const HloValue* resolved_value = ResolvePhi(value); - StrAppend(&out, " ", value.ToShortString(), " => ", - resolved_value == nullptr ? 
"UNKNOWN" - : resolved_value->ToShortString(), - "\n"); - } + for (const HloValue* value : values()) { + StrAppend(&out, value->ToString(/*indent=*/4)); } return out; } +bool HloDataflowAnalysis::Phi( + HloInstruction* instruction, + tensorflow::gtl::ArraySlice inputs) { + CHECK(ssa_form_); + + for (const InstructionValueSet* input : inputs) { + DCHECK(ShapeUtil::Compatible(instruction->shape(), input->shape())); + } + + bool changed = false; + for (auto& pair : GetInstructionValueSet(instruction)) { + const ShapeIndex& index = pair.first; + HloValueSet& value_set = pair.second; + + // Positions with phi values should never have more than one value in the + // value set. + CHECK_LE(value_set.values().size(), 1); + const HloValue* current_value = + value_set.values().size() == 1 ? value_set.values()[0] : nullptr; + + // Construct a vector of unique value IDs of the inputs. + std::vector input_value_ids; + for (const InstructionValueSet* input : inputs) { + for (const HloValue* value : input->element(index).values()) { + input_value_ids.push_back(value->id()); + } + } + std::sort(input_value_ids.begin(), input_value_ids.end()); + input_value_ids.erase( + std::unique(input_value_ids.begin(), input_value_ids.end()), + input_value_ids.end()); + + // Remove the existing phi value (if it exists). The phi can be its own + // input, for example, in while body parameters where the body passes + // through the parameter value. + bool current_value_defined_here = + (current_value != nullptr && + current_value->defining_instruction() == instruction && + current_value->defining_index() == index); + if (current_value_defined_here) { + CHECK(current_value->is_phi()); + auto it = std::find(input_value_ids.begin(), input_value_ids.end(), + current_value->id()); + if (it != input_value_ids.end()) { + input_value_ids.erase(it); + } + } + + if (input_value_ids.empty()) { + // A value set which has at least one element should never have its value + // set reduced to zero elements. During dataflow value sets only can go + // from empty to non-empty, not the reverse. + CHECK_EQ(value_set.values().size(), 0) + << "Instruction " << instruction->name() << " at index " << index + << " previously had non-empty value set. Value set: " << value_set; + } else if (input_value_ids.size() == 1) { + // Only a single value reaches this point. There should be no phi, and + // this value set should contain this single value. + const HloValue& new_value = GetValue(input_value_ids[0]); + if (current_value == nullptr) { + value_set.Clear(); + value_set.AddValue(&new_value); + changed = true; + } else if (current_value != &new_value) { + if (current_value_defined_here) { + // Remove the existing phi. + DeleteHloValue(current_value->id()); + } + value_set.Clear(); + value_set.AddValue(&new_value); + changed = true; + } + } else { + // Multiple distinct values reach this point. A phi value is + // necessary. 
+ CHECK_GT(input_value_ids.size(), 1); + if (current_value == nullptr || !current_value->is_phi()) { + value_set.Clear(); + value_set.AddValue(NewHloValue(instruction, index, /*is_phi=*/true)); + changed = true; + } + } + } + return changed; +} + const HloValue& HloDataflowAnalysis::GetValue(HloValue::Id value_id) const { return values_.at(value_id); } @@ -142,129 +234,6 @@ HloValueSet& HloDataflowAnalysis::GetValueSet(const HloPosition& position) { return GetValueSet(position.instruction, position.index); } -void HloDataflowAnalysis::UpdateAfterChangingOperand( - HloInstruction* instruction, HloInstruction* old_operand, - HloInstruction* new_operand) { - CHECK(std::find(instruction->operands().begin(), - instruction->operands().end(), - new_operand) != instruction->operands().end()); - VLOG(1) << "UpdateAfterChangingOperand(" << instruction->name() << ", " - << old_operand->name() << " => " << new_operand->name() << ")"; - - std::vector to_update = {instruction}; - - // If the instruction calls any computations then add the parameters of called - // computation to capture any changes to the dataflow into the subcomputation - // introduced by the new operand. - for (HloComputation* computation : instruction->called_computations()) { - to_update.insert(to_update.end(), - computation->parameter_instructions().begin(), - computation->parameter_instructions().end()); - } - - UpdateInstructionsAndPropagate(to_update); - - // The uses of the values in the old and new operand may have changed. Uses of - // other HloValues are updated in UpdateInstructionsAndPropagate. - for (auto& pair : GetInstructionValueSet(old_operand)) { - for (const HloValue* value : pair.second.values()) { - GetValue(value->id()).RecomputeUses(); - } - } - for (auto& pair : GetInstructionValueSet(new_operand)) { - for (const HloValue* value : pair.second.values()) { - GetValue(value->id()).RecomputeUses(); - } - } - - TF_DCHECK_OK(VerifyAgainstReference()); -} - -void HloDataflowAnalysis::UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root) { - VLOG(1) << "UpdateAfterChangingRoot(" << old_root->name() << " => " - << new_root->name() << ")"; - - CHECK_EQ(new_root, new_root->parent()->root_instruction()); - CHECK_EQ(new_root->parent(), old_root->parent()); - - std::vector to_update = {old_root, new_root}; - - const CallGraphNode& call_graph_node = - call_graph_->GetNode(new_root->parent()); - for (const CallSite& callsite : call_graph_node.caller_callsites()) { - if (callsite.instruction()->opcode() == HloOpcode::kCall) { - to_update.push_back(callsite.instruction()); - } else if (callsite.instruction()->opcode() == HloOpcode::kWhile) { - // Add the while itself, and the body and condition parameters. - to_update.push_back(callsite.instruction()); - to_update.push_back( - callsite.instruction()->while_body()->parameter_instruction(0)); - to_update.push_back( - callsite.instruction()->while_condition()->parameter_instruction(0)); - } - } - - UpdateInstructionsAndPropagate(to_update); - - TF_DCHECK_OK(VerifyAgainstReference()); -} - -const HloValue* HloDataflowAnalysis::ResolvePhi(const HloValue& phi) const { - CHECK(phi.is_phi()); - - tensorflow::gtl::FlatSet visited; - std::queue worklist; - auto add_to_worklist = [&worklist, &visited](const HloValue* v) { - if (visited.insert(v).second) { - // 'v' was not previously in visited. 
- worklist.push(v); - } - }; - add_to_worklist(&phi); - - const HloValue* resolved_value = nullptr; - while (!worklist.empty()) { - const HloValue* value = worklist.front(); - worklist.pop(); - - if (!value->is_phi()) { - if (resolved_value == nullptr) { - resolved_value = value; - } else if (resolved_value != value) { - return nullptr; - } - } else { - for (const HloValue* input : phi_inputs_.at(value)) { - add_to_worklist(input); - } - } - } - return resolved_value; -} - -void HloDataflowAnalysis::UpdatePhiInputs( - const HloInstruction* instruction, - tensorflow::gtl::ArraySlice inputs) { - CHECK(ssa_form_); - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - const HloValue& phi_value = GetUniqueValueAt(instruction, index); - auto& phi_inputs = phi_inputs_.at(&phi_value); - phi_inputs.clear(); - for (const InstructionValueSet* input : inputs) { - for (const HloValue* value : input->element(index).values()) { - // The number of phi inputs is typically 2, and virtually always very - // small. - if (std::find(phi_inputs.begin(), phi_inputs.end(), value) == - phi_inputs.end()) { - phi_inputs.push_back(value); - } - } - } - } -} - bool HloDataflowAnalysis::UpdateBitcastValueSet(HloInstruction* bitcast) { CHECK_EQ(bitcast->opcode(), HloOpcode::kBitcast); const InstructionValueSet& operand_set = @@ -380,8 +349,7 @@ bool HloDataflowAnalysis::UpdateParameterValueSet(HloInstruction* parameter) { } if (ssa_form_ && called_from_while) { - UpdatePhiInputs(parameter, inputs); - return false; + return Phi(parameter, inputs); } else { return GetInstructionValueSet(parameter).AssignUnionOf(inputs); } @@ -439,8 +407,7 @@ bool HloDataflowAnalysis::UpdateWhileValueSet(HloInstruction* xla_while) { &GetInstructionValueSet(xla_while->while_body()->root_instruction()), &GetInstructionValueSet(xla_while->operand(0))}; if (ssa_form_) { - UpdatePhiInputs(xla_while, inputs); - return false; + return Phi(xla_while, inputs); } else { return GetInstructionValueSet(xla_while).AssignUnionOf(inputs); } @@ -487,38 +454,7 @@ void HloDataflowAnalysis::UpdateInstructionsAndPropagate( VLOG(3) << "Worklist top: " << instruction->name(); VLOG(3) << ToString(); - // The updating of the instruction value set below in - // UpdateInstructionValueSet does not update HloValue::positions(). To - // perform the positions() update remove all positions in 'instruction' from - // the HloValues in 'instruction's value set prior to the update, then after - // the update add the new positions back in. There is likely a more - // efficient way of doing this. - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (value->defining_instruction() != instruction) { - // Use GetValue for a non-const HloValue reference. - GetValue(value->id()).RemovePosition(instruction, index); - } - } - } - - bool changed = UpdateInstructionValueSet(instruction); - - // Add the positions back in. - for (auto& pair : GetInstructionValueSet(instruction)) { - const ShapeIndex& index = pair.first; - HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (value->defining_instruction() != instruction) { - // Use GetValue for a non-const HloValue reference. - GetValue(value->id()).AddPosition(instruction, index); - } - } - } - - if (!changed) { + if (!UpdateInstructionValueSet(instruction)) { // No change to the instruction's value set. 
VLOG(4) << "No change."; continue; @@ -531,12 +467,16 @@ void HloDataflowAnalysis::UpdateInstructionsAndPropagate( for (HloInstruction* user : instruction->users()) { worklist.push(user); - // If user calls a computation, then the respective parameter(s) of the - // computation need to be updated. + // If user sequentially calls a computation, then the respective + // parameter(s) of the computation need to be updated. for (HloComputation* called_computation : user->called_computations()) { - for (int64 operand_number : user->OperandIndices(instruction)) { - worklist.push( - called_computation->parameter_instruction(operand_number)); + const CallGraphNode& call_graph_node = + call_graph_->GetNode(called_computation); + if (call_graph_node.context() == CallContext::kSequential) { + for (int64 operand_number : user->OperandIndices(instruction)) { + worklist.push( + called_computation->parameter_instruction(operand_number)); + } } } } @@ -574,25 +514,10 @@ InstructionValueSet& HloDataflowAnalysis::GetInstructionValueSet( } Status HloDataflowAnalysis::InitializeInstructionValueSets() { - // Gather the values to create before creating them. This is done because we - // want to allocate the vector of values only once so references to elements - // are stable. - struct ValueToCreate { - HloInstruction* instruction; - ShapeIndex index; - bool is_phi; - }; - std::vector values_to_create; - for (const std::unique_ptr& computation : module_->computations()) { const CallGraphNode& call_graph_node = call_graph_->GetNode(computation.get()); - bool called_from_while = std::any_of( - call_graph_node.caller_callsites().begin(), - call_graph_node.caller_callsites().end(), [](const CallSite& cs) { - return cs.instruction()->opcode() == HloOpcode::kWhile; - }); for (const std::unique_ptr& instruction : computation->instructions()) { @@ -603,20 +528,22 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // Lambda to set the value set to define all values in the output of the // instruction. - auto define_all_values = [this, &instruction, - &values_to_create](bool is_phi = false) { + auto define_all_values = [this, &instruction](bool is_phi = false) { for (auto& pair : GetInstructionValueSet(instruction.get())) { const ShapeIndex& index = pair.first; - values_to_create.push_back({instruction.get(), index, is_phi}); + HloValue* value = + NewHloValue(instruction.get(), index, /*is_phi=*/false); + GetValueSet(instruction.get(), index).AddValue(value); } }; // Lambda to set the value set to define only the top-level buffer in the // output of the instruction. Any other values flow from the operands of // the instruction (or from cross-computation dataflow). - auto define_top_level_only = [this, &instruction, &values_to_create]() { - values_to_create.push_back( - {instruction.get(), /*index=*/{}, /*is_phi=*/false}); + auto define_top_level_only = [this, &instruction]() { + HloValue* value = + NewHloValue(instruction.get(), /*index=*/{}, /*is_phi=*/false); + GetValueSet(instruction.get(), /*index=*/{}).AddValue(value); }; switch (instruction->opcode()) { @@ -626,10 +553,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { } break; case HloOpcode::kWhile: - if (ssa_form_) { - define_all_values(/*is_phi=*/true); - } - break; case HloOpcode::kCall: case HloOpcode::kGetTupleElement: // These instructions define no values. The values in their output @@ -654,10 +577,6 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { // values in their output. 
Otherwise the values of the parameter // come from the caller (eg, operands to the kCall instruction). define_all_values(); - } else if (call_graph_node.context() == CallContext::kSequential && - called_from_while && ssa_form_) { - // Parameters of while bodies and conditions are phis. - define_all_values(/*is_phi=*/true); } break; case HloOpcode::kCopy: @@ -674,164 +593,9 @@ Status HloDataflowAnalysis::InitializeInstructionValueSets() { } } - // Reserve the vector ahead of time so references to elements are stable. - values_.reserve(values_to_create.size()); - for (int64 i = 0; i < values_to_create.size(); ++i) { - const ValueToCreate& to_create = values_to_create[i]; - values_.emplace_back(/*id=*/i, to_create.instruction, to_create.index, - to_create.is_phi); - const HloValue& value = values_.back(); - GetValueSet(to_create.instruction, to_create.index).AddValue(&value); - if (value.is_phi()) { - phi_inputs_[&value] = {}; - } - } return Status::OK(); } -bool HloDataflowAnalysis::IsDefinedBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const { - // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b' - // is live into the module. - if (b.defining_instruction()->parent() == module_->entry_computation() && - b.defining_instruction()->opcode() == HloOpcode::kParameter) { - return false; - } - - // Phi values require special handling. Because XLA does not have a phi - // instruction, the definition instruction of the phis values are - // placeholders: either the subcomputation parameter (body or condition) or - // the while instruction. However, the program point where these values are - // logically defined does not necessarily coincide exactly with program point - // of these place-holder instructions. So we explicitly define the following - // order for phi values: - // - // body/condition parameter phi: - // Defined before all values defined in its computation excepting other - // phis. - // - // while phi: - // defined after all values defined in the condition or body. - // - auto is_body_or_condition_phi = [](const HloValue& v) { - return v.is_phi() && - v.defining_instruction()->opcode() == HloOpcode::kParameter; - }; - if (is_body_or_condition_phi(a) && !is_body_or_condition_phi(b) && - call_graph_->InstructionIsNestedIn(b.defining_instruction(), - a.defining_instruction()->parent())) { - return true; - } - if (is_body_or_condition_phi(b) && - call_graph_->InstructionIsNestedIn(a.defining_instruction(), - b.defining_instruction()->parent())) { - return false; - } - - // If 'b' is a while phi and 'a' is in the body or condition, then 'a' - // executes before 'b'. - if (b.is_phi() && b.defining_instruction()->opcode() == HloOpcode::kWhile && - (call_graph_->InstructionIsNestedIn( - a.defining_instruction(), b.defining_instruction()->while_body()) || - call_graph_->InstructionIsNestedIn( - a.defining_instruction(), - b.defining_instruction()->while_condition()))) { - return true; - } - - return ordering.ExecutesBefore(a.defining_instruction(), - b.defining_instruction()); -} - -bool HloDataflowAnalysis::UseIsBeforeValueDefinition( - const HloUse& use, const HloValue& value, - const HloOrdering& ordering) const { - if (ordering.ExecutesBefore(use.instruction, value.defining_instruction())) { - return true; - } - - // If the use is at the instruction where the value is defined, then the use - // is before the def if the instruction allows buffer sharing (in place - // computation). 
- if (use.instruction == value.defining_instruction() && - CanShareOperandBufferWithUser( - use.instruction->mutable_operand(use.operand_number), - use.operand_index, value.defining_instruction(), - value.defining_index())) { - return true; - } - - // The use at a while is an input to a phi, and logically occurs before values - // are defined in the body or condition computations. - if (use.instruction->opcode() == HloOpcode::kWhile) { - const HloInstruction* xla_while = use.instruction; - if (call_graph_->InstructionIsNestedIn(value.defining_instruction(), - xla_while->while_body()) || - call_graph_->InstructionIsNestedIn(value.defining_instruction(), - xla_while->while_condition())) { - return true; - } - } - - // Similarly if the value is defined at a while, it logically occurs after any - // uses in the body or condition computations. - if (value.defining_instruction()->opcode() == HloOpcode::kWhile) { - CHECK(ssa_form_); - const HloInstruction* xla_while = value.defining_instruction(); - if (call_graph_->InstructionIsNestedIn(use.instruction, - xla_while->while_body()) || - call_graph_->InstructionIsNestedIn(use.instruction, - xla_while->while_condition())) { - return true; - } - } - return false; -} - -bool HloDataflowAnalysis::LiveRangeStrictlyBefore( - const HloValue& a, const HloValue& b, const HloOrdering& ordering) const { - VLOG(4) << "LiveRangeStrictlyBefore(a = " << a.ToShortString() - << ", b = " << b.ToShortString() << ")"; - if (!IsDefinedBefore(a, b, ordering)) { - VLOG(4) << "a not defined before b"; - return false; - } - - // Live-out values from the module can never have ranges strictly before any - // other value. - if (a.live_out_of_module()) { - VLOG(4) << "a is live out of module"; - return false; - } - - // Live-out values of computations can never have ranges strictly before any - // other value in the computation (including values nested in - // subcomputations). - if (a.live_out_of_computation() && - call_graph_->InstructionIsNestedIn(b.defining_instruction(), - a.defining_instruction()->parent())) { - VLOG(4) << "a is live out of computation containing b"; - return false; - } - - // All uses of 'a' must be before 'b' is defined. - for (const HloUse& use : a.uses()) { - if (!UseIsBeforeValueDefinition(use, b, ordering)) { - VLOG(4) << "use of a (" << use << ") not before b is defined"; - return false; - } - } - - return true; -} - -bool HloDataflowAnalysis::MayInterfere(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const { - // Buffers without disjoint liveness may interfere. - return !LiveRangeStrictlyBefore(a, b, ordering) && - !LiveRangeStrictlyBefore(b, a, ordering); -} - /* static */ StatusOr> HloDataflowAnalysis::Run( HloModule* module, bool ssa_form, bool bitcast_defines_value) { @@ -855,6 +619,33 @@ StatusOr> HloDataflowAnalysis::Run( } dataflow_analysis->UpdateInstructionsAndPropagate(all_instructions); + // Add in positions to all values. + for (const std::unique_ptr& computation : + module->computations()) { + for (const std::unique_ptr& instruction : + computation->instructions()) { + for (const auto& pair : + dataflow_analysis->GetInstructionValueSet(instruction.get())) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (value->defining_instruction() != instruction.get()) { + dataflow_analysis->GetValue(value->id()) + .AddPosition(instruction.get(), index); + } + } + } + } + } + + // Construct vector of values. 
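The interference predicates removed above (the updated tests get the same answers from the ordering object, via ordering.MayInterfere) boil down to: two values may interfere unless one live range is strictly before the other, and "strictly before" requires that a is defined first, is not live out, and has every use at or before b's definition. A toy sketch of that rule over a single total ordering of program points (an assumption made for illustration; HLO orderings are partial, and the real check also handles the in-place buffer-sharing case at the defining instruction):

#include <cassert>
#include <vector>

struct Value {
  int def;                // program point of the definition
  std::vector<int> uses;  // program points of uses
  bool live_out = false;  // live out of the module/computation
};

bool StrictlyBefore(const Value& a, const Value& b) {
  if (!(a.def < b.def) || a.live_out) return false;
  for (int use : a.uses) {
    if (!(use <= b.def)) return false;  // every use of a at or before b's def
  }
  return true;
}

bool MayInterfere(const Value& a, const Value& b) {
  return !StrictlyBefore(a, b) && !StrictlyBefore(b, a);
}

int main() {
  Value a{/*def=*/0, /*uses=*/{1}};
  Value b{/*def=*/2, /*uses=*/{3}};
  Value c{/*def=*/1, /*uses=*/{4}};
  assert(!MayInterfere(a, b));  // a dies at 1, b defined at 2: disjoint ranges
  assert(MayInterfere(b, c));   // c is live across b's definition
}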
+ dataflow_analysis->values_vector_.reserve(dataflow_analysis->values_.size()); + for (auto& pair : dataflow_analysis->values_) { + dataflow_analysis->values_vector_.push_back(&pair.second); + } + std::sort(dataflow_analysis->values_vector_.begin(), + dataflow_analysis->values_vector_.end(), HloValue::IdLessThan); + TF_DCHECK_OK(dataflow_analysis->Verify()); XLA_VLOG_LINES(1, dataflow_analysis->ToString()); @@ -865,14 +656,14 @@ StatusOr> HloDataflowAnalysis::Run( Status HloDataflowAnalysis::Verify() const { // Verify each HloValue appears in the value sets that the value's positions() // indicate. - for (const HloValue& value : values()) { - for (const HloPosition& position : value.positions()) { + for (const HloValue* value : values()) { + for (const HloPosition& position : value->positions()) { const HloValueSet& value_set = GetValueSet(position); TF_RET_CHECK(std::find(value_set.values().begin(), value_set.values().end(), - &value) != value_set.values().end()) + value) != value_set.values().end()) << "Value set at position " << position << " does not contain value " - << value.ToShortString(); + << value->ToShortString(); } } @@ -898,75 +689,4 @@ Status HloDataflowAnalysis::Verify() const { return Status::OK(); } -Status HloDataflowAnalysis::VerifyAgainstReference() const { - TF_RETURN_IF_ERROR(Verify()); - - TF_ASSIGN_OR_RETURN(std::unique_ptr reference, - Run(module_, ssa_form_, bitcast_defines_value_)); - TF_RETURN_IF_ERROR(reference->Verify()); - - VLOG(2) << "This analysis:"; - XLA_VLOG_LINES(2, ToString()); - VLOG(2) << "Reference:"; - XLA_VLOG_LINES(2, reference->ToString()); - - // Verify value sets in each position are identical. - for (const auto& computation : module_->computations()) { - for (const auto& instruction : computation->instructions()) { - for (const auto& pair : GetInstructionValueSet(instruction.get())) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - const HloValueSet& reference_value_set = - reference->GetValueSet(instruction.get(), index); - - auto value_in_set = [](const HloValue& v, const HloValueSet& vset) { - return std::find_if(vset.values().begin(), vset.values().end(), - [&v](const HloValue* w) { return *w == v; }) != - vset.values().end(); - }; - - for (const HloValue* value : value_set.values()) { - TF_RET_CHECK(value_in_set(*value, reference_value_set)) - << "Value " << value->ToShortString() - << " does not exist in reference"; - } - for (const HloValue* reference_value : reference_value_set.values()) { - TF_RET_CHECK(value_in_set(*reference_value, value_set)) - << "Value " << reference_value->ToShortString() - << " only exists in reference"; - } - } - } - } - - // Verify all phis resolve identically and uses are identical. 
- for (const HloValue& value : values()) { - const HloValue& reference_value = reference->GetValueDefinedAt( - value.defining_instruction(), value.defining_index()); - TF_RET_CHECK(value.is_phi() == reference_value.is_phi()); - if (value.is_phi()) { - const HloValue* resolved_value = ResolvePhi(value); - const HloValue* reference_resolved_value = - reference->ResolvePhi(reference_value); - if (resolved_value == nullptr) { - TF_RET_CHECK(reference_resolved_value == nullptr); - } else { - TF_RET_CHECK(reference_resolved_value != nullptr); - TF_RET_CHECK(*reference_resolved_value == *resolved_value); - } - } - - for (const HloUse& use : value.uses()) { - TF_RET_CHECK(std::find(reference_value.uses().begin(), - reference_value.uses().end(), - use) != reference_value.uses().end()); - } - for (const HloUse& reference_use : reference_value.uses()) { - TF_RET_CHECK(std::find(value.uses().begin(), value.uses().end(), - reference_use) != value.uses().end()); - } - } - return Status::OK(); -} - } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h index 7781cc58a3a..aae257dd09e 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis.h @@ -88,10 +88,10 @@ class HloDataflowAnalysis { // given position. const HloValueSet& GetValueSet(const HloInstruction* instruction, const ShapeIndex& index = {}) const; - HloValueSet& GetValueSet(const HloInstruction* instruction, - const ShapeIndex& index = {}); const HloValueSet& GetValueSet(const HloPosition& position) const; HloValueSet& GetValueSet(const HloPosition& position); + HloValueSet& GetValueSet(const HloInstruction* instruction, + const ShapeIndex& index = {}); // Return the unique value in the HloValueSet at the given instruction and // shape index. CHECKs if the value set does not contain a exactly one value. @@ -108,49 +108,11 @@ class HloDataflowAnalysis { const HloValue& GetValue(HloValue::Id value_id) const; HloValue& GetValue(HloValue::Id value_id); - // Returns whether the given values interfere assuming the given HLO - // ordering. Two values interfere if they may both be simultaneously live. - bool MayInterfere(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Overload which takes HloValue:Ids. - bool MayInterfere(HloValue::Id a, HloValue::Id b, - const HloOrdering& ordering) const { - return MayInterfere(GetValue(a), GetValue(b), ordering); - } - // Return the total number of HloValues. int64 value_count() const { return values_.size(); } - // Return a vector of all HloValues. - const std::vector& values() const { return values_; } - - // Updates the dataflow after the changing an operand of - // 'instruction'. Dataflow update is not possible if instructions have been - // added or removed from the graph. - void UpdateAfterChangingOperand(HloInstruction* instruction, - HloInstruction* old_operand, - HloInstruction* new_operand); - - // Updates the dataflow after the changing the root of a computation from - // 'old_root' to 'new_root'. - void UpdateAfterChangingRoot(HloInstruction* old_root, - HloInstruction* new_root); - - // Returns the non-phi HloValue that is the unique (transitive) input to the - // given phi. If no such HloValue exists (there are multiple inputs to the - // phi) then nullptr is returned. This is computed by all walking the inputs - // of the given phi value until non-phi HloValue(s) are encountered. 
- const HloValue* ResolvePhi(const HloValue& phi) const; - const HloValue* ResolvePhi(const HloInstruction* instruction, - const ShapeIndex& index = {}) const { - return ResolvePhi(GetValueDefinedAt(instruction, index)); - } - - // Compare the dataflow analysis against a clean recomputation of the - // analysis. Returns an error status if there is a mismatch. Useful for - // verifying the correctness after updates to the analysis. - Status VerifyAgainstReference() const; + // Return a vector of all HloValues stabily sorted by HloValue::Id. + const std::vector& values() const { return values_vector_; } // Return the call graph used for computing the dataflow. const CallGraph& call_graph() const { return *call_graph_; } @@ -161,6 +123,13 @@ class HloDataflowAnalysis { HloDataflowAnalysis(HloModule* module, bool ssa_form, bool bitcast_defines_value = false); + // Returns a new HloValue defined at the given instruction and shape index. + HloValue* NewHloValue(HloInstruction* instruction, const ShapeIndex& index, + bool is_phi = false); + + // Delete the HloValue with the given ID. + void DeleteHloValue(HloValue::Id value_id); + // Constructs and initializes the InstructionValueSets of all instructions to // contain exactly the HloValues defined by each instruction. These values can // then propagated throughout the HLO graph by calling @@ -187,10 +156,11 @@ class HloDataflowAnalysis { void UpdateInstructionsAndPropagate( tensorflow::gtl::ArraySlice instructions); - // Sets the inputs of the given phi to given value(s). - void UpdatePhiInputs( - const HloInstruction* instruction, - tensorflow::gtl::ArraySlice inputs); + // Return the result of the SSA Phi function applied to the given inputs at + // the given instruction. If skip_top_level is true, then the top level of the + // value set of 'instruction' is not modified. + bool Phi(HloInstruction* instruction, + tensorflow::gtl::ArraySlice inputs); // Updates the positions of the HloValues in the output of the given // instruction. This should be called after the instruction value set of @@ -203,20 +173,6 @@ class HloDataflowAnalysis { HloInstruction* instruction, const InstructionValueSet& new_value_set, const InstructionValueSet* prev_value_set = nullptr); - // Returns true if the live range of the given value 'a' is strictly before - // the live range of value 'b' using the given HLO ordering. - bool LiveRangeStrictlyBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Returns whether the value 'a' is defined before the value 'b' under the - // given ordering. - bool IsDefinedBefore(const HloValue& a, const HloValue& b, - const HloOrdering& ordering) const; - - // Returns whether the given use is before the given value definition. - bool UseIsBeforeValueDefinition(const HloUse& use, const HloValue& value, - const HloOrdering& ordering) const; - // Verify various invariants of the dataflow analysis. Status Verify() const; @@ -226,19 +182,19 @@ class HloDataflowAnalysis { std::unique_ptr call_graph_; - // Array of all values in the module. This is allocated once at analysis - // construction time so HloValue references are stable. Updates to the - // analysis via UpdateAfterChangingOperand and UpdateAfterChangingRoot do not - // result in the creation or destruction of any HloValues. - std::vector values_; - - // Map hold the inputs to each phi value in the module. Used by ResolvePhi. - tensorflow::gtl::FlatMap> - phi_inputs_; + // The map of all HloValues in the module. 
We pass around pointers to the + // mapped HloValues, so the underlying container must keep them valid despite + // mutations touching other map entries. + std::unordered_map values_; // A map from instruction to InstructionValueSet. std::unordered_map value_sets_; + + // A vector containing all HloValues sorted by HloValue::Id. + std::vector values_vector_; + + // The Id to use for the next HloValue. + HloValue::Id next_value_id_ = 0; }; } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc index 9f3dd539efe..ef0fa1d745a 100644 --- a/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc +++ b/tensorflow/compiler/xla/service/hlo_dataflow_analysis_test.cc @@ -26,7 +26,6 @@ limitations under the License. #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" #include "tensorflow/compiler/xla/xla_data.pb.h" -#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -44,8 +43,8 @@ class HloDataflowAnalysisTest : public HloTestBase, // Run dataflow analysis on the member module. For convenience returns a // reference to the generated analysis stored in analysis_. - HloDataflowAnalysis& RunAnalysis(bool ssa_form, - bool bitcast_defines_value = false) { + const HloDataflowAnalysis& RunAnalysis(bool ssa_form, + bool bitcast_defines_value = false) { analysis_ = HloDataflowAnalysis::Run(module_.get(), ssa_form, bitcast_defines_value) .ConsumeValueOrDie(); @@ -71,8 +70,8 @@ class HloDataflowAnalysisTest : public HloTestBase, const HloInstruction* b) { EXPECT_FALSE(ShapeUtil::IsTuple(a->shape())); EXPECT_FALSE(ShapeUtil::IsTuple(b->shape())); - return analysis_->MayInterfere(analysis_->GetValueDefinedAt(a), - analysis_->GetValueDefinedAt(b), ordering); + return ordering.MayInterfere(analysis_->GetValueDefinedAt(a), + analysis_->GetValueDefinedAt(b)); } std::unique_ptr module_; @@ -499,37 +498,26 @@ TEST_P(HloDataflowAnalysisTest, SingleWhile) { EXPECT_FALSE(analysis.GetValueDefinedAt(cond_constant).live_out_of_module()); if (ssa_form) { - // While instruction should define phi values. The value at index {0} is a - // degenerate phi with a single input 'constant1'. - EXPECT_TRUE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(xla_while, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(body_param, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{0})); - EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param, /*index=*/{0}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param, /*index=*/{0}), - &analysis.GetValueDefinedAt(constant1)); + // Element 0 of the tuple passed through the body so no phi value is + // defined. + EXPECT_FALSE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{0})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(body_param, /*index=*/{0})); + EXPECT_FALSE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{0})); + // Element 1 of the tuple should be a phi value. 
EXPECT_TRUE(analysis.ValueIsDefinedAt(xla_while, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(xla_while, /*index=*/{1}), nullptr); EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(body_param, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param, /*index=*/{1}), nullptr); EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param, /*index=*/{1})); EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param, /*index=*/{1}).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param, /*index=*/{1}), nullptr); - EXPECT_THAT(analysis.GetValueDefinedAt(constant1).uses(), - UnorderedElementsAre(HloUse{xla_while, 0, {0}})); + EXPECT_THAT( + analysis.GetValueDefinedAt(constant1).uses(), + UnorderedElementsAre(HloUse{add, 0, {}}, HloUse{xla_while, 0, {0}})); - EXPECT_FALSE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{0}) - .live_out_of_module()); + // Constant1 passes through the body and out of the module. + EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while, /*index=*/{1}) .live_out_of_module()); @@ -613,20 +601,15 @@ TEST_P(HloDataflowAnalysisTest, SequentialWhiles) { bool ssa_form = GetParam(); const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - if (ssa_form) { - EXPECT_TRUE(analysis.GetValueDefinedAt(xla_while2).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - } else { - // Element 0 is passed through all the while instructions and out of the - // module. - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while0, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while1, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_EQ(analysis.GetUniqueValueAt(xla_while2, /*index=*/{0}), - analysis.GetValueDefinedAt(constant1)); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); - } + // Element 0 is passed through all the while instructions and out of the + // module.. + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while0, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while1, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_EQ(analysis.GetUniqueValueAt(xla_while2, /*index=*/{0}), + analysis.GetValueDefinedAt(constant1)); + EXPECT_TRUE(analysis.GetValueDefinedAt(constant1).live_out_of_module()); } TEST_P(HloDataflowAnalysisTest, NestedWhiles) { @@ -705,13 +688,18 @@ TEST_P(HloDataflowAnalysisTest, NestedWhiles) { bool ssa_form = GetParam(); const HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); + EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); if (ssa_form) { EXPECT_TRUE(analysis.ValueIsDefinedAt(inner_param, /*index=*/{1})); EXPECT_TRUE( analysis.GetValueDefinedAt(inner_param, /*index=*/{1}).is_phi()); - EXPECT_TRUE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{0})); - EXPECT_TRUE( - analysis.GetValueDefinedAt(inner_param, /*index=*/{1}).is_phi()); + + // Element 0 of the nested while is %negate. + EXPECT_FALSE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{0})); + EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), + UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); + // Element 1 is a phi value (join of %add and %constant2). 
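The test updates above capture the new phi rule: a phi value is materialized only when the inputs to the join genuinely disagree; if every input carries the same single value, that value is passed through and no new definition is created, which is why element 0 of the while no longer defines a value. A minimal Python sketch of that join, assuming flat (non-tuple) value sets; names are illustrative, not the C++ implementation:

```python
PHI = "phi"

def phi_join(input_value_sets):
    """Join value sets at a merge point.

    Returns (needs_phi, values): if all inputs agree on exactly one value it is
    passed through unchanged; otherwise the join is represented by a phi.
    """
    flattened = set()
    for value_set in input_value_sets:
        flattened.update(value_set)
    if len(flattened) == 1:
        return False, flattened      # degenerate case: no phi is defined
    return True, {PHI}               # genuine join of distinct values

# Element 0 of the while: the body passes the value through -> no phi.
print(phi_join([{"constant1"}, {"constant1"}]))   # (False, {'constant1'})
# Element 1: the body adds to it -> a phi joining %constant1 and %add.
print(phi_join([{"constant1"}, {"add"}]))         # (True, {'phi'})
```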
EXPECT_TRUE(analysis.ValueIsDefinedAt(nested_while, /*index=*/{1})); EXPECT_TRUE( analysis.GetValueDefinedAt(nested_while, /*index=*/{1}).is_phi()); @@ -724,8 +712,6 @@ TEST_P(HloDataflowAnalysisTest, NestedWhiles) { EXPECT_TRUE( analysis.GetValueDefinedAt(entry_while, /*index=*/{1}).is_phi()); } else { - EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(negate))); EXPECT_THAT(HloValuesAt(inner_param, /*index=*/{1}), UnorderedElementsAre(analysis.GetValueDefinedAt(add), analysis.GetValueDefinedAt(constant2))); @@ -1496,256 +1482,6 @@ TEST_P(HloDataflowAnalysisTest, EmbeddedComputationInterference) { EXPECT_TRUE(InstructionsMayInterfere(ordering, negate, embedded_log)); } -TEST_P(HloDataflowAnalysisTest, UpdateAnalysisForWhile) { - // Test updating dataflow after modifying a module with an array shaped while: - // - // body(F32[] %param): - // %negate = Negate(%param) - // - // condition(F32[] %param): - // return Constant(false) - // - // entry: - // %constant = Constant(1.0) - // %exp = Exp(%constant) - // return While(%exp, body, condition) - // - auto body_builder = HloComputation::Builder("body"); - auto body_param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( - scalar_shape_, HloOpcode::kNegate, body_param)); - HloComputation* body = module_->AddEmbeddedComputation(body_builder.Build()); - - // Condition computation trivially returns a constant "false". - auto cond_builder = HloComputation::Builder("condition"); - auto cond_param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, scalar_shape_, "param")); - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - HloComputation* condition = - module_->AddEmbeddedComputation(cond_builder.Build()); - - auto builder = HloComputation::Builder(TestName()); - auto constant = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto exp = builder.AddInstruction( - HloInstruction::CreateUnary(scalar_shape_, HloOpcode::kExp, constant)); - auto xla_while = builder.AddInstruction( - HloInstruction::CreateWhile(scalar_shape_, condition, body, exp)); - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - // Sanity check the initial dataflow analysis before transforming the HLO - // graph. 
- if (ssa_form) { - EXPECT_TRUE(analysis.ValueIsDefinedAt(body_param)); - EXPECT_TRUE(analysis.GetValueDefinedAt(body_param).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(body_param), nullptr); - - EXPECT_TRUE(analysis.ValueIsDefinedAt(cond_param)); - EXPECT_TRUE(analysis.GetValueDefinedAt(cond_param).is_phi()); - EXPECT_EQ(analysis.ResolvePhi(cond_param), nullptr); - - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp), - analysis.GetValueDefinedAt(negate))); - - EXPECT_TRUE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } - - // Set the body root to the body_param. Previously it was Negate(body_param). - body->set_root_instruction(body_param); - - // Prior to updating, verify that the dataflow analysis is no longer valid. - Status verify_status = analysis.VerifyAgainstReference(); - EXPECT_FALSE(verify_status.ok()); - - analysis.UpdateAfterChangingRoot(/*old_root=*/negate, - /*new_root=*/body_param); - - // Analysis should be valid after the update. - TF_EXPECT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // The phis should now be resolvable as 'exp' is passed through the body - // transparently. - EXPECT_EQ(analysis.ResolvePhi(body_param), - &analysis.GetValueDefinedAt(exp)); - EXPECT_EQ(analysis.ResolvePhi(cond_param), - &analysis.GetValueDefinedAt(exp)); - EXPECT_EQ(analysis.ResolvePhi(xla_while), &analysis.GetValueDefinedAt(exp)); - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(exp))); - EXPECT_TRUE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - } - EXPECT_FALSE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - - // Now replace the operand of the while with %constant (was %exp). - TF_ASSERT_OK(exp->ReplaceUseWith(xla_while, constant)); - analysis.UpdateAfterChangingOperand(xla_while, /*old_operand=*/exp, - /*new_operand=*/constant); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // The phis now resolve to 'constant'. - EXPECT_EQ(analysis.ResolvePhi(body_param), - &analysis.GetValueDefinedAt(constant)); - EXPECT_EQ(analysis.ResolvePhi(cond_param), - &analysis.GetValueDefinedAt(constant)); - EXPECT_EQ(analysis.ResolvePhi(xla_while), - &analysis.GetValueDefinedAt(constant)); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant))); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant).live_out_of_module()); - } - - // And finally make the negate the root of the body again. 
- body->set_root_instruction(negate); - analysis.UpdateAfterChangingRoot(/*old_root=*/body_param, - /*new_root=*/negate); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - if (ssa_form) { - // Phis should no longer be resolvable. - EXPECT_EQ(analysis.ResolvePhi(body_param), nullptr); - EXPECT_EQ(analysis.ResolvePhi(cond_param), nullptr); - EXPECT_EQ(analysis.ResolvePhi(xla_while), nullptr); - } else { - EXPECT_THAT(HloValuesAt(body_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(cond_param), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - EXPECT_THAT(HloValuesAt(xla_while), - UnorderedElementsAre(analysis.GetValueDefinedAt(constant), - analysis.GetValueDefinedAt(negate))); - - EXPECT_FALSE(analysis.GetValueDefinedAt(exp).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(negate).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(constant).live_out_of_module()); - } - - // After the updates, verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); -} - -TEST_P(HloDataflowAnalysisTest, UpdateOfATupleSelect) { - // Test changing the operands of kSelects of a tuple value and updating the - // dataflow. - auto builder = HloComputation::Builder(TestName()); - auto pred = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(false))); - auto a = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1.0))); - auto b = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(2.0))); - auto c = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(3.0))); - auto d = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(4.0))); - auto tuple_a = builder.AddInstruction(HloInstruction::CreateTuple({a})); - auto tuple_b = builder.AddInstruction(HloInstruction::CreateTuple({b})); - auto tuple_c = builder.AddInstruction(HloInstruction::CreateTuple({c})); - auto tuple_d = builder.AddInstruction(HloInstruction::CreateTuple({d})); - const Shape tuple_shape = tuple_a->shape(); - auto select_aa = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_a, tuple_a)); - auto select_ab = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_a, tuple_b)); - auto select_cd = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, tuple_c, tuple_d)); - auto select_abcd = builder.AddInstruction(HloInstruction::CreateTernary( - tuple_shape, HloOpcode::kSelect, pred, select_ab, select_cd)); - - module_->AddEntryComputation(builder.Build()); - - bool ssa_form = GetParam(); - HloDataflowAnalysis& analysis = RunAnalysis(ssa_form); - - // Sanity check dataflow before changing the graph and updating. 
- EXPECT_THAT(HloValuesAt(select_aa, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a))); - EXPECT_THAT(HloValuesAt(select_ab, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b))); - EXPECT_THAT(HloValuesAt(select_cd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(c), - analysis.GetValueDefinedAt(d))); - EXPECT_THAT(HloValuesAt(select_abcd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b), - analysis.GetValueDefinedAt(c), - analysis.GetValueDefinedAt(d))); - EXPECT_TRUE(analysis.GetValueDefinedAt(a).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(b).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(c).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(d).live_out_of_module()); - - // Set the rhs of 'select_aa' to be 'd'. - TF_ASSERT_OK(select_aa->ReplaceOperandWith(2, tuple_d)); - analysis.UpdateAfterChangingOperand(select_aa, /*old_operand=*/tuple_a, - /*new_operand=*/tuple_d); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_THAT(HloValuesAt(select_aa, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(d))); - - // Set the lhs of 'select_cd' to be 'a'. - TF_ASSERT_OK(select_cd->ReplaceOperandWith(1, tuple_a)); - analysis.UpdateAfterChangingOperand(select_cd, /*old_operand=*/tuple_c, - /*new_operand=*/tuple_a); - - // Verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); - - EXPECT_THAT(HloValuesAt(select_cd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(d))); - EXPECT_THAT(HloValuesAt(select_abcd, /*index=*/{0}), - UnorderedElementsAre(analysis.GetValueDefinedAt(a), - analysis.GetValueDefinedAt(b), - analysis.GetValueDefinedAt(d))); - EXPECT_TRUE(analysis.GetValueDefinedAt(a).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(b).live_out_of_module()); - EXPECT_FALSE(analysis.GetValueDefinedAt(c).live_out_of_module()); - EXPECT_TRUE(analysis.GetValueDefinedAt(d).live_out_of_module()); - - // After the updates, verify that the dataflow is correct. - TF_ASSERT_OK(analysis.VerifyAgainstReference()); -} - INSTANTIATE_TEST_CASE_P(HloDataflowAnalysisInstantiation, HloDataflowAnalysisTest, ::testing::Values(false, true)); diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index dfb111d1d0b..07b3369d5c1 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -561,13 +561,21 @@ tooltip = " "; } string comp_body = DumpComputation(subcomp); - string computation = - Printf(computation_fmt, id, style, subcomp_label, comp_body, id); - // Add an edge from the subcomputation to its parent node. If subcomp - // belongs to a fusion node, it's drawn in place of the fusion instruction, so - // there's no need to link those. - if (parent_instr->opcode() != HloOpcode::kFusion) { + if (parent_instr->opcode() == HloOpcode::kFusion) { + // Dump any nested fusion nodes. + for (const auto& subcomp_instr : subcomp->instructions()) { + if (subcomp_instr->opcode() == HloOpcode::kFusion) { + StrAppend( + &comp_body, + DumpSubcomputation(subcomp_instr->fused_instructions_computation(), + subcomp_instr.get())); + } + } + } else { + // Add an edge from the subcomputation to its parent node. 
If subcomp + // belongs to a fusion node, it's drawn in place of the fusion instruction, + // so there's no need to link those. edge_ids_.insert( {{subcomp->root_instruction(), parent_instr}, next_edge_id_++}); const char* edge_fmt = @@ -578,6 +586,9 @@ tooltip = " "; subcomp->name(), parent_instr->name())); } + string computation = + Printf(computation_fmt, id, style, subcomp_label, comp_body, id); + return computation; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index 3bdb67ba923..75b88aeb128 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -793,13 +793,6 @@ HloInstruction* HloInstruction::CloneAndFuseInternal( } } - for (HloComputation* computation : - instruction_to_fuse->called_computations()) { - if (std::find(called_computations_.begin(), called_computations_.end(), - computation) == called_computations_.end()) { - called_computations_.push_back(computation); - } - } VLOG(2) << "New clone:\n" << clone->ToString(); return clone; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index 5688fcc4255..e393e05c344 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -797,8 +797,7 @@ class HloInstruction { const Shape& shape, tensorflow::gtl::ArraySlice operands); - // Returns the computations this instruction calls (if any). This includes - // computations called by fused instructions inside of a fusion instruction. + // Returns the computations this instruction directly calls (if any). const std::vector& called_computations() const { return called_computations_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index ea5749581b5..2e1eeee36b5 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -758,16 +758,13 @@ TEST_F(HloInstructionTest, FusionOpWithCalledComputations) { auto* fusion = computation->CreateFusionInstruction( {map_3_y}, HloInstruction::FusionKind::kLoop); auto* fused_computation = fusion->fused_instructions_computation(); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_2_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); fusion->FuseInstruction(map_1_x); - EXPECT_THAT(fusion->called_computations(), - ElementsAre(fused_computation, computation_y, computation_x)); + EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation)); } TEST_F(HloInstructionTest, ComplexFusionOp) { diff --git a/tensorflow/compiler/xla/service/hlo_ordering_test.cc b/tensorflow/compiler/xla/service/hlo_ordering_test.cc index ad6070a9c1b..c95e44bd5d9 100644 --- a/tensorflow/compiler/xla/service/hlo_ordering_test.cc +++ b/tensorflow/compiler/xla/service/hlo_ordering_test.cc @@ -19,6 +19,7 @@ limitations under the License. 
#include #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dataflow_analysis.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_scheduling.h" @@ -218,6 +219,94 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) { EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param)); } +TEST_F(HloOrderingTest, ValuesInWhileComputations) { + // Tests the ordering of values (defined by dataflow analysis) in the body and + // condition of a while instruction. HLO code: + // + // body(F32[]) %param): + // %negate = Negate(%param) + // + // condition(F32[] %param): + // %convert = Convert(%param) + // + // entry: + // %constant = Constant(1.0) + // %while = While(%constant, body, condition) + // %add = Add(%constant, %while) + // + auto module = CreateNewModule(); + const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {}); + + auto body_builder = HloComputation::Builder("body"); + auto body_param = body_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "body_param")); + auto negate = body_builder.AddInstruction(HloInstruction::CreateUnary( + scalar_shape, HloOpcode::kNegate, body_param)); + HloComputation* body = module->AddEmbeddedComputation(body_builder.Build()); + + auto cond_builder = HloComputation::Builder("condition"); + auto cond_param = cond_builder.AddInstruction( + HloInstruction::CreateParameter(0, scalar_shape, "cond_param")); + auto convert = cond_builder.AddInstruction(HloInstruction::CreateConvert( + ShapeUtil::MakeShape(xla::PRED, {}), cond_param)); + HloComputation* condition = + module->AddEmbeddedComputation(cond_builder.Build()); + + auto builder = HloComputation::Builder(TestName()); + auto constant = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1.0))); + auto xla_while = builder.AddInstruction( + HloInstruction::CreateWhile(scalar_shape, condition, body, constant)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + scalar_shape, HloOpcode::kAdd, constant, xla_while)); + module->AddEntryComputation(builder.Build()); + + TF_ASSERT_OK_AND_ASSIGN( + auto dataflow, HloDataflowAnalysis::Run(module.get(), /*ssa_form=*/true)); + DependencyHloOrdering ordering(module.get()); + + // Init value is defined before the while, but live range is not before the + // while because of the use of the init value in the add. + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE(ordering.MayInterfere(dataflow->GetValueDefinedAt(constant), + dataflow->GetValueDefinedAt(xla_while))); + + // Any value defined in the body or condition is defined before the while, and + // has a live range strictly before the while. 
+ EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE(ordering.MayInterfere(dataflow->GetValueDefinedAt(negate), + dataflow->GetValueDefinedAt(xla_while))); + + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + EXPECT_FALSE(ordering.MayInterfere(dataflow->GetValueDefinedAt(convert), + dataflow->GetValueDefinedAt(xla_while))); + + // The live range of the while should be before the add. + EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(xla_while), + dataflow->GetValueDefinedAt(add))); + ASSERT_EQ(dataflow->GetValueDefinedAt(xla_while).uses().size(), 1); + + const HloUse& while_use = dataflow->GetValueDefinedAt(xla_while).uses()[0]; + EXPECT_EQ(while_use.instruction, add); + EXPECT_TRUE(ordering.UseIsBeforeValueDefinition( + while_use, dataflow->GetValueDefinedAt(add))); + EXPECT_TRUE( + ordering.LiveRangeStrictlyBefore(dataflow->GetValueDefinedAt(xla_while), + dataflow->GetValueDefinedAt(add))); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_rematerialization.cc b/tensorflow/compiler/xla/service/hlo_rematerialization.cc index 278a1d7efad..20152cf0cef 100644 --- a/tensorflow/compiler/xla/service/hlo_rematerialization.cc +++ b/tensorflow/compiler/xla/service/hlo_rematerialization.cc @@ -1248,7 +1248,8 @@ StatusOr HloRematerialization::Run( sequence->at(node.computation()))); } return Status::OK(); - })); + }, + /*visit_unreachable_nodes=*/false)); // The peak memory usage of the module equals the peak memory use of the entry // computation plus the output size of the computation. This is because the diff --git a/tensorflow/compiler/xla/service/hlo_value.cc b/tensorflow/compiler/xla/service/hlo_value.cc index f85d8ec50de..e6cf0d37b8a 100644 --- a/tensorflow/compiler/xla/service/hlo_value.cc +++ b/tensorflow/compiler/xla/service/hlo_value.cc @@ -159,12 +159,6 @@ void HloValue::AddPosition(HloInstruction* instruction, for (const HloPosition& position : positions_) { DCHECK_NE(position, new_position); } - // The shape of the new position must match existing positions. - if (!positions_.empty()) { - CHECK( - ShapeUtil::Compatible(positions_.front().shape(), new_position.shape())) - << "front: " << positions_.front() << " new: " << new_position; - } positions_.push_back(std::move(new_position)); diff --git a/tensorflow/compiler/xla/service/hlo_value.h b/tensorflow/compiler/xla/service/hlo_value.h index 63ecc25020b..6872bc76a82 100644 --- a/tensorflow/compiler/xla/service/hlo_value.h +++ b/tensorflow/compiler/xla/service/hlo_value.h @@ -225,6 +225,9 @@ class HloValueSet { // already exist in the set. bool AddValue(const HloValue* value); + // Clear all values from the set. + void Clear() { values_.clear(); } + // Return the unique HLO value in the set. CHECKs if the set does not contain // exactly one value. 
const HloValue& GetUniqueValue() const { diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc b/tensorflow/compiler/xla/service/hlo_verifier.cc index 9ba2d54d024..c44be716cdf 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -32,13 +32,11 @@ class ShapeVerifier : public DfsHloVisitor { const std::function& shape_size_fn) : shape_size_fn_(shape_size_fn) {} - Status HandleElementwiseUnary(HloInstruction* hlo, - HloOpcode opcode) override { + Status HandleElementwiseUnary(HloInstruction* hlo) override { return CheckUnaryShape(hlo); } - Status HandleElementwiseBinary(HloInstruction* hlo, - HloOpcode opcode) override { + Status HandleElementwiseBinary(HloInstruction* hlo) override { return CheckBinaryShape(hlo); } @@ -282,6 +280,14 @@ class ShapeVerifier : public DfsHloVisitor { const std::function shape_size_fn_; }; +string ComputationsToString( + tensorflow::gtl::ArraySlice computations) { + return tensorflow::str_util::Join( + computations, ",", [](string* s, const HloComputation* computation) { + s->append(computation->name()); + }); +} + } // namespace StatusOr HloVerifier::Run(HloModule* module) { @@ -292,6 +298,17 @@ StatusOr HloVerifier::Run(HloModule* module) { for (const auto& instruction : computation->instructions()) { TF_RET_CHECK(instruction->parent() == computation.get()); if (instruction->opcode() == HloOpcode::kFusion) { + TF_RET_CHECK( + ContainersEqual(instruction->called_computations(), + {instruction->fused_instructions_computation()})) + << "Fusion HLO calls computations other than the " + "fused_instructions_computation: " + << instruction->ToString() + << " instruction->fused_instructions_computation(): " + << instruction->fused_instructions_computation()->ToString() + << " instruction->called_computations(): " + << ComputationsToString(instruction->called_computations()); + for (const auto& fused : instruction->fused_instructions()) { TF_RET_CHECK(fused->parent() == instruction->fused_instructions_computation()) diff --git a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc index 01dbb7e8663..33327dc60fb 100644 --- a/tensorflow/compiler/xla/service/reduce_precision_insertion.cc +++ b/tensorflow/compiler/xla/service/reduce_precision_insertion.cc @@ -122,7 +122,8 @@ StatusOr ReducePrecisionInsertion::insert_on_inputs( continue; } - if (instruction->opcode() == HloOpcode::kFusion) { + if (instruction->opcode() == HloOpcode::kFusion && + instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) { // Insert the reduce-precision operation inside the fusion computation, // after the corresponding parameter instruction. TF_ASSIGN_OR_RETURN( @@ -171,7 +172,8 @@ StatusOr ReducePrecisionInsertion::insert_on_outputs( continue; } - if (instruction->opcode() == HloOpcode::kFusion) { + if (instruction->opcode() == HloOpcode::kFusion && + instruction->fusion_kind() == HloInstruction::FusionKind::kLoop) { // Insert the reduce-precision operation as the last operation inside // the fusion computation. 
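Together with dropping the propagation of nested called computations in CloneAndFuseInternal, the verifier hunk above pins down the invariant that a fusion instruction directly calls only its fused-instructions computation. A small Python sketch of that check over a toy instruction model (illustrative only; the field and function names are not the verifier's API):

```python
class Instr:
    def __init__(self, name, opcode, called=None, fused_computation=None):
        self.name = name
        self.opcode = opcode
        self.called_computations = called or []
        self.fused_instructions_computation = fused_computation

def check_fusion_calls(instr):
    """Mirror of the invariant: a fusion instruction calls exactly its
    fused_instructions_computation and nothing else."""
    if instr.opcode != "fusion":
        return
    expected = [instr.fused_instructions_computation]
    if instr.called_computations != expected:
        raise ValueError(
            "Fusion %s calls computations other than its "
            "fused_instructions_computation: %s"
            % (instr.name, instr.called_computations))

fused = "fused_computation.0"
check_fusion_calls(Instr("fusion.1", "fusion", [fused], fused))   # passes
try:
    check_fusion_calls(Instr("fusion.2", "fusion", [fused, "map_comp"], fused))
except ValueError as e:
    print(e)
```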
HloInstruction* fusion_root = instruction->fused_expression_root(); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 47a0f54a023..84fcc0d0149 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -28,6 +28,7 @@ py_library( "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/fused_conv:fused_conv_py", + "//tensorflow/contrib/gan", "//tensorflow/contrib/graph_editor:graph_editor_py", "//tensorflow/contrib/grid_rnn:grid_rnn_py", "//tensorflow/contrib/hooks", @@ -72,6 +73,7 @@ py_library( "//tensorflow/contrib/staging", "//tensorflow/contrib/stat_summarizer:stat_summarizer_py", "//tensorflow/contrib/stateless", + "//tensorflow/contrib/summary:summary_ops", "//tensorflow/contrib/tensor_forest:init_py", "//tensorflow/contrib/tensorboard", "//tensorflow/contrib/testing:testing_py", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 315ea943cf3..d1d0e2823ad 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -31,6 +31,7 @@ from tensorflow.contrib import deprecated from tensorflow.contrib import distributions from tensorflow.contrib import factorization from tensorflow.contrib import framework +from tensorflow.contrib import gan from tensorflow.contrib import graph_editor from tensorflow.contrib import grid_rnn from tensorflow.contrib import image diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py index c377c50e9fe..a8b60460c8f 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy.py @@ -18,6 +18,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections +import os + from tensorflow.contrib.boosted_trees.proto import tree_config_pb2 from tensorflow.contrib.boosted_trees.python.training.functions import gbdt_batch from tensorflow.contrib.decision_trees.proto import generic_tree_model_extensions_pb2 @@ -26,18 +29,21 @@ from tensorflow.contrib.learn.python.learn import export_strategy from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils from tensorflow.python.client import session as tf_session from tensorflow.python.framework import ops +from tensorflow.python.platform import gfile from tensorflow.python.saved_model import loader as saved_model_loader from tensorflow.python.saved_model import tag_constants -def make_custom_export_strategy(name, convert_fn, feature_columns, +def make_custom_export_strategy(name, + convert_fn, + feature_columns, export_input_fn): """Makes custom exporter of GTFlow tree format. Args: name: A string, for the name of the export strategy. convert_fn: A function that converts the tree proto to desired format and - saves it to the desired location. + saves it to the desired location. Can be None to skip conversion. feature_columns: A list of feature columns. export_input_fn: A function that takes no arguments and returns an `InputFnOps`. @@ -68,9 +74,22 @@ def make_custom_export_strategy(name, convert_fn, feature_columns, dtec = tree_config_pb2.DecisionTreeEnsembleConfig() dtec.ParseFromString(dfec_str) # Export the result in the same folder as the saved model. 
- convert_fn(dtec, sorted_feature_names, len(dense_floats), - len(sparse_float_indices), len(sparse_int_indices), - result_dir, eval_result) + if convert_fn: + convert_fn(dtec, sorted_feature_names, + len(dense_floats), + len(sparse_float_indices), + len(sparse_int_indices), result_dir, eval_result) + feature_importances = _get_feature_importances( + dtec, sorted_feature_names, + len(dense_floats), + len(sparse_float_indices), len(sparse_int_indices)) + sorted_by_importance = sorted( + feature_importances.items(), key=lambda x: -x[1]) + assets_dir = os.path.join(result_dir, "assets.extra") + gfile.MakeDirs(assets_dir) + with gfile.GFile(os.path.join(assets_dir, "feature_importances"), + "w") as f: + f.write("\n".join("%s, %f" % (k, v) for k, v in sorted_by_importance)) return result_dir return export_strategy.ExportStrategy(name, export_fn) @@ -157,3 +176,41 @@ def convert_to_universal_format(dtec, sorted_feature_names, node.left_child_id.value = split.left_id node.right_child_id.value = split.right_id return model_and_features + + +def _get_feature_importances(dtec, feature_names, num_dense_floats, + num_sparse_float, num_sparse_int): + """Export the feature importance per feature column.""" + del num_sparse_int # Unused. + sums = collections.defaultdict(lambda: 0) + for tree_idx in range(len(dtec.trees)): + tree = dtec.trees[tree_idx] + for tree_node in tree.nodes: + node_type = tree_node.WhichOneof("node") + if node_type == "dense_float_binary_split": + split = tree_node.dense_float_binary_split + split_column = feature_names[split.feature_column] + elif node_type == "sparse_float_binary_split_default_left": + split = tree_node.sparse_float_binary_split_default_left.split + split_column = feature_names[split.feature_column + num_dense_floats] + elif node_type == "sparse_float_binary_split_default_right": + split = tree_node.sparse_float_binary_split_default_right.split + split_column = feature_names[split.feature_column + num_dense_floats] + elif node_type == "categorical_id_binary_split": + split = tree_node.categorical_id_binary_split + split_column = feature_names[split.feature_column + num_dense_floats + + num_sparse_float] + elif node_type == "categorical_id_set_membership_binary_split": + split = tree_node.categorical_id_set_membership_binary_split + split_column = feature_names[split.feature_column + num_dense_floats + + num_sparse_float] + elif node_type == "leaf": + assert tree_node.node_metadata.gain == 0 + continue + else: + raise ValueError("Unexpected split type %s", node_type) + # Apply shrinkage factor. It is important since it is not always uniform + # across different trees. 
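_get_feature_importances above accumulates per-column split gains, and the shrinkage note closing this hunk matters because each gain is scaled by its tree's weight before being summed. A hand-computed example of that accumulation on made-up numbers (plain tuples stand in for the ensemble proto):

```python
from collections import defaultdict

# Made-up ensemble: (tree_weight, [(feature_name, split_gain), ...]) per tree.
trees = [
    (0.1, [("age", 30.0), ("income", 10.0)]),
    (0.5, [("age", 4.0)]),
]

importances = defaultdict(float)
for tree_weight, splits in trees:
    for feature, gain in splits:
        # Shrinkage: gains from different trees are not directly comparable,
        # so each gain is scaled by its tree's weight before summing.
        importances[feature] += gain * tree_weight

print(dict(importances))   # {'age': 5.0, 'income': 1.0}
```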
+ sums[split_column] += ( + tree_node.node_metadata.gain * dtec.tree_weights[tree_idx]) + return dict(sums) diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py index 8d801fa1f38..4ed18b2d34c 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/custom_export_strategy_test.py @@ -27,7 +27,7 @@ from tensorflow.python.platform import googletest class ConvertModelTest(test_util.TensorFlowTestCase): - def testConvertModel(self): + def _make_trees(self): dtec_str = """ trees { nodes { @@ -108,8 +108,12 @@ class ConvertModelTest(test_util.TensorFlowTestCase): """ dtec = tree_config_pb2.DecisionTreeEnsembleConfig() text_format.Merge(dtec_str, dtec) - # The feature columns in the order they were added. feature_columns = ["feature_b", "feature_a", "feature_d"] + return dtec, feature_columns + + def testConvertModel(self): + dtec, feature_columns = self._make_trees() + # The feature columns in the order they were added. out = custom_export_strategy.convert_to_universal_format( dtec, feature_columns, 1, 1, 1) @@ -273,6 +277,16 @@ class ConvertModelTest(test_util.TensorFlowTestCase): }""" self.assertProtoEquals(expected_tree, out) + def testFeatureImportance(self): + dtec, feature_columns = self._make_trees() + feature_importances = custom_export_strategy._get_feature_importances( + dtec, feature_columns, 1, 1, 1) + self.assertItemsEqual(["feature_b", "feature_a", "feature_d"], + feature_importances.keys()) + self.assertAlmostEqual(50.0, feature_importances["feature_b"], places=4) + self.assertAlmostEqual(50.0, feature_importances["feature_a"], places=4) + self.assertAlmostEqual(50.0, feature_importances["feature_d"], places=4) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py index e28adad53ec..f8028acbdb0 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/estimator.py @@ -61,11 +61,19 @@ class GradientBoostedDecisionTreeClassifier(estimator.Estimator): logits_modifier_function: A modifier function for the logits. center_bias: Whether a separate tree should be created for first fitting the bias. + + Raises: + ValueError: If learner_config is not valid. """ head = head_lib.multi_class_head( n_classes=n_classes, weight_column_name=weight_column_name, enable_centered_bias=False) + if learner_config.num_classes == 0: + learner_config.num_classes = n_classes + elif learner_config.num_classes != n_classes: + raise ValueError("n_classes (%d) doesn't match learner_config (%d)." 
% + (learner_config.num_classes, n_classes)) super(GradientBoostedDecisionTreeClassifier, self).__init__( model_fn=model.model_builder, params={ @@ -129,6 +137,10 @@ class GradientBoostedDecisionTreeRegressor(estimator.Estimator): label_dimension=label_dimension, weight_column_name=weight_column_name, enable_centered_bias=False) + if label_dimension == 1: + learner_config.num_classes = 2 + else: + learner_config.num_classes = label_dimension super(GradientBoostedDecisionTreeRegressor, self).__init__( model_fn=model.model_builder, params={ diff --git a/tensorflow/contrib/boosted_trees/estimator_batch/model.py b/tensorflow/contrib/boosted_trees/estimator_batch/model.py index 2d517f78111..8cda5c8f2b1 100644 --- a/tensorflow/contrib/boosted_trees/estimator_batch/model.py +++ b/tensorflow/contrib/boosted_trees/estimator_batch/model.py @@ -92,6 +92,7 @@ def model_builder(features, labels, mode, params, config): examples_per_layer=examples_per_layer, learner_config=learner_config, feature_columns=feature_columns, + logits_dimension=head.logits_dimension, features=features) with ops.name_scope("gbdt", "gbdt_optimizer"): predictions_dict = gbdt_model.predict(mode) diff --git a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc index 42112c586a5..f4ad99f779e 100644 --- a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc @@ -74,7 +74,7 @@ class TreeEnsembleStampTokenOp : public OpKernel { decision_tree_ensemble_resource; OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); core::ScopedUnref unref_me(decision_tree_ensemble_resource); Tensor* output_stamp_token_t = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape(), @@ -95,7 +95,7 @@ class TreeEnsembleSerializeOp : public OpKernel { decision_tree_ensemble_resource; OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); core::ScopedUnref unref_me(decision_tree_ensemble_resource); Tensor* output_stamp_token_t = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape(), diff --git a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc index daca0495481..8ffd7f120b4 100644 --- a/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/prediction_ops.cc @@ -143,7 +143,7 @@ class GradientTreesPredictionOp : public OpKernel { // Release the reference to the resource once we're done using it. core::ScopedUnref unref_me(decision_tree_ensemble_resource); if (use_locking_) { - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); DoCompute(context, decision_tree_ensemble_resource); } else { DoCompute(context, decision_tree_ensemble_resource); @@ -334,7 +334,7 @@ class GradientTreesPartitionExamplesOp : public OpKernel { // Release the reference to the resource once we're done using it. 
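The estimator constructor changes above keep learner_config.num_classes consistent with the head: the classifier fills it in from n_classes (or rejects a mismatch), and the regressor derives it from label_dimension. A compact Python sketch of that reconciliation, using a plain object in place of the LearnerConfig proto:

```python
class FakeLearnerConfig:
    """Stand-in for learner_pb2.LearnerConfig; only num_classes matters here."""
    def __init__(self, num_classes=0):
        self.num_classes = num_classes   # proto default: 0 means "unset"

def reconcile_num_classes(learner_config, n_classes):
    """Sketch of the classifier-side check from the hunk above."""
    if learner_config.num_classes == 0:
        learner_config.num_classes = n_classes
    elif learner_config.num_classes != n_classes:
        raise ValueError("n_classes (%d) doesn't match learner_config (%d)."
                         % (n_classes, learner_config.num_classes))
    return learner_config

print(reconcile_num_classes(FakeLearnerConfig(), 2).num_classes)   # 2, filled in
try:
    reconcile_num_classes(FakeLearnerConfig(num_classes=3), 2)
except ValueError as e:
    print(e)                                                       # mismatch
```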
core::ScopedUnref unref_me(decision_tree_ensemble_resource); if (use_locking_) { - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); DoCompute(context, decision_tree_ensemble_resource); } else { DoCompute(context, decision_tree_ensemble_resource); diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index 9e9ef1738cd..d528757cf99 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -656,7 +656,8 @@ class GrowTreeEnsembleOp : public OpKernel { CHECK(split->split_info.split_node().node_case() != TreeNode::NODE_NOT_SET); CHECK(tree_config->nodes(node_id).node_case() == TreeNode::kLeaf) << "Unexpected node type to split " - << tree_config->nodes(node_id).node_case(); + << tree_config->nodes(node_id).node_case() << " for node_id " << node_id + << ". Tree config: " << tree_config->DebugString(); // Add left leaf. int32 left_id = tree_config->nodes_size(); @@ -767,7 +768,7 @@ class TreeEnsembleStatsOp : public OpKernel { OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), &decision_tree_ensemble_resource)); core::ScopedUnref unref_me(decision_tree_ensemble_resource); - mutex_lock l(*decision_tree_ensemble_resource->get_mutex()); + tf_shared_lock l(*decision_tree_ensemble_resource->get_mutex()); // Get the stamp token. const Tensor* stamp_token_t; diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc index 82664aed72d..f4c7df7fabd 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/bias-feature-column-handler_test.cc @@ -42,6 +42,7 @@ class BiasFeatureColumnHandlerTest : public ::testing::Test { example_partitions_({0, 0, 1, 3}) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new BiasFeatureColumnHandler(kClassId, kSlotId, kBatchSize)); diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc index abd72384648..ea82b3f086d 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/categorical-feature-column-handler_test.cc @@ -51,7 +51,7 @@ class CategoricalFeatureColumnHandlerTest : public ::testing::Test { values_(test::AsTensor({1, 2, 2, 0}, {4})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. 
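The kernel changes above switch read-only accesses to the tree ensemble (stamp, serialize, predict, partition, stats) from mutex_lock to tf_shared_lock, so concurrent readers no longer serialize against each other while mutating ops still take the lock exclusively. As a generic illustration of that reader/writer split only (this is not TF's mutex implementation), a minimal shared lock in Python:

```python
import threading

class SharedMutex:
    """Minimal readers-writer lock: many concurrent readers, exclusive writers."""

    def __init__(self):
        self._reader_count = 0
        self._count_lock = threading.Lock()      # guards _reader_count
        self._resource_lock = threading.Lock()   # held by a writer, or by readers as a group

    def acquire_shared(self):
        with self._count_lock:
            self._reader_count += 1
            if self._reader_count == 1:          # first reader locks writers out
                self._resource_lock.acquire()

    def release_shared(self):
        with self._count_lock:
            self._reader_count -= 1
            if self._reader_count == 0:          # last reader lets writers in again
                self._resource_lock.release()

    def acquire_exclusive(self):
        self._resource_lock.acquire()

    def release_exclusive(self):
        self._resource_lock.release()

# Read-only ops (serialize, predict, stats) would take the shared side;
# ops that grow or mutate the ensemble would take the exclusive side.
```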
handler_.reset(new CategoricalFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, indices_.matrix(), diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc index 396f48e5321..1bc9d733ad3 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/dense-quantized-feature-column-handler_test.cc @@ -51,7 +51,7 @@ class DenseQuantizedFeatureColumnHandlerTest : public ::testing::Test { dense_quantized_values_(test::AsTensor({1, 1, 0, 1}, {4})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. handler_.reset(new DenseQuantizedFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc index db8c64a617f..643d936ad23 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/handlers/sparse-quantized-feature-column-handler_test.cc @@ -53,7 +53,7 @@ class SparseQuantizedFeatureColumnHandlerTest : public ::testing::Test { sparse_quantized_values_(test::AsTensor({1, 0, 1}, {3})) { // Set L2 regularization. learner_config_.mutable_regularization()->set_l2(2.0f); - + learner_config_.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); // Create handler. 
handler_.reset(new SparseQuantizedFeatureColumnHandler( kClassId, kSlotId, kBatchSize, kFeatureColumn, diff --git a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc index f99b6826a78..ecb7a04efb9 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc +++ b/tensorflow/contrib/boosted_trees/lib/learner/stochastic/stats/node-stats_test.cc @@ -30,6 +30,7 @@ const double kDelta = 1e-5; TEST(NodeStatsTest, AlmostZero) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); NodeStats node_stats(learner_config, GradientStats(1e-8f, 1e-8f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); EXPECT_EQ(0, node_stats.gain); @@ -37,6 +38,7 @@ TEST(NodeStatsTest, AlmostZero) { TEST(NodeStatsTest, LessThanMinWeightConstraint) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_constraints()->set_min_node_weight(3.2f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); @@ -45,6 +47,7 @@ TEST(NodeStatsTest, LessThanMinWeightConstraint) { TEST(NodeStatsTest, L1RegSquashed) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(10.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); EXPECT_EQ(0, node_stats.weight_contribution[0]); @@ -53,6 +56,7 @@ TEST(NodeStatsTest, L1RegSquashed) { TEST(NodeStatsTest, L1RegPos) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); const float expected_clipped_grad = 7.32f - 5.0f; @@ -66,6 +70,7 @@ TEST(NodeStatsTest, L1RegPos) { TEST(NodeStatsTest, L1RegNeg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); NodeStats node_stats(learner_config, GradientStats(-7.32f, 1.63f)); const float expected_clipped_grad = -7.32f + 5.0f; @@ -79,6 +84,7 @@ TEST(NodeStatsTest, L1RegNeg) { TEST(NodeStatsTest, L2Reg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l2(8.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); const float expected_denom = 1.63f + 8.0f; @@ -91,6 +97,7 @@ TEST(NodeStatsTest, L2Reg) { TEST(NodeStatsTest, L1L2Reg) { LearnerConfig learner_config; + learner_config.set_multi_class_strategy(LearnerConfig::TREE_PER_CLASS); learner_config.mutable_regularization()->set_l1(5.0f); learner_config.mutable_regularization()->set_l2(8.0f); NodeStats node_stats(learner_config, GradientStats(7.32f, 1.63f)); diff --git a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h index 314c44fddc5..dad3b4e10de 100644 --- a/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h +++ b/tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_summary.h @@ -15,6 +15,7 @@ #ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_ #define 
THIRD_PARTY_TENSORFLOW_CONTRIB_BOOSTED_TREES_LIB_QUANTILES_WEIGHTED_QUANTILES_SUMMARY_H_ +#include #include #include "tensorflow/contrib/boosted_trees/lib/quantiles/weighted_quantiles_buffer.h" @@ -34,10 +35,27 @@ class WeightedQuantilesSummary { struct SummaryEntry { SummaryEntry(const ValueType& v, const WeightType& w, const WeightType& min, - const WeightType& max) - : value(v), weight(w), min_rank(min), max_rank(max) {} + const WeightType& max) { + // Explicitely initialize all of memory (including padding from memory + // alignment) to allow the struct to be msan-resistant "plain old data". + // + // POD = http://en.cppreference.com/w/cpp/concept/PODType + memset(this, 0, sizeof(*this)); - SummaryEntry() : value(0), weight(0), min_rank(0), max_rank(0) {} + value = v; + weight = w; + min_rank = min; + max_rank = max; + } + + SummaryEntry() { + memset(this, 0, sizeof(*this)); + + value = 0; + weight = 0; + min_rank = 0; + max_rank = 0; + } bool operator==(const SummaryEntry& other) const { return value == other.value && weight == other.weight && diff --git a/tensorflow/contrib/boosted_trees/proto/learner.proto b/tensorflow/contrib/boosted_trees/proto/learner.proto index 06ee223467b..919e7cd8142 100644 --- a/tensorflow/contrib/boosted_trees/proto/learner.proto +++ b/tensorflow/contrib/boosted_trees/proto/learner.proto @@ -17,7 +17,7 @@ message TreeRegularizationConfig { // Tree constraints config. message TreeConstraintsConfig { - // Maximum depth of the trees. + // Maximum depth of the trees. The default value is 6 if not specified. uint32 max_tree_depth = 1; // Min hessian weight per node. @@ -86,20 +86,22 @@ message LearningRateDropoutDrivenConfig { message LearnerConfig { enum PruningMode { - PRE_PRUNE = 0; - POST_PRUNE = 1; + PRUNING_MODE_UNSPECIFIED = 0; + PRE_PRUNE = 1; + POST_PRUNE = 2; } enum GrowingMode { - WHOLE_TREE = 0; - // Layer by layer is only supported by the batch learner. - LAYER_BY_LAYER = 1; + GROWING_MODE_UNSPECIFIED = 0; + WHOLE_TREE = 1; + LAYER_BY_LAYER = 2; } enum MultiClassStrategy { - TREE_PER_CLASS = 0; - FULL_HESSIAN = 1; - DIAGONAL_HESSIAN = 2; + MULTI_CLASS_STRATEGY_UNSPECIFIED = 0; + TREE_PER_CLASS = 1; + FULL_HESSIAN = 2; + DIAGONAL_HESSIAN = 3; } // Number of classes. @@ -118,16 +120,18 @@ message LearnerConfig { // Constraints. TreeConstraintsConfig constraints = 5; - // Pruning. + // Pruning. POST_PRUNE is the default pruning mode. PruningMode pruning_mode = 8; - // Growing Mode. + // Growing Mode. LAYER_BY_LAYER is the default growing mode. GrowingMode growing_mode = 9; - // Learning rate. + // Learning rate. By default we use fixed learning rate of 0.1. LearningRateConfig learning_rate_tuner = 6; - // Multi-class strategy. + // Multi-class strategy. By default we use TREE_PER_CLASS for binary + // classification and linear regression. For other cases, we use + // DIAGONAL_HESSIAN as the default. MultiClassStrategy multi_class_strategy = 10; // If you want to average the ensembles (for regularization), provide the diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py index 51e084b79c6..37595f1c75d 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/prediction_ops_test.py @@ -344,6 +344,7 @@ class PredictionOpsTest(test_util.TensorFlowTestCase): # Prepare learner config. 
learner_config = learner_pb2.LearnerConfig() learner_config.num_classes = 2 + learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE result, result_no_dropout, dropout_info = ( prediction_ops.gradient_trees_prediction( diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 6f85874a33a..2d28e0a9f16 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -261,6 +261,7 @@ class GradientBoostedDecisionTreeModel(object): examples_per_layer, learner_config, features, + logits_dimension, feature_columns=None): """Construct a new GradientBoostedDecisionTreeModel function. @@ -273,8 +274,8 @@ class GradientBoostedDecisionTreeModel(object): a tree layer. It can also be a function that computes the number of examples based on the depth of the layer that's being built. learner_config: A learner config. - print split, sorted_feature_names[split.feature_column] features: `dict` of `Tensor` objects. + logits_dimension: An int, the dimension of logits. feature_columns: A list of feature columns. Raises: @@ -289,11 +290,39 @@ class GradientBoostedDecisionTreeModel(object): if learner_config.num_classes < 2: raise ValueError("Number of classes must be >=2") + self._logits_dimension = logits_dimension self._is_chief = is_chief self._num_ps_replicas = num_ps_replicas self._ensemble_handle = ensemble_handle self._center_bias = center_bias self._examples_per_layer = examples_per_layer + + # Fill in the defaults. + if (learner_config.multi_class_strategy == + learner_pb2.LearnerConfig.MULTI_CLASS_STRATEGY_UNSPECIFIED): + if logits_dimension == 1: + learner_config.multi_class_strategy = ( + learner_pb2.LearnerConfig.TREE_PER_CLASS) + else: + learner_config.multi_class_strategy = ( + learner_pb2.LearnerConfig.DIAGONAL_HESSIAN) + + if (learner_config.growing_mode == + learner_pb2.LearnerConfig.GROWING_MODE_UNSPECIFIED): + learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER + + if (learner_config.pruning_mode == + learner_pb2.LearnerConfig.PRUNING_MODE_UNSPECIFIED): + learner_config.pruning_mode = learner_pb2.LearnerConfig.POST_PRUNE + + if learner_config.constraints.max_tree_depth == 0: + # Use 6 as the default maximum depth. + learner_config.constraints.max_tree_depth = 6 + + tuner = learner_config.learning_rate_tuner.WhichOneof("tuner") + if not tuner: + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + self._learner_config = learner_config self._feature_columns = feature_columns self._learner_config_serialized = learner_config.SerializeToString() @@ -378,75 +407,81 @@ class GradientBoostedDecisionTreeModel(object): local_stamp), _refresh_local_ensemble_fn, lambda: (control_flow_ops.no_op(), ensemble_stamp)) - # Once updated, Use the the local model for prediction. + # Once updated, use the local model for prediction. with ops.control_dependencies([refresh_local_ensemble]): ensemble_stats = training_ops.tree_ensemble_stats( local_ensemble_handle, ensemble_stamp) - apply_dropout, seed = _dropout_params(mode, ensemble_stats) # We don't need dropout info - we can always restore it based on the # seed. 
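With the proto enums now starting at *_UNSPECIFIED = 0, an untouched config no longer silently means PRE_PRUNE, WHOLE_TREE, or TREE_PER_CLASS; the constructor changes above resolve those sentinels at model-build time. A sketch of what an empty config resolves to for a given logits dimension, using a plain dict in place of the proto and the defaults listed in the hunk above:

```python
def resolve_learner_defaults(config, logits_dimension):
    """Fill unspecified fields the way the constructor hunk above describes.

    `config` is a dict stand-in for LearnerConfig; the string 'unspecified'
    plays the role of the new *_UNSPECIFIED = 0 enum sentinels.
    """
    resolved = dict(config)
    if resolved.get("multi_class_strategy", "unspecified") == "unspecified":
        resolved["multi_class_strategy"] = (
            "tree_per_class" if logits_dimension == 1 else "diagonal_hessian")
    if resolved.get("growing_mode", "unspecified") == "unspecified":
        resolved["growing_mode"] = "layer_by_layer"
    if resolved.get("pruning_mode", "unspecified") == "unspecified":
        resolved["pruning_mode"] = "post_prune"
    if resolved.get("max_tree_depth", 0) == 0:
        resolved["max_tree_depth"] = 6
    resolved.setdefault("learning_rate", 0.1)
    return resolved

print(resolve_learner_defaults({}, logits_dimension=1))
# {'multi_class_strategy': 'tree_per_class', 'growing_mode': 'layer_by_layer',
#  'pruning_mode': 'post_prune', 'max_tree_depth': 6, 'learning_rate': 0.1}
```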
- predictions, predictions_no_dropout, _ = ( - prediction_ops.gradient_trees_prediction( - local_ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=apply_averaging, - use_locking=False, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim)) - partition_ids = prediction_ops.gradient_trees_partition_examples( - local_ensemble_handle, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - use_locking=False) + apply_dropout, seed = _dropout_params(mode, ensemble_stats) + # Make sure ensemble stats run. This will check that the ensemble has + # the right stamp. + with ops.control_dependencies(ensemble_stats): + predictions, predictions_no_dropout, _ = ( + prediction_ops.gradient_trees_prediction( + local_ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=apply_averaging, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim)) + partition_ids = prediction_ops.gradient_trees_partition_examples( + local_ensemble_handle, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + use_locking=True) else: with ops.device(self._ensemble_handle.device): ensemble_stats = training_ops.tree_ensemble_stats( self._ensemble_handle, ensemble_stamp) - apply_dropout, seed = _dropout_params(mode, ensemble_stats) # We don't need dropout info - we can always restore it based on the # seed. - predictions, predictions_no_dropout, _ = ( - prediction_ops.gradient_trees_prediction( - self._ensemble_handle, - seed, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - learner_config=self._learner_config_serialized, - apply_dropout=apply_dropout, - apply_averaging=apply_averaging, - use_locking=False, - center_bias=self._center_bias, - reduce_dim=self._reduce_dim)) - partition_ids = prediction_ops.gradient_trees_partition_examples( - self._ensemble_handle, - self._dense_floats, - self._sparse_float_indices, - self._sparse_float_values, - self._sparse_float_shapes, - self._sparse_int_indices, - self._sparse_int_values, - self._sparse_int_shapes, - use_locking=False) + apply_dropout, seed = _dropout_params(mode, ensemble_stats) + # Make sure ensemble stats run. This will check that the ensemble has + # the right stamp. 
+ with ops.control_dependencies(ensemble_stats): + predictions, predictions_no_dropout, _ = ( + prediction_ops.gradient_trees_prediction( + self._ensemble_handle, + seed, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + learner_config=self._learner_config_serialized, + apply_dropout=apply_dropout, + apply_averaging=apply_averaging, + use_locking=True, + center_bias=self._center_bias, + reduce_dim=self._reduce_dim)) + partition_ids = prediction_ops.gradient_trees_partition_examples( + self._ensemble_handle, + self._dense_floats, + self._sparse_float_indices, + self._sparse_float_values, + self._sparse_float_shapes, + self._sparse_int_indices, + self._sparse_int_values, + self._sparse_int_shapes, + use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, predictions_no_dropout, partition_ids, diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index 9ce434edf8b..16e24d97dde 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -164,7 +164,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -268,7 +268,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=num_examples_fn, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -371,7 +371,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -442,7 +442,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -505,7 +505,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) predictions = array_ops.constant( [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) @@ -588,7 +588,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=1, features=features) # Create predict op. 
mode = model_fn.ModeKeys.EVAL @@ -627,7 +627,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -730,7 +730,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) predictions = array_ops.constant( [[0.0, -1.0, 0.5, 1.2, 3.1], [1.0, 0.0, 0.8, 0.3, 1.0], @@ -833,7 +833,7 @@ class GbdtTest(test_util.TensorFlowTestCase): ensemble_handle=ensemble_handle, examples_per_layer=1, learner_config=learner_config, - features=features) + logits_dimension=5, features=features) batch_size = 3 predictions = array_ops.constant( diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 477572d5881..d98579d2077 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz) -set(cub_HASH SHA256=87e856522c283b8ea887c3b61d7d5b252d2dd74abac4f1d756d776e721223e82) +set(cub_URL http://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.3.zip) +set(cub_HASH SHA256=b7ead9e291d34ffa8074243541c1380d63be63f88de23de8ee548db573b72ebe) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) diff --git a/tensorflow/contrib/cmake/tf_c.cmake b/tensorflow/contrib/cmake/tf_c.cmake index 87d946c3462..c5a10181271 100644 --- a/tensorflow/contrib/cmake/tf_c.cmake +++ b/tensorflow/contrib/cmake/tf_c.cmake @@ -18,6 +18,7 @@ set(tf_c_srcs "${tensorflow_source_dir}/tensorflow/c/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/c_api.h" + "${tensorflow_source_dir}/tensorflow/c/c_api_function.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.cc" "${tensorflow_source_dir}/tensorflow/c/eager/c_api.h" "${tensorflow_source_dir}/tensorflow/c/eager/runtime.cc" diff --git a/tensorflow/contrib/cmake/tf_python.cmake b/tensorflow/contrib/cmake/tf_python.cmake index 48023099379..1b706159a3d 100755 --- a/tensorflow/contrib/cmake/tf_python.cmake +++ b/tensorflow/contrib/cmake/tf_python.cmake @@ -315,6 +315,7 @@ add_python_module("tensorflow/contrib/framework/ops") add_python_module("tensorflow/contrib/framework/python") add_python_module("tensorflow/contrib/framework/python/framework") add_python_module("tensorflow/contrib/framework/python/ops") +add_python_module("tensorflow/contrib/gan") add_python_module("tensorflow/contrib/graph_editor") add_python_module("tensorflow/contrib/graph_editor/examples") add_python_module("tensorflow/contrib/graph_editor/tests") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index 76531add312..15850bf0a4e 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -291,6 +291,8 @@ if (tensorflow_BUILD_PYTHON_TESTS) # Failing with TF 1.3 (TODO) "${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/estimator_test.py" 
"${tensorflow_source_dir}/tensorflow/contrib/distributions/python/kernel_tests/bijectors/sinh_arcsinh_test.py" + # Test should only be run manually + "${tensorflow_source_dir}/tensorflow/python/kernel_tests/reduction_ops_test_big.py" ) endif() list(REMOVE_ITEM tf_test_src_py ${tf_test_src_py_exclude}) diff --git a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py index 694bd507d97..bc4fd10cac6 100644 --- a/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py +++ b/tensorflow/contrib/cudnn_rnn/python/ops/cudnn_rnn_ops.py @@ -716,6 +716,482 @@ _cudnn_rnn_common_doc_string = """ """ +def _check_direction(direction): + if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION): + raise ValueError("Invalid direction: %s, expect %s or %s" % + (direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION)) + + +def _check_rnn_mode(rnn_mode): + if rnn_mode not in (CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH, CUDNN_RNN_RELU): + raise ValueError("Invalid rnn_mode: %s, expect one of (%s, %s, %s, %s)" % + (rnn_mode, CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_TANH, + CUDNN_RNN_RELU)) + + +def _get_seed(seed): + seed, seed2 = random_seed.get_seed(seed) + if seed is None and seed2 is None: + seed, seed2 = 0, 0 + return seed, seed2 + + +def _get_num_params(rnn_mode, num_layers, direction): + """Return num params for given Cudnn config.""" + if rnn_mode == CUDNN_LSTM: + num_params_per_layer = 8 + elif rnn_mode == CUDNN_GRU: + num_params_per_layer = 6 + elif rnn_mode in (CUDNN_RNN_RELU, CUDNN_RNN_TANH): + num_params_per_layer = 2 + else: + raise ValueError("Invalid \'rnn_mode\': %s", rnn_mode) + num_params = num_layers * num_params_per_layer + if direction != CUDNN_RNN_UNIDIRECTION: + num_params *= 2 + return num_params + + +def _cudnn_rnn(inputs, + input_h, + input_c, + params, + is_training, + rnn_mode, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn RNN. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. + input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + input_c: the initial hidden state for c. This is only relevant for LSTM. + A Tensor of the same shape as input_h. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh'). + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. 
+ Returns: + outputs, output_h, output_c + """ + _check_rnn_mode(rnn_mode) + _check_direction(direction) + seed, seed2 = random_seed.get_seed(seed) + outputs, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn( + input=inputs, + input_h=input_h, + input_c=input_c, + params=params, + is_training=is_training, + rnn_mode=rnn_mode, + input_mode=input_mode, + direction=direction, + dropout=dropout, + seed=seed, + seed2=seed2, + name=name) + return (outputs, output_h, output_c) + + +def cudnn_lstm(inputs, + input_h, + input_c, + params, + is_training, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn LSTM. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. + input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + input_c: the initial hidden state for c. This is only relevant for LSTM. + A Tensor of the same shape as input_h. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + outputs, output_h, output_c + """ + return _cudnn_rnn(inputs, input_h, input_c, params, is_training, CUDNN_LSTM, + input_mode, direction, dropout, seed, name) + + +def _cudnn_rnn_no_input_c(inputs, + input_h, + params, + is_training, + rnn_mode, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn RNN w/o input_c. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. + input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + rnn_mode: one of ('lstm', 'gru', 'rnn_relu', 'rnn_tanh'). + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. 
+ Returns: + outputs, output_h + """ + input_c = array_ops.constant([], dtype=input_h.dtype) + outputs, output_h, _ = _cudnn_rnn(inputs, input_h, input_c, params, + is_training, rnn_mode, input_mode, + direction, dropout, seed, name) + return outputs, output_h + + +def cudnn_gru(inputs, + input_h, + params, + is_training, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn GRU. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. + input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + outputs, output_h + """ + return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training, CUDNN_GRU, + input_mode, direction, dropout, seed, name) + + +def cudnn_rnn_relu(inputs, + input_h, + params, + is_training, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn RNN Relu. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. + input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + outputs, output_h + """ + return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training, + CUDNN_RNN_RELU, input_mode, direction, dropout, + seed, name) + + +def cudnn_rnn_tanh(inputs, + input_h, + params, + is_training, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0., + seed=0, + name=None): + """Cudnn RNN Tanh. + + Args: + inputs: the input sequence to the RNN model. A Tensor of shape [?, + batch_size, input_size]. 
+ input_h: the initial hidden state for h. A Tensor of shape [num_layers, + batch_size, num_units]. + params: the parameter buffer created for this model. + is_training: whether this operation will be used in training or inference + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + outputs, output_h + """ + return _cudnn_rnn_no_input_c(inputs, input_h, params, is_training, + CUDNN_RNN_TANH, input_mode, direction, dropout, + seed, name) + + +def cudnn_rnn_params_to_canonical(rnn_mode, + num_layers, + num_units, + input_size, + params, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0, + seed=0, + name=None): + """Convert cudnn opaque params to canonical. + + Args: + rnn_mode: a string specifies the mode, under which this RNN model runs. + Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'. + num_layers: the number of layers for the RNN model. + num_units: the number of units within the RNN model. + input_size: the size of the input, it could be different from the + num_units. + params: opaque cudnn params var. + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + weights list and bias list + Raises: + ValueError: if rnn_mode or direction is invalid. + """ + + _check_rnn_mode(rnn_mode) + _check_direction(direction) + num_params = _get_num_params(rnn_mode, num_layers, direction) + seed, seed2 = random_seed.get_seed(seed) + weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical( + rnn_mode=rnn_mode, + num_layers=num_layers, + num_units=num_units, + input_size=input_size, + params=params, + input_mode=input_mode, + direction=direction, + dropout=dropout, + seed=seed, + seed2=seed2, + num_params=num_params, + name=name) + return weights, biases + + +def cudnn_rnn_canonical_to_params(rnn_mode, + num_layers, + num_units, + input_size, + weights, + biases, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dropout=0, + seed=0, + name=None): + """Converts params from the canonical format to a specific format of cuDNN. 
+ + Args: + rnn_mode: a string specifies the mode, under which this RNN model runs. + Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'. + num_layers: the number of layers for the RNN model. + num_units: the number of units within the RNN model. + input_size: the size of the input, it could be different from the + num_units. + weights: a Tensor for weight parameters. + biases: a Tensor for bias parameters. + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + an opaque Cudnn param. + Raises: + ValueError: if rnn_mode or direction is invalid. + """ + _check_rnn_mode(rnn_mode) + _check_direction(direction) + seed, seed2 = random_seed.get_seed(seed) + return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params( + rnn_mode=rnn_mode, + num_layers=num_layers, + num_units=num_units, + input_size=input_size, + weights=weights, + biases=biases, + input_mode=input_mode, + direction=direction, + dropout=dropout, + seed=seed, + seed2=seed2, + name=name) + + +def cudnn_opaque_params_size(rnn_mode, + num_layers, + num_units, + input_size, + input_mode=CUDNN_INPUT_LINEAR_MODE, + direction=CUDNN_RNN_UNIDIRECTION, + dtype=dtypes.float32, + dropout=0, + seed=0, + name=None): + """Returns opaque params size for specific Cudnn config. + + Args: + rnn_mode: a string specifies the mode, under which this RNN model runs. + Could be either 'lstm', 'gru', 'rnn_tanh' or 'rnn_relu'. + num_layers: the number of layers for the RNN model. + num_units: the number of units within the RNN model. + input_size: the size of the input, it could be different from the + num_units. + input_mode: indicate whether there is a linear projection between the + input and the actual computation before the first layer. It could be + 'linear_input', 'skip_input' or 'auto_select'. + 'linear_input' (default) always applies a linear projection of input + onto RNN hidden state. (standard RNN behavior). + 'skip_input' is only allowed when input_size == num_units; + 'auto_select' implies 'skip_input' when input_size == num_units; + otherwise, it implies 'linear_input'. + direction: the direction model that the model operates. Could be either + 'unidirectional' or 'bidirectional' + dtype: one of tf.float32 or tf.float64. + dropout: whether to enable dropout. With it is 0, dropout is disabled. + seed: the op seed used for initializing dropout. See @{tf.set_random_seed} + for behavior. + name: name of the operation. + Returns: + a int, size of Cudnn opaque params. + Raises: + ValueError: if rnn_mode or direction is invalid. 
+ """ + _check_rnn_mode(rnn_mode) + _check_direction(direction) + seed, seed2 = random_seed.get_seed(seed) + return gen_cudnn_rnn_ops.cudnn_rnn_params_size( + rnn_mode=rnn_mode, + num_layers=num_layers, + num_units=num_units, + input_size=input_size, + T=dtype, + S=dtypes.int32, + dropout=dropout, + seed=seed, + seed2=seed2, + input_mode=input_mode, + direction=direction, + name=name)[0] + + class _CudnnRNN(object): """Creates an RNN model using the underlying Cudnn implementation. @@ -761,9 +1237,6 @@ class _CudnnRNN(object): Raises: ValueError: if direction is invalid. """ - if direction not in (CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION): - raise ValueError("Invalid direction: %s, expect %s or %s", - direction, CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION) self._num_layers = num_layers self._num_units = num_units self._input_size = input_size @@ -772,10 +1245,7 @@ class _CudnnRNN(object): self._direction = direction self._dtype = dtype self._dropout = dropout - # get graph and op seed. - self._seed, self._seed2 = random_seed.get_seed(seed) - if self._seed is None and self._seed2 is None: - self._seed, self._seed2 = 0, 0 + self._seed = seed @property def input_mode(self): @@ -807,18 +1277,16 @@ class _CudnnRNN(object): Returns: The calculated parameter buffer size. """ - return gen_cudnn_rnn_ops.cudnn_rnn_params_size( + return cudnn_opaque_params_size( + rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, input_size=self._input_size, - T=self._dtype, - S=dtypes.int32, + dtype=self._dtype, dropout=self._dropout, seed=self._seed, - seed2=self._seed2, - rnn_mode=self._rnn_mode, input_mode=self._input_mode, - direction=self._direction)[0] + direction=self._direction) def __call__(self, input_data, input_h, input_c, params, is_training=True): """Runs the forward step for the RNN model. @@ -837,22 +1305,17 @@ class _CudnnRNN(object): output_h: the final state for h. output_c: the final state for c. This is only relevant for LSTM. """ - if self._rnn_mode != CUDNN_LSTM: - # For model that doesn't take input_c, replace with a dummy tensor. - input_c = array_ops.constant([], dtype=self._dtype) - output, output_h, output_c, _ = gen_cudnn_rnn_ops.cudnn_rnn( - input=input_data, - input_h=input_h, - input_c=input_c, - params=params, - rnn_mode=self._rnn_mode, + return _cudnn_rnn( + input_data, + input_h, + input_c, + params, + is_training, + self._rnn_mode, input_mode=self._input_mode, direction=self._direction, dropout=self._dropout, - seed=self._seed, - seed2=self._seed2, - is_training=is_training) - return (output, output_h, output_c) + seed=self._seed) def params_to_canonical(self, params): """Converts params from a specific format of cuDNN to the canonical format. @@ -863,22 +1326,16 @@ class _CudnnRNN(object): Returns: A function for the specific-to-canonical conversion. 
""" - num_params = self._num_layers * self._NUM_PARAMS_PER_LAYER - if self._direction != CUDNN_RNN_UNIDIRECTION: - num_params *= 2 - weights, biases = gen_cudnn_rnn_ops.cudnn_rnn_params_to_canonical( + return cudnn_rnn_params_to_canonical( + rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, input_size=self._input_size, params=params, - dropout=self._dropout, - seed=self._seed, - seed2=self._seed2, - num_params=num_params, - rnn_mode=self._rnn_mode, input_mode=self._input_mode, - direction=self._direction) - return weights, biases + direction=self._direction, + dropout=self._dropout, + seed=self._seed) def canonical_to_params(self, weights, biases): """Converts params from the canonical format to a specific format of cuDNN. @@ -890,18 +1347,17 @@ class _CudnnRNN(object): Returns: A function for the canonical-to-params-to-specific conversion.. """ - return gen_cudnn_rnn_ops.cudnn_rnn_canonical_to_params( + return cudnn_rnn_canonical_to_params( + rnn_mode=self._rnn_mode, num_layers=self._num_layers, num_units=self._num_units, input_size=self._input_size, weights=weights, biases=biases, - dropout=self._dropout, - seed=self._seed, - seed2=self._seed2, - rnn_mode=self._rnn_mode, input_mode=self._input_mode, - direction=self._direction) + direction=self._direction, + dropout=self._dropout, + seed=self._seed) class CudnnLSTM(_CudnnRNN): @@ -1036,9 +1492,16 @@ class _CudnnRNNNoInputC(_CudnnRNN): output: the output sequuence. output_h: the final state for h. """ - output, output_h, _ = super(_CudnnRNNNoInputC, self).__call__( - input_data, input_h, None, params, is_training=is_training) - return (output, output_h) + return _cudnn_rnn_no_input_c( + input_data, + input_h, + params, + is_training, + self._rnn_mode, + input_mode=self._input_mode, + direction=self._direction, + dropout=self._dropout, + seed=self._seed) class CudnnGRU(_CudnnRNNNoInputC): diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 5308ab64ace..1c0a5288f7e 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -22,6 +22,7 @@ @@read_batch_features @@rejection_resample +@@group_by_window """ from __future__ import absolute_import @@ -31,6 +32,7 @@ from __future__ import print_function # pylint: disable=unused-import from tensorflow.contrib.data.python.ops.dataset_ops import Dataset from tensorflow.contrib.data.python.ops.dataset_ops import FixedLengthRecordDataset +from tensorflow.contrib.data.python.ops.dataset_ops import group_by_window from tensorflow.contrib.data.python.ops.dataset_ops import Iterator from tensorflow.contrib.data.python.ops.dataset_ops import read_batch_features from tensorflow.contrib.data.python.ops.dataset_ops import rejection_resample diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index 71df1ee0a50..0111aae1035 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -37,7 +37,9 @@ class GroupByWindowTest(test.TestCase): components = np.random.randint(100, size=(200,)).astype(np.int64) iterator = dataset_ops.Iterator.from_dataset( dataset_ops.Dataset.from_tensor_slices(components).map(lambda x: x * x) - .group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), 4)) + .apply( + dataset_ops.group_by_window, + args=(lambda x: x % 2, lambda _, xs: xs.batch(4), 4))) init_op = iterator.initializer get_next = 
iterator.get_next() @@ -61,8 +63,9 @@ class GroupByWindowTest(test.TestCase): components = np.array( [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 0, 0, 2, 2, 0, 0], dtype=np.int64) iterator = dataset_ops.Iterator.from_dataset( - dataset_ops.Dataset.from_tensor_slices(components).repeat(-1) - .group_by_window(lambda x: x % 3, lambda _, xs: xs.batch(4), 4)) + dataset_ops.Dataset.from_tensor_slices(components).repeat(-1).apply( + dataset_ops.group_by_window, + args=(lambda x: x % 3, lambda _, xs: xs.batch(4), 4))) init_op = iterator.initializer get_next = iterator.get_next() @@ -81,8 +84,9 @@ class GroupByWindowTest(test.TestCase): def testSmallGroups(self): components = np.array([0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0], dtype=np.int64) iterator = dataset_ops.Iterator.from_dataset( - dataset_ops.Dataset.from_tensor_slices(components) - .group_by_window(lambda x: x % 2, lambda _, xs: xs.batch(4), 4)) + dataset_ops.Dataset.from_tensor_slices(components).apply( + dataset_ops.group_by_window, + args=(lambda x: x % 2, lambda _, xs: xs.batch(4), 4))) init_op = iterator.initializer get_next = iterator.get_next() @@ -108,8 +112,9 @@ class GroupByWindowTest(test.TestCase): iterator = dataset_ops.Iterator.from_dataset( dataset_ops.Dataset.from_tensor_slices(components) - .map(lambda x: (x, ops.convert_to_tensor([x * x]))) - .group_by_window(lambda x, _: x % 2, reduce_func, 32)) + .map(lambda x: (x, ops.convert_to_tensor([x * x]))).apply( + dataset_ops.group_by_window, + args=(lambda x, _: x % 2, reduce_func, 32))) init_op = iterator.initializer get_next = iterator.get_next() @@ -124,17 +129,20 @@ class GroupByWindowTest(test.TestCase): def reduce_func(key, window): # Apply two different kinds of padding to the input: tight # padding, and quantized (to a multiple of 10) padding. - return dataset_ops.Dataset.zip((window.padded_batch( - 4, - padded_shapes=tensor_shape.TensorShape([None])), window.padded_batch( + return dataset_ops.Dataset.zip(( + window.padded_batch( + 4, padded_shapes=tensor_shape.TensorShape([None])), + window.padded_batch( 4, padded_shapes=ops.convert_to_tensor([(key + 1) * 10])),)) iterator = dataset_ops.Iterator.from_dataset( dataset_ops.Dataset.from_tensor_slices(components) .map(lambda x: array_ops.fill([math_ops.cast(x, dtypes.int32)], x)) - .group_by_window( - lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64), - reduce_func, 4)) + .apply( + dataset_ops.group_by_window, + args= + (lambda x: math_ops.cast(array_ops.shape(x)[0] // 10, dtypes.int64), + reduce_func, 4))) init_op = iterator.initializer get_next = iterator.get_next() @@ -151,10 +159,9 @@ class GroupByWindowTest(test.TestCase): self.assertEqual(len(components), sum(counts)) -# NOTE(mrry): These tests are based on the tests in -# bucket_ops_test.py. Currently, different batch sizes for each key -# are not supported, although this would be possible to add to -# `Dataset.group_by_window()`. +# NOTE(mrry): These tests are based on the tests in bucket_ops_test.py. +# Currently, they use a constant batch size, though should be made to use a +# different batch size per key. 
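A compact sketch of the chaining style these test updates migrate to: the removed Dataset.group_by_window() method is replaced by routing the free function through Dataset.apply(). Import paths are assumed to match the tests above.

import numpy as np

from tensorflow.contrib.data.python.ops import dataset_ops
from tensorflow.python.client import session

components = np.random.randint(100, size=(200,)).astype(np.int64)

# Old (removed): dataset.group_by_window(key_func, reduce_func, window_size)
# New: pass the free function and its arguments through apply().
dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
    dataset_ops.group_by_window,
    args=(lambda x: x % 2,            # key_func: group elements by parity
          lambda _, xs: xs.batch(4),  # reduce_func: batch each group
          4))                         # constant window_size

iterator = dataset_ops.Iterator.from_dataset(dataset)
get_next = iterator.get_next()

with session.Session() as sess:
  sess.run(iterator.initializer)
  print(sess.run(get_next))  # up to 4 elements that share the same parity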
class BucketTest(test.TestCase): def _dynamicPad(self, bucket, window, window_size): @@ -168,6 +175,7 @@ class BucketTest(test.TestCase): tensor_shape.TensorShape([3]))))) def testSingleBucket(self): + def _map_fn(v): return (v, array_ops.fill([v], v), array_ops.fill([3], string_ops.as_string(v))) @@ -175,9 +183,10 @@ class BucketTest(test.TestCase): input_dataset = ( dataset_ops.Dataset.from_tensor_slices(math_ops.range(32)).map(_map_fn)) - bucketed_dataset = input_dataset.group_by_window( - lambda x, y, z: 0, lambda k, bucket: self._dynamicPad(k, bucket, 32), - 32) + bucketed_dataset = input_dataset.apply( + dataset_ops.group_by_window, + args=(lambda x, y, z: 0, + lambda k, bucket: self._dynamicPad(k, bucket, 32), 32)) iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset) init_op = iterator.initializer @@ -201,6 +210,7 @@ class BucketTest(test.TestCase): self.assertAllEqual(expected_vec3_str, bucketed_values[2]) def testEvenOddBuckets(self): + def _map_fn(v): return (v, array_ops.fill([v], v), array_ops.fill([3], string_ops.as_string(v))) @@ -208,9 +218,10 @@ class BucketTest(test.TestCase): input_dataset = ( dataset_ops.Dataset.from_tensor_slices(math_ops.range(64)).map(_map_fn)) - bucketed_dataset = input_dataset.group_by_window( - lambda x, y, z: math_ops.cast(x % 2, dtypes.int64), - lambda k, bucket: self._dynamicPad(k, bucket, 32), 32) + bucketed_dataset = input_dataset.apply( + dataset_ops.group_by_window, + args=(lambda x, y, z: math_ops.cast(x % 2, dtypes.int64), + lambda k, bucket: self._dynamicPad(k, bucket, 32), 32)) iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset) init_op = iterator.initializer @@ -256,25 +267,31 @@ class BucketTest(test.TestCase): self.assertAllEqual(expected_vec3_str, bucketed_values_odd[2]) def testEvenOddBucketsFilterOutAllOdd(self): + def _map_fn(v): - return {"x": v, - "y": array_ops.fill([v], v), - "z": array_ops.fill([3], string_ops.as_string(v))} + return { + "x": v, + "y": array_ops.fill([v], v), + "z": array_ops.fill([3], string_ops.as_string(v)) + } def _dynamic_pad_fn(bucket, window, _): return dataset_ops.Dataset.zip( (dataset_ops.Dataset.from_tensors(bucket), window.padded_batch( - 32, {"x": tensor_shape.TensorShape([]), - "y": tensor_shape.TensorShape([None]), - "z": tensor_shape.TensorShape([3])}))) + 32, { + "x": tensor_shape.TensorShape([]), + "y": tensor_shape.TensorShape([None]), + "z": tensor_shape.TensorShape([3]) + }))) input_dataset = ( dataset_ops.Dataset.from_tensor_slices(math_ops.range(128)).map(_map_fn) .filter(lambda d: math_ops.equal(d["x"] % 2, 0))) - bucketed_dataset = input_dataset.group_by_window( - lambda d: math_ops.cast(d["x"] % 2, dtypes.int64), - lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32) + bucketed_dataset = input_dataset.apply( + dataset_ops.group_by_window, + args=(lambda d: math_ops.cast(d["x"] % 2, dtypes.int64), + lambda k, bucket: _dynamic_pad_fn(k, bucket, 32), 32)) iterator = dataset_ops.Iterator.from_dataset(bucketed_dataset) init_op = iterator.initializer @@ -295,6 +312,40 @@ class BucketTest(test.TestCase): self.assertAllEqual( np.arange(64, 128, 2, dtype=np.int64), bucketed_values_even1["x"]) + def testDynamicWindowSize(self): + components = np.arange(100).astype(np.int64) + + # Key fn: even/odd + # Reduce fn: batches of 5 + # Window size fn: even=5, odd=10 + + def window_size_func(key): + window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64) + return window_sizes[key] + + dataset = dataset_ops.Dataset.from_tensor_slices(components).apply( + 
dataset_ops.group_by_window, + args=(lambda x: x % 2, lambda _, xs: xs.batch(20), None, + window_size_func)) + iterator = dataset_ops.Iterator.from_dataset(dataset) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + with self.assertRaises(errors.OutOfRangeError): + batches = 0 + while True: + result = sess.run(get_next) + is_even = all(x % 2 == 0 for x in result) + is_odd = all(x % 2 == 1 for x in result) + self.assertTrue(is_even or is_odd) + expected_batch_size = 5 if is_even else 10 + self.assertEqual(expected_batch_size, result.shape[0]) + batches += 1 + + self.assertEqual(batches, 15) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/dataset_ops.py b/tensorflow/contrib/data/python/ops/dataset_ops.py index 20e564b8b77..0ee9acfc97f 100644 --- a/tensorflow/contrib/data/python/ops/dataset_ops.py +++ b/tensorflow/contrib/data/python/ops/dataset_ops.py @@ -1199,28 +1199,9 @@ class Dataset(object): return DenseToSparseBatchDataset(self, batch_size, row_shape) def group_by_window(self, key_func, reduce_func, window_size): - """Performs a windowed "group-by" operation on this dataset. - - This method maps each consecutive element in this dataset to a key - using `key_func` and groups the elements by key. It then applies - `reduce_func` to at most `window_size` elements matching the same - key. All execpt the final window for each key will contain - `window_size` elements; the final window may be smaller. - - Args: - key_func: A function mapping a nested structure of tensors - (having shapes and types defined by `self.output_shapes` and - `self.output_types`) to a scalar `tf.int64` tensor. - reduce_func: A function mapping a key and a dataset of up to `batch_size` - consecutive elements matching that key to another dataset. - window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - consecutive elements matching the same key to combine in a single - batch, which will be passed to `reduce_func`. - - Returns: - A `Dataset`. - """ - return GroupByWindowDataset(self, key_func, reduce_func, window_size) + """See group_by_window().""" + return self.apply( + group_by_window, args=(key_func, reduce_func, window_size)) def map(self, map_func, @@ -1370,6 +1351,43 @@ class Dataset(object): """ return FilterDataset(self, predicate) + def apply(self, fn, args=(), kwargs={}): # pylint: disable=dangerous-default-value + """Apply a function to this dataset. + + `apply` enables chaining of custom `Dataset` transformations. + + For example: + + ``` + dataset.map( + lambda x: x**2 + ).apply( + group_by_window, args=(key_func, reduce_func, window_size) + ).map( + lambda x: x**3 + ) + ``` + + Args: + fn: A function that takes a `Dataset`, `args`, and `kwargs`, and + returns a `Dataset`. + args: A `tuple` or `list` of arguments to be passed to `fn`. + kwargs: A `dict` of keyword arguments to be passed to `fn`. + + Returns: + The `Dataset` returned by `fn`. + """ + if not (isinstance(args, tuple) or isinstance(args, list)): + raise TypeError("args must be a tuple or list.") + if not isinstance(kwargs, dict): + raise TypeError("kwargs must be a dict.") + + dataset = fn(self, *args, **kwargs) + + if not isinstance(dataset, Dataset): + raise TypeError("fn must return a Dataset.") + return dataset + class TensorDataset(Dataset): """A `Dataset` with a single element, viz. 
a nested structure of tensors.""" @@ -1927,71 +1945,6 @@ class _ResourceDataset(Dataset): return self._output_types -class GroupByWindowDataset(Dataset): - """A `Dataset` that groups its input and performs a windowed reduction.""" - - def __init__(self, input_dataset, key_func, reduce_func, window_size): - """See `Dataset.group_by_window()` for details.""" - super(GroupByWindowDataset, self).__init__() - self._input_dataset = input_dataset - self._window_size = window_size - - @function.Defun(*nest.flatten(input_dataset.output_types)) - def tf_key_func(*args): - """A wrapper for Defun that facilitates shape inference.""" - # Pass in shape information from the input_dataset. - for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): - arg.set_shape(shape) - nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - if _should_unpack_args(nested_args): - ret = key_func(*nested_args) - else: - ret = key_func(nested_args) - ret = ops.convert_to_tensor(ret, dtype=dtypes.int64) - if ret.dtype != dtypes.int64: - raise ValueError("`key_func` must return a single tf.int64 tensor.") - return ret - - self._key_func = tf_key_func - self._key_func.add_to_graph(ops.get_default_graph()) - - @function.Defun(dtypes.int64, dtypes.resource) - def tf_reduce_func(key, window_dataset_resource): - """A wrapper for Defun that facilitates shape inference.""" - key.set_shape([]) - window_dataset = _ResourceDataset(window_dataset_resource, - input_dataset.output_types, - input_dataset.output_shapes) - output_dataset = reduce_func(key, window_dataset) - if not isinstance(output_dataset, Dataset): - raise TypeError("`reduce_func` must return a `Dataset` object.") - self._output_types = output_dataset.output_types - self._output_shapes = output_dataset.output_shapes - return output_dataset.make_dataset_resource() - - self._reduce_func = tf_reduce_func - self._reduce_func.add_to_graph(ops.get_default_graph()) - - def make_dataset_resource(self): - return gen_dataset_ops.group_by_window_dataset( - self._input_dataset.make_dataset_resource(), - self._key_func.captured_inputs, - self._reduce_func.captured_inputs, - self._window_size, - key_func=self._key_func, - reduce_func=self._reduce_func, - output_types=nest.flatten(self.output_types), - output_shapes=nest.flatten(self.output_shapes)) - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - class MapDataset(Dataset): """A `Dataset` that maps a function over elements in its input.""" @@ -2660,3 +2613,149 @@ def _get_file_names(file_pattern, randomize_input): if not randomize_input: file_names = sorted(file_names) return file_names + + +class GroupByWindowDataset(Dataset): + """A `Dataset` that groups its input and performs a windowed reduction.""" + + def __init__(self, input_dataset, key_func, reduce_func, window_size_func): + """See `group_by_window()` for details.""" + super(GroupByWindowDataset, self).__init__() + + self._input_dataset = input_dataset + + self._make_key_func(key_func, input_dataset) + self._make_reduce_func(reduce_func, input_dataset) + self._make_window_size_func(window_size_func) + + def _make_window_size_func(self, window_size_func): + """Make wrapping Defun for window_size_func.""" + + @function.Defun(dtypes.int64) + def tf_window_size_func(key): + key.set_shape([]) + window_size = ops.convert_to_tensor( + window_size_func(key), dtype=dtypes.int64) + if window_size.dtype != dtypes.int64: + raise ValueError( + "`window_size_func` must 
return a single tf.int64 tensor.") + return window_size + + self._window_size_func = tf_window_size_func + self._window_size_func.add_to_graph(ops.get_default_graph()) + + def _make_key_func(self, key_func, input_dataset): + """Make wrapping Defun for key_func.""" + + @function.Defun(*nest.flatten(input_dataset.output_types)) + def tf_key_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + # Pass in shape information from the input_dataset. + for arg, shape in zip(args, nest.flatten(input_dataset.output_shapes)): + arg.set_shape(shape) + nested_args = nest.pack_sequence_as(input_dataset.output_types, args) + if _should_unpack_args(nested_args): + ret = key_func(*nested_args) + else: + ret = key_func(nested_args) + ret = ops.convert_to_tensor(ret, dtype=dtypes.int64) + if ret.dtype != dtypes.int64: + raise ValueError("`key_func` must return a single tf.int64 tensor.") + return ret + + self._key_func = tf_key_func + self._key_func.add_to_graph(ops.get_default_graph()) + + def _make_reduce_func(self, reduce_func, input_dataset): + """Make wrapping Defun for reduce_func.""" + + @function.Defun(dtypes.int64, dtypes.resource) + def tf_reduce_func(key, window_dataset_resource): + """A wrapper for Defun that facilitates shape inference.""" + key.set_shape([]) + window_dataset = _ResourceDataset(window_dataset_resource, + input_dataset.output_types, + input_dataset.output_shapes) + output_dataset = reduce_func(key, window_dataset) + if not isinstance(output_dataset, Dataset): + raise TypeError("`reduce_func` must return a `Dataset` object.") + self._output_types = output_dataset.output_types + self._output_shapes = output_dataset.output_shapes + return output_dataset.make_dataset_resource() + + self._reduce_func = tf_reduce_func + self._reduce_func.add_to_graph(ops.get_default_graph()) + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + def make_dataset_resource(self): + return gen_dataset_ops.group_by_window_dataset( + self._input_dataset.make_dataset_resource(), + self._key_func.captured_inputs, + self._reduce_func.captured_inputs, + self._window_size_func.captured_inputs, + key_func=self._key_func, + reduce_func=self._reduce_func, + window_size_func=self._window_size_func, + output_types=nest.flatten(self.output_types), + output_shapes=nest.flatten(self.output_shapes)) + + +def group_by_window(dataset, + key_func, + reduce_func, + window_size=None, + window_size_func=None): + """Performs a windowed "group-by" operation on this dataset. + + This method maps each consecutive element in this dataset to a key + using `key_func` and groups the elements by key. It then applies + `reduce_func` to at most `window_size_func(key)` elements matching the same + key. All execpt the final window for each key will contain + `window_size_func(key)` elements; the final window may be smaller. + + You may provide either a constant `window_size` or a window size determined by + the key through `window_size_func`. + + Args: + dataset: A `Dataset`. + key_func: A function mapping a nested structure of tensors + (having shapes and types defined by `self.output_shapes` and + `self.output_types`) to a scalar `tf.int64` tensor. + reduce_func: A function mapping a key and a dataset of up to `batch_size` + consecutive elements matching that key to another dataset. 
+ window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + consecutive elements matching the same key to combine in a single + batch, which will be passed to `reduce_func`. Mutually exclusive with + `window_size_func`. + window_size_func: A function mapping a key to a `tf.int64` scalar + `tf.Tensor`, representing the number of consecutive elements matching + the same key to combine in a single batch, which will be passed to + `reduce_func`. Mutually exclusive with `window_size`. + + Returns: + A `Dataset`. + + Raises: + ValueError: if neither or both of {`window_size`, `window_size_func`} are + passed. + """ + if (window_size is not None and window_size_func or + not (window_size is not None or window_size_func)): + raise ValueError("Must pass either window_size or window_size_func.") + + if window_size is not None: + + def constant_window_func(unused_key): + return ops.convert_to_tensor(window_size, dtype=dtypes.int64) + + window_size_func = constant_window_func + + assert window_size_func is not None + return GroupByWindowDataset(dataset, key_func, reduce_func, window_size_func) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index c78b064b4fd..c2b99d67c7f 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -341,7 +341,7 @@ cuda_py_test( cuda_py_test( name = "sample_stats_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/sample_stats_test.py"], additional_deps = [ ":distributions_py", diff --git a/tensorflow/contrib/framework/python/ops/checkpoint_ops.py b/tensorflow/contrib/framework/python/ops/checkpoint_ops.py index 848e26ab966..26146790b65 100644 --- a/tensorflow/contrib/framework/python/ops/checkpoint_ops.py +++ b/tensorflow/contrib/framework/python/ops/checkpoint_ops.py @@ -17,440 +17,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math - from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_checkpoint_ops from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops - -ops.NotDifferentiable("GenerateVocabRemapping") -ops.NotDifferentiable("LoadAndRemapMatrix") +from tensorflow.python.training import checkpoint_ops -def _load_and_remap_matrix(ckpt_path, - old_tensor_name, - new_row_vocab_offset, - num_rows_to_load, - new_col_vocab_size, - initializer, - old_row_vocab_file=None, - new_row_vocab_file=None, - old_col_vocab_file=None, - new_col_vocab_file=None, - num_row_oov_buckets=0, - num_col_oov_buckets=0, - max_rows_in_memory=-1): - """Loads a 2-D (matrix) `Tensor` from checkpoint. - - Generates 1D-remappings for rows and columns using the - `GenerateVocabRemapping` op, and initializes any anticipated values with the - provided initializer. Then, uses the `LoadAndRemapMatrix` op to create a - matrix that loads existing values from the checkpoint, while filling out - "missing" values with the newly initialized values. See - contrib/framework/ops/checkpoint_ops.cc for more information on the wrapped - functionality (LoadAndRemapMatrix). This wrapper can be used to perform only - row remapping or only col remapping. If only row remapping is desired, - {new,old}_col_vocab_file should be `None`, and vice versa for column - remapping. 
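Returning to the group_by_window() signature above: the per-key window size path is exercised by testDynamicWindowSize, and a hedged restatement of that usage is sketched here; import paths are assumed as in the tests.

import numpy as np

from tensorflow.contrib.data.python.ops import dataset_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes

components = np.arange(100).astype(np.int64)

def window_size_func(key):
  # Even keys (0) get windows of 5 elements, odd keys (1) windows of 10.
  window_sizes = constant_op.constant([5, 10], dtype=dtypes.int64)
  return window_sizes[key]

dataset = dataset_ops.Dataset.from_tensor_slices(components).apply(
    dataset_ops.group_by_window,
    args=(lambda x: x % 2,             # key_func
          lambda _, xs: xs.batch(20),  # reduce_func; caps any window at 20
          None,                        # window_size must be None here
          window_size_func))           # mutually exclusive with window_size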
- - NOTE: This only supports div-partitioning the vocabulary on the 1st dimension - (row axis) via `new_row_vocab_offset`. - - Args: - ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) - from which the old matrix `Tensor` will be loaded. - old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. - new_row_vocab_offset: A 0-indexed integer representing what line to - start reading at in the new row vocabulary. Used for partitioned - variables. - num_rows_to_load: Number of rows to load for the new vocabulary (note: to - support variable partitioning and partial loading, this does not need to - be the same as the number of entries in `new_row_vocab_file`). - new_col_vocab_size: Number of columns to load - should be the same as the - number of entries in `new_col_vocab_file`, since we don't support - partitioning along the column axis. - initializer: Callable initializer function that accepts a 1-D tensor as the - arg to specify the shape of the returned tensor. Used to initialize - missing values. - old_row_vocab_file: A scalar `Tensor` of type `string` containing the - path to the old row vocabulary file. Can be None, which represents no - remapping on the row axis. - new_row_vocab_file: A scalar `Tensor` of type `string` containing the path - to the new row vocabulary file. Can be None, which represents no remapping - on the row axis - in which case, `new_row_vocab_offset` and - `num_rows_to_load` work under the assumption that the new row vocab is the - same as the old row vocab. - old_col_vocab_file: A scalar `Tensor` of type `string` containing the - path to the old column vocabulary file. Can be None, which represents no - remapping on the column axis. - new_col_vocab_file: A scalar `Tensor` of type `string` containing the path - to the new column vocabulary file. Can be None, which represents no - remapping on the column axis - in which case, `new_col_vocab_size` works - under the assumption that the new col vocab is the same as the old col - vocab. - num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows - to append. Must be >= 0. - num_col_oov_buckets: `int` specifying the number of out-of-vocabulary - columns to append. Must be >= 0. - max_rows_in_memory: `int` specifying the maximum number of rows to load from - the checkpoint at once. If less than or equal to 0, the entire matrix will - be loaded into memory. Setting this arg trades increased disk reads for - lower memory usage. - - Returns: - A Tensor of shape `[num_rows_to_load + num_row_oov_buckets, - new_col_vocab_size + num_col_oov_buckets]`, with values loaded from the - specified tensor in the checkpoint, and any missing or OOV values - initialized with the given `initializer`. - - Raises: - ValueError: If `num_row_oov_buckets` or `num_col_oov_buckets` < 0. - ValueError: If either `old_row_vocab_file` or `new_row_vocab_file` is - provided, while the other is not. Same for `old_col_vocab_file` and - `new_col_vocab_file`. - ValueError: If neither row vocabs or col vocabs are provided. - """ - if num_row_oov_buckets < 0: - raise ValueError("num_row_oov_buckets must be >= 0, but received %d" % - num_row_oov_buckets) - if num_col_oov_buckets < 0: - raise ValueError("num_col_oov_buckets must be >= 0, but received %d" % - num_col_oov_buckets) - - if bool(old_row_vocab_file) != bool(new_row_vocab_file): - raise ValueError( - "old_row_vocab_file and new_row_vocab_file must both be specified or " - "left unspecified. old_row_vocab_file='{}', new_row_vocab_file='{}'". 
- format(old_row_vocab_file, new_row_vocab_file)) - if bool(old_col_vocab_file) != bool(new_col_vocab_file): - raise ValueError( - "old_col_vocab_file and new_col_vocab_file must both be specified or " - "left unspecified. old_col_vocab_file='{}', new_col_vocab_file='{}'". - format(old_col_vocab_file, new_col_vocab_file)) - - remap_rows = new_row_vocab_file and old_row_vocab_file - remap_cols = new_col_vocab_file and old_col_vocab_file - if not (remap_rows or remap_cols): - raise ValueError( - "Must provide either row or column vocab files. If no remapping is " - "necessary, consider using `tf.contrib.framework.init_from_checkpoint` " - "instead.") - - num_rows_present = num_rows_to_load - if remap_rows: - row_remapping, num_rows_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access - new_vocab_file=new_row_vocab_file, - old_vocab_file=old_row_vocab_file, - new_vocab_offset=new_row_vocab_offset, - num_new_vocab=num_rows_to_load)) - else: - # Even when the rows are not being reordered, we still need to generate a - # remapping to account for initializing partitioned Variables (when - # new_row_vocab_offset is non-zero). - row_remapping = math_ops.range( - new_row_vocab_offset, - new_row_vocab_offset + num_rows_to_load, - dtype=dtypes.int64) - - col_remapping = [] - num_cols_present = new_col_vocab_size - if remap_cols: - col_remapping, num_cols_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access - new_vocab_file=new_col_vocab_file, - old_vocab_file=old_col_vocab_file, - new_vocab_offset=0, # Offset is unused for cols (no partitioning). - num_new_vocab=new_col_vocab_size)) - - init_vals = initializer([ - num_rows_to_load * new_col_vocab_size - - num_rows_present * num_cols_present, 1 - ]) - return_tensor = gen_checkpoint_ops._load_and_remap_matrix( # pylint: disable=protected-access - ckpt_path=ckpt_path, - old_tensor_name=old_tensor_name, - row_remapping=row_remapping, - col_remapping=col_remapping, - initializing_values=init_vals, - num_rows=num_rows_to_load, - num_cols=new_col_vocab_size, - max_rows_in_memory=max_rows_in_memory) - - # Add OOV row(s) and column(s). - if num_row_oov_buckets > 0: - init_row_oov_val = initializer([num_row_oov_buckets, new_col_vocab_size]) - init_row_oov_val = ops.convert_to_tensor(init_row_oov_val) - return_tensor = array_ops.concat([return_tensor, init_row_oov_val], 0) - if num_col_oov_buckets > 0: - # We need to add any row OOV to the new column shape. - init_col_oov_val = initializer( - [num_rows_to_load + num_row_oov_buckets, num_col_oov_buckets]) - init_col_oov_val = ops.convert_to_tensor(init_col_oov_val) - return_tensor = array_ops.concat([return_tensor, init_col_oov_val], 1) - - return return_tensor - - -def load_and_remap_matrix_initializer(ckpt_path, - old_tensor_name, - new_row_vocab_size, - new_col_vocab_size, - old_row_vocab_file=None, - new_row_vocab_file=None, - old_col_vocab_file=None, - new_col_vocab_file=None, - num_row_oov_buckets=0, - num_col_oov_buckets=0, - initializer=None, - max_rows_in_memory=-1): - r"""Returns a var initializer for loading and remapping a 2-D (matrix) tensor. - - The returned initializer loads a 2-D (matrix) `Tensor` with name - `old_tensor_name` from the checkpoint at `ckpt_path`. It will reorder the - rows/columns according to the specified vocab files and append additional - out-of-vocabulary rows/columns according to the number of OOV buckets. 
- - The format of the file at the `{old,new}_{row,col}_vocab_file` path should be - a text file, with each line containing a single entity within the vocabulary. - Let the function `line_of(f, "x")` return the 0-indexed line number of the - entity "x" in file f, and the function `entity_at(f, i)` return the entity at - line i of file f. Then, row i of the new output matrix will be taken from row - `line_of(old_row_vocab_file, entity_at(new_row_vocab_file, i))` of the old - matrix. If any entity in `new_row_vocab_file` is not found in - `old_row_vocab_file`, that row is considered a "missing" row, and its values - will be initialized using the `initializer` arg. The same logic also applies - for the columns. - - For example, assuming that: - - * `old_row_vocab_file` contains "mercury\nvenus\nmars" - * `new_row_vocab_file` contains "venus\njupiter\nmercury" - * `old_col_vocab_file` contains "good\nbetter\nbest" - * `new_col_vocab_file` contains "good\nbest\nfantastic" - * `initializer` returns the natural numbers `[1, 2, 3, 4, ...]` - * `w(i, j)` represents the value from row i, column j of the old matrix - - Then the new output matrix will look like: - - `[[w(1, 0), w(1, 2), 1], - [2, 3, 4], - [w(0, 0), w(0, 2), 5]]` - - If we further specify that: - - * `num_row_oov_buckets` == 2 - * `num_col_oov_buckets` == 1 - - Then the new output matrix will look like: - - `[[w(1, 0), w(1, 2), 1, 12], - [2, 3, 4, 13], - [w(0, 0), w(0, 2), 5, 14], - [6, 7, 8, 15], - [9, 10, 11, 16]]` - - If `{old,new}_row_vocab_file` are None, we assume that the old and new row - vocab files are the same, and no row remapping is done. If - `{old,new}_col_vocab_file` are None, we assume that the old and new column - vocab files are the same, and no column remapping is done. - - The returned initializer only supports div-partitioning along the row axis. It - does not support partitioning along the column axis or mod-partitioning. - - NOTE: When this is used to warm-start variables, client code should use - `tf.lookup.index_table_from_tensor()` like - contrib/layers/python/layers/feature_column.py does, as opposed to - `tf.feature_to_id()` - in order to ensure the underlying lookup tables are the - same. - - Args: - ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) - from which the old matrix `Tensor` will be loaded. - old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. - new_row_vocab_size: `int` specifying the number of entries in - `new_row_vocab_file`. If no row remapping is needed (no row vocab - provided), this should be equal to the number of rows to load from the old - matrix (which can theoretically be smaller than the number of rows in the - old matrix). - new_col_vocab_size: `int` specifying the number of entries in - `new_col_vocab_file`. If no column remapping is needed (no column vocab - provided), this should be equal to the number of columns in the old - matrix. - old_row_vocab_file: A scalar `Tensor` of type `string` containing the - path to the old row vocabulary file. Can be None, which represents no - remapping on the row axis. - new_row_vocab_file: A scalar `Tensor` of type `string` containing the path - to the new row vocabulary file. Can be None, which represents no remapping - on the row axis. - old_col_vocab_file: A scalar `Tensor` of type `string` containing the - path to the old column vocabulary file. Can be None, which represents no - remapping on the column axis. 
- new_col_vocab_file: A scalar `Tensor` of type `string` containing the path - to the new column vocabulary file. Can be None, which represents no - remapping on the column axis. - num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows - to append. Must be >= 0. - num_col_oov_buckets: `int` specifying the number of out-of-vocabulary - columns to append. Must be >= 0. - initializer: Initializer function to initialize missing values. Accepts a - 1-D tensor as the arg to specify the shape of the returned tensor. If - `None`, defaults to using `zeros_initializer()`. - max_rows_in_memory: `int` specifying the maximum number of rows to load from - the checkpoint at once. If less than or equal to 0, the entire matrix will - be loaded into memory. Setting this arg trades increased disk reads for - lower memory usage. - - Returns: - A variable initializer function that should be used to initialize a - (potentially partitioned) `Variable` whose complete shape is - `[new_row_vocab_size + num_row_oov_buckets, new_col_vocab_size + - num_col_oov_buckets]`. - - Raises: - TypeError: If `initializer` is specified but not callable. - """ - if initializer is None: - # TODO(b/25671353): Consider using sqrt(6/(fan_in + fan_out)) instead, from - # Glorot and Bengio, 2010. - initializer = init_ops.zeros_initializer() - - if not callable(initializer): - raise TypeError( - "initializer must be callable, instead of being {} of type {}.".format( - initializer, type(initializer))) - - def _initializer(shape, dtype=dtypes.float32, partition_info=None): - """Variable initializer. - - Args: - shape: Shape of `Tensor` to return. Should include OOV on both axes. - dtype: Must be float32. - partition_info: variable_scope._PartitionInfo. - - Returns: - `Tensor` of shape `shape`. - - Raises: - TypeError: If `dtype` is anything other than float32. - ValueError: For shape mismatch upon invocation. - """ - # Sanity checks. - if dtype != dtypes.float32: - raise TypeError( - "Currently, only float32 is supported. 
Received dtype: {}".format( - dtype)) - if len(shape) != 2: - raise ValueError("Expected 2-dim shape, but received: {}".format(shape)) - if shape[0] <= 0: - raise ValueError( - "Expected 1st dim of shape to be > 0, but received shape: {}".format( - shape)) - if shape[1] != (new_col_vocab_size + num_col_oov_buckets): - raise ValueError( - "Expected 2nd dim of shape to be new_col_vocab_size ({}) + " - "num_col_oov_buckets ({}) = {}, but received shape: {}".format( - new_col_vocab_size, num_col_oov_buckets, - new_col_vocab_size + num_col_oov_buckets, shape)) - - offset = 0 - if partition_info is not None: - offset = partition_info.single_offset(shape) - - if offset + shape[0] > new_row_vocab_size + num_row_oov_buckets: - raise ValueError( - "Trying to initialize {} additional rows after {} rows have already " - "been initialized, which would exceed expected total row count of " - "new_row_vocab_size ({}) + num_row_oov_buckets ({}) = {}.".format( - shape[0], offset, new_row_vocab_size, num_row_oov_buckets, - new_row_vocab_size + num_row_oov_buckets)) - - row_oov_buckets_to_use = min(shape[0], - max(0, offset + shape[0] - new_row_vocab_size)) - num_rows_to_load = shape[0] - row_oov_buckets_to_use - - return _load_and_remap_matrix( - ckpt_path=ckpt_path, - old_tensor_name=old_tensor_name, - new_row_vocab_offset=offset, - num_rows_to_load=num_rows_to_load, - new_col_vocab_size=new_col_vocab_size, - initializer=initializer, - old_row_vocab_file=old_row_vocab_file, - new_row_vocab_file=new_row_vocab_file, - old_col_vocab_file=old_col_vocab_file, - new_col_vocab_file=new_col_vocab_file, - num_row_oov_buckets=row_oov_buckets_to_use, - num_col_oov_buckets=num_col_oov_buckets, - max_rows_in_memory=max_rows_in_memory) - - return _initializer - - -def load_embedding_initializer(ckpt_path, - embedding_tensor_name, - new_vocab_size, - embedding_dim, - old_vocab_file, - new_vocab_file, - num_oov_buckets=0, - initializer=None, - max_rows_in_memory=-1): - """Returns a variable initializer for loading pre-trained embeddings. - - Wrapper around `load_and_remap_matrix_initializer()` specialized for loading - embedding weights and remapping according to the provided vocab files. See - docs for `load_and_remap_matrix_initializer()` for more details. - - NOTE: Only for use with div-partitioned variables / vocabularies. - - Args: - ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) - from which the old matrix `Tensor` will be loaded. - embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. - new_vocab_size: Number of entries in the new vocab. - embedding_dim: `int` specifying the dimension of the embedding vectors from - the checkpoint. Must match the number of columns in the old embedding - matrix. - old_vocab_file: A scalar `Tensor` of type `string` containing the - path to the old vocabulary file. - new_vocab_file: A scalar `Tensor` of type `string` containing the - path to the new vocabulary file. - num_oov_buckets: `int` specifying the number of out-of-vocabulary - buckets to use. Must be >= 0. - initializer: Initializer function that accepts a 1-D tensor as the arg to - specify the shape of the returned tensor. If `None`, defaults to using - `truncated_normal_initializer()`. - max_rows_in_memory: `int` specifying the maximum number of rows to load from - the checkpoint at once. If less than or equal to 0, the entire matrix will - be loaded into memory. Setting this arg trades increased disk reads for - lower memory usage. - - Returns: - A variable initializer function. 
- """ - if initializer is None: - # TODO(b/25671353): This should be kept in sync with the stddev used by - # feature_column.py's _EmbeddingColumn. - initializer = init_ops.truncated_normal_initializer( - stddev=1.0 / math.sqrt(embedding_dim)) - - return load_and_remap_matrix_initializer( - ckpt_path=ckpt_path, - old_tensor_name=embedding_tensor_name, - new_row_vocab_size=new_vocab_size, - new_col_vocab_size=embedding_dim, - old_row_vocab_file=old_vocab_file, - new_row_vocab_file=new_vocab_file, - old_col_vocab_file=None, - new_col_vocab_file=None, - num_row_oov_buckets=num_oov_buckets, - num_col_oov_buckets=0, - initializer=initializer, - max_rows_in_memory=max_rows_in_memory) +# pylint: disable=protected-access,line-too-long +load_and_remap_matrix_initializer = checkpoint_ops._load_and_remap_matrix_initializer +# pylint: enable=line-too-long +load_embedding_initializer = checkpoint_ops._load_embedding_initializer +# pylint: enable=protected-access def load_linear_multiclass_bias_initializer(ckpt_path, diff --git a/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py b/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py index a11d373244d..b7b9f5c59e1 100644 --- a/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py +++ b/tensorflow/contrib/framework/python/ops/checkpoint_ops_test.py @@ -21,7 +21,6 @@ import os import numpy as np from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib.framework.python.ops import checkpoint_ops from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -38,250 +37,6 @@ FLAGS = flags.FLAGS _TESTDATA_PATH = 'contrib/framework/testdata' -class LoadAndRemapWrappersTest(test.TestCase): - """Tests for the functionality of the Python wrappers.""" - - def setUp(self): - self.bundle_file = os.path.join( - test.test_src_dir_path(_TESTDATA_PATH), 'bundle_checkpoint') - self.new_feature_vocab_file = os.path.join( - test.test_src_dir_path(_TESTDATA_PATH), 'bundle_checkpoint_vocab.txt') - self.old_feature_vocab_file = os.path.join( - test.test_src_dir_path(_TESTDATA_PATH), - 'bundle_checkpoint_vocab_with_oov.txt') - self.new_class_vocab_file = os.path.join( - test.test_src_dir_path(_TESTDATA_PATH), 'keyword_new.txt') - self.old_class_vocab_file = os.path.join( - test.test_src_dir_path(_TESTDATA_PATH), 'keyword.txt') - self.init_val = 42 - - def _init_val_initializer(shape, dtype=None, partition_info=None): - del dtype, partition_info # Unused by this unit-testing initializer. - return array_ops.tile( - constant_op.constant([[self.init_val]], dtype=dtypes.float32), shape) - - self.initializer = _init_val_initializer - - def test_load_and_remap_matrix(self): - """Tests the end-to-end loading / remapping of weights.""" - # _load_and_remap_matrix() is the generalized wrapper that takes in row and - # column vocabulary files, calls the relevant remappings, and returns the - # weight matrix. Take this example to be linear multi-class by providing - # both row and column vocabularies. 
- remapped_matrix = checkpoint_ops._load_and_remap_matrix( - new_row_vocab_file=self.new_feature_vocab_file, - old_row_vocab_file=self.old_feature_vocab_file, - num_rows_to_load=4, - new_col_vocab_file=self.new_class_vocab_file, - old_col_vocab_file=self.old_class_vocab_file, - new_col_vocab_size=4, - old_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - new_row_vocab_offset=1, - initializer=self.initializer, - num_row_oov_buckets=1, - num_col_oov_buckets=1) - - # [4 in vocab + 1 oov features, 4 in vocab + 1 oov classes]. The offset - # means we read - expected_remapped_matrix = np.concatenate( - [ - np.reshape([18, 34, 50, self.init_val, self.init_val], [5, 1]), - np.reshape([16, 32, 48, self.init_val, self.init_val], [5, 1]), - np.reshape([self.init_val] * 5, [5, 1]), - np.reshape([17, 33, 49, self.init_val, self.init_val], [5, 1]), - np.reshape([self.init_val] * 5, [5, 1]) - ], - axis=1) - - with self.test_session(): - self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval()) - - def test_load_and_remap_output_layer_weight_initializer_linear(self): - """Tests for the output layer initializer in the linear multi-class case.""" - loading_initializer = (contrib_framework.load_and_remap_matrix_initializer( - new_row_vocab_size=5, - new_col_vocab_file=self.new_class_vocab_file, - old_col_vocab_file=self.old_class_vocab_file, - new_col_vocab_size=4, - old_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - new_row_vocab_file=self.new_feature_vocab_file, - old_row_vocab_file=self.old_feature_vocab_file, - num_row_oov_buckets=1, - num_col_oov_buckets=1, - initializer=self.initializer)) - - expected_remapped_matrix = np.concatenate( - [ - np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]), - np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]), - np.reshape([self.init_val] * 6, [6, 1]), - np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]), - np.reshape([self.init_val] * 6, [6, 1]) - ], - axis=1) - - # The new weight matrix is of size - # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV]. Use a - # partitioned variable to confirm that the offset logic works. - remapped_matrix = variable_scope.get_variable( - name='linear/obtained_weight_matrix', - shape=[6, 5], - initializer=loading_initializer, - partitioner=partitioned_variables.fixed_size_partitioner(2)) - - with self.test_session(): - variables.global_variables_initializer().run() - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) - - def test_load_and_remap_output_layer_weight_initializer_dnn_output(self): - """Tests for the output layer initializer in the DNN output case.""" - loading_initializer = (contrib_framework.load_and_remap_matrix_initializer( - new_row_vocab_size=5, - new_col_vocab_file=self.new_class_vocab_file, - old_col_vocab_file=self.old_class_vocab_file, - new_col_vocab_size=4, - old_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - num_col_oov_buckets=1, - initializer=self.initializer)) - - expected_remapped_matrix = np.concatenate( - [ - np.reshape([2, 18, 34, 50, 66], [5, 1]), - np.reshape([0, 16, 32, 48, 64], [5, 1]), - np.reshape([self.init_val] * 5, [5, 1]), - np.reshape([1, 17, 33, 49, 65], [5, 1]), - np.reshape([self.init_val] * 5, [5, 1]) - ], - axis=1) - - # The new weight matrix is of size - # [5-sized input layer, 4 class vocab + 1 class OOV]. 
- remapped_matrix = variable_scope.get_variable( - name='dnn_output/obtained_weight_matrix', - shape=[5, 5], - initializer=loading_initializer, - partitioner=partitioned_variables.fixed_size_partitioner(2)) - - with self.test_session(): - variables.global_variables_initializer().run() - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) - - def test_initializer_with_oov_only_partition(self): - """Tests for the output layer initializer where one partition is all OOV.""" - loading_initializer = (contrib_framework.load_and_remap_matrix_initializer( - new_row_vocab_size=5, - new_col_vocab_file=self.new_class_vocab_file, - old_col_vocab_file=self.old_class_vocab_file, - new_col_vocab_size=4, - old_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - new_row_vocab_file=self.new_feature_vocab_file, - old_row_vocab_file=self.old_feature_vocab_file, - num_row_oov_buckets=5, - num_col_oov_buckets=1, - initializer=self.initializer)) - - expected_remapped_matrix = np.concatenate( - [ - np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]), - np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]), - np.reshape([self.init_val] * 10, [10, 1]), - np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]), - np.reshape([self.init_val] * 10, [10, 1]), - ], - axis=1) - - # The new weight matrix is of size - # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV]. The - # second partition has only OOV. - remapped_matrix = variable_scope.get_variable( - name='linear_all_oov/obtained_weight_matrix', - shape=[10, 5], - initializer=loading_initializer, - partitioner=partitioned_variables.fixed_size_partitioner(2)) - - with self.test_session(): - variables.global_variables_initializer().run() - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) - - def test_load_and_remap_linear_multiclass_initializer_default_init(self): - """Tests where the zeros_initializer default is used for linear.""" - loading_initializer = (contrib_framework.load_and_remap_matrix_initializer( - new_row_vocab_size=5, - new_col_vocab_file=self.new_class_vocab_file, - old_col_vocab_file=self.old_class_vocab_file, - new_col_vocab_size=4, - old_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - new_row_vocab_file=self.new_feature_vocab_file, - old_row_vocab_file=self.old_feature_vocab_file, - num_row_oov_buckets=1, - num_col_oov_buckets=1)) - - expected_remapped_matrix = np.concatenate( - [ - np.reshape([2, 18, 34, 50, 0, 0], [6, 1]), - np.reshape([0, 16, 32, 48, 0, 0], [6, 1]), - np.reshape([0] * 6, [6, 1]), - np.reshape([1, 17, 33, 49, 0, 0], [6, 1]), - np.reshape([0] * 6, [6, 1]) - ], - axis=1) - - remapped_matrix = variable_scope.get_variable( - name='linear_init_fallback/obtained_weight_matrix', - shape=[6, 5], - initializer=loading_initializer, - partitioner=partitioned_variables.fixed_size_partitioner(2)) - - with self.test_session(): - variables.global_variables_initializer().run() - self.assertAllClose(expected_remapped_matrix, - remapped_matrix.as_tensor().eval()) - - def test_load_embedding_initializer(self): - """Tests for the load_embedding_initializer wrapper.""" - embedding_loading_initializer = ( - contrib_framework.load_embedding_initializer( - new_vocab_file=self.new_feature_vocab_file, - old_vocab_file=self.old_feature_vocab_file, - new_vocab_size=5, - embedding_dim=16, - embedding_tensor_name='some_scope/embeddings', - ckpt_path=[self.bundle_file], - num_oov_buckets=1, - initializer=self.initializer)) - - 
expected_remapped_embeddings = np.concatenate( - [ - np.reshape(range(64), [4, 16]), - np.reshape([self.init_val] * 32, [2, 16]), - ], - axis=0) - - # The new weight matrix is of size - # [5 feature vocab + 1 feature OOV, 16 (embedding dimension)], where the - # last vocab row (2nd last row) is newly initialized (wasn't found in - # previous vocab) and the actual last row is OOV and also newly initialized. - # Use a partitioned variable to confirm that the offset logic works. - remapped_embeddings = variable_scope.get_variable( - name='embedding/obtained_embedding_matrix', - shape=[6, 16], - initializer=embedding_loading_initializer, - partitioner=partitioned_variables.fixed_size_partitioner(2)) - - with self.test_session(): - variables.global_variables_initializer().run() - self.assertAllClose(expected_remapped_embeddings, - remapped_embeddings.as_tensor().eval()) - - class LoadMulticlassBiasTest(test.TestCase): """Tests for the load_linear_multiclass_bias_initializer functionality.""" diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD new file mode 100644 index 00000000000..b2de2823563 --- /dev/null +++ b/tensorflow/contrib/gan/BUILD @@ -0,0 +1,27 @@ +package(default_visibility = ["//tensorflow:__subpackages__"]) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +py_library( + name = "gan", + srcs = [ + "__init__.py", + ], + srcs_version = "PY2AND3", + deps = [ + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/gan/README.md b/tensorflow/contrib/gan/README.md new file mode 100644 index 00000000000..586e5ac331c --- /dev/null +++ b/tensorflow/contrib/gan/README.md @@ -0,0 +1,4 @@ +This directory contains the TFGAN project. + +This file will have more details as code is added. + diff --git a/tensorflow/contrib/gan/__init__.py b/tensorflow/contrib/gan/__init__.py new file mode 100644 index 00000000000..a46b0e8d5de --- /dev/null +++ b/tensorflow/contrib/gan/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""TFGAN grouped API.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function diff --git a/tensorflow/contrib/gdr/BUILD b/tensorflow/contrib/gdr/BUILD index 645e364d191..bebcf079ba4 100644 --- a/tensorflow/contrib/gdr/BUILD +++ b/tensorflow/contrib/gdr/BUILD @@ -62,6 +62,7 @@ tf_cuda_library( }), deps = [ ":gdr_proto_cc", + "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:gpu_runtime", "//tensorflow/core:lib", diff --git a/tensorflow/contrib/image/BUILD b/tensorflow/contrib/image/BUILD index e631c243c3c..a27bec48010 100755 --- a/tensorflow/contrib/image/BUILD +++ b/tensorflow/contrib/image/BUILD @@ -121,12 +121,9 @@ tf_gen_op_wrapper_py( cc_library( name = "image_ops_cc", - srcs = [ - "ops/image_ops.cc", - ], + srcs = ["ops/image_ops.cc"], deps = [ ":image_ops_kernels", - "//tensorflow/core", "//tensorflow/core:framework", ], alwayslink = 1, diff --git a/tensorflow/contrib/keras/BUILD b/tensorflow/contrib/keras/BUILD index a09045d7fda..26f0e415180 100644 --- a/tensorflow/contrib/keras/BUILD +++ b/tensorflow/contrib/keras/BUILD @@ -551,6 +551,7 @@ py_test( size = "small", srcs = ["python/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", + tags = ["notsan"], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py b/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py index baa9781e71f..f6820ee0394 100644 --- a/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py +++ b/tensorflow/contrib/keras/python/keras/utils/io_utils_test.py @@ -57,43 +57,44 @@ class TestIOUtils(test.TestCase): h5_path = os.path.join(temp_dir, 'test.h5') create_dataset(h5_path) - # Instantiating HDF5Matrix for the training set, - # which is a slice of the first 150 elements - x_train = keras.utils.io_utils.HDF5Matrix( - h5_path, 'my_data', start=0, end=150) - y_train = keras.utils.io_utils.HDF5Matrix( - h5_path, 'my_labels', start=0, end=150) + with self.test_session(): + # Instantiating HDF5Matrix for the training set, + # which is a slice of the first 150 elements + x_train = keras.utils.io_utils.HDF5Matrix( + h5_path, 'my_data', start=0, end=150) + y_train = keras.utils.io_utils.HDF5Matrix( + h5_path, 'my_labels', start=0, end=150) - # Likewise for the test set - x_test = keras.utils.io_utils.HDF5Matrix( - h5_path, 'my_data', start=150, end=200) - y_test = keras.utils.io_utils.HDF5Matrix( - h5_path, 'my_labels', start=150, end=200) + # Likewise for the test set + x_test = keras.utils.io_utils.HDF5Matrix( + h5_path, 'my_data', start=150, end=200) + y_test = keras.utils.io_utils.HDF5Matrix( + h5_path, 'my_labels', start=150, end=200) - # HDF5Matrix behave more or less like Numpy matrices - # with regard to indexing - self.assertEqual(y_train.shape, (150, 1)) - # But they do not support negative indices, so don't try print(x_train[-1]) + # HDF5Matrix behave more or less like Numpy matrices + # with regard to indexing + self.assertEqual(y_train.shape, (150, 1)) + # But they don't support negative indices, so don't try print(x_train[-1]) - self.assertEqual(y_train.dtype, np.dtype('i')) - self.assertEqual(y_train.ndim, 2) - self.assertEqual(y_train.size, 150) + self.assertEqual(y_train.dtype, np.dtype('i')) + self.assertEqual(y_train.ndim, 2) + self.assertEqual(y_train.size, 150) - model = keras.models.Sequential() - 
model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu')) - model.add(keras.layers.Dense(1, activation='sigmoid')) - model.compile(loss='binary_crossentropy', optimizer='sgd') + model = keras.models.Sequential() + model.add(keras.layers.Dense(64, input_shape=(10,), activation='relu')) + model.add(keras.layers.Dense(1, activation='sigmoid')) + model.compile(loss='binary_crossentropy', optimizer='sgd') - # Note: you have to use shuffle='batch' or False with HDF5Matrix - model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False) - # test that evalutation and prediction - # don't crash and return reasonable results - out_pred = model.predict(x_test, batch_size=32, verbose=False) - out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False) + # Note: you have to use shuffle='batch' or False with HDF5Matrix + model.fit(x_train, y_train, batch_size=32, shuffle='batch', verbose=False) + # test that evalutation and prediction + # don't crash and return reasonable results + out_pred = model.predict(x_test, batch_size=32, verbose=False) + out_eval = model.evaluate(x_test, y_test, batch_size=32, verbose=False) - self.assertEqual(out_pred.shape, (50, 1)) - self.assertEqual(out_eval.shape, ()) - self.assertGreater(out_eval, 0) + self.assertEqual(out_pred.shape, (50, 1)) + self.assertEqual(out_eval.shape, ()) + self.assertGreater(out_eval, 0) if __name__ == '__main__': diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index bafde464afb..4c50d40aaa9 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -28,7 +28,6 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging @@ -44,7 +43,7 @@ def _get_in_out_shape(x_shape, y_shape, n_classes, batch_size=None): x_is_dict, y_is_dict = isinstance( x_shape, dict), y_shape is not None and isinstance(y_shape, dict) if y_is_dict and n_classes is not None: - assert (isinstance(n_classes, dict)) + assert isinstance(n_classes, dict) if batch_size is None: batch_size = list(x_shape.values())[0][0] if x_is_dict else x_shape[0] @@ -322,10 +321,12 @@ class DataFeeder(object): self._x = dict([(k, check_array(v, v.dtype)) for k, v in list(x.items()) ]) if x_is_dict else check_array(x, x.dtype) - self._y = None if y is None else \ - dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())]) if y_is_dict else check_array(y, y.dtype) + self._y = None if y is None else ( + dict([(k, check_array(v, v.dtype)) for k, v in list(y.items())]) + if y_is_dict else check_array(y, y.dtype)) - # self.n_classes is not None means we're converting raw target indices to one-hot. + # self.n_classes is not None means we're converting raw target indices + # to one-hot. if n_classes is not None: if not y_is_dict: y_dtype = (np.int64 @@ -344,12 +345,15 @@ class DataFeeder(object): x_shape, y_shape, n_classes, batch_size) # Input dtype matches dtype of x. 
- self._input_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())]) if x_is_dict \ - else _check_dtype(self._x.dtype) + self._input_dtype = ( + dict([(k, _check_dtype(v.dtype)) for k, v in list(self._x.items())]) + if x_is_dict else _check_dtype(self._x.dtype)) - # note: self._output_dtype = np.float32 when y is None - self._output_dtype = dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())]) if y_is_dict \ - else _check_dtype(self._y.dtype) if y is not None else np.float32 + # self._output_dtype == np.float32 when y is None + self._output_dtype = ( + dict([(k, _check_dtype(v.dtype)) for k, v in list(self._y.items())]) + if y_is_dict else ( + _check_dtype(self._y.dtype) if y is not None else np.float32)) # self.n_classes is None means we're passing in raw target indices if n_classes is not None and y_is_dict: diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 1e68a3ef660..676e1f2b51c 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Utilities supporting export to SavedModel. Some contents of this file are moved to tensorflow/python/estimator/export.py: @@ -39,6 +38,7 @@ import time from tensorflow.contrib.layers.python.layers import feature_column from tensorflow.contrib.learn.python.learn import export_strategy from tensorflow.contrib.learn.python.learn.estimators import constants +from tensorflow.contrib.learn.python.learn.estimators import metric_key from tensorflow.contrib.learn.python.learn.estimators import prediction_key from tensorflow.contrib.learn.python.learn.utils import gc from tensorflow.contrib.learn.python.learn.utils import input_fn_utils @@ -75,8 +75,8 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative' _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative' -def build_standardized_signature_def( - input_tensors, output_tensors, problem_type): +def build_standardized_signature_def(input_tensors, output_tensors, + problem_type): """Build a SignatureDef using problem type and input and output Tensors. 
Note that this delegates the actual creation of the signatures to methods in @@ -116,8 +116,8 @@ def build_standardized_signature_def( (_, predictions), = output_tensors.items() return signature_def_utils.regression_signature_def(examples, predictions) else: - return signature_def_utils.predict_signature_def( - input_tensors, output_tensors) + return signature_def_utils.predict_signature_def(input_tensors, + output_tensors) def _get_classification_scores(output_tensors): @@ -139,17 +139,15 @@ def _is_classification_problem(problem_type, input_tensors, output_tensors): classes = _get_classification_classes(output_tensors) scores = _get_classification_scores(output_tensors) return ((problem_type == constants.ProblemType.CLASSIFICATION or - problem_type == constants.ProblemType.LOGISTIC_REGRESSION) - and len(input_tensors) == 1 - and (classes is not None or - scores is not None or - len(output_tensors) == 1)) + problem_type == constants.ProblemType.LOGISTIC_REGRESSION) and + len(input_tensors) == 1 and + (classes is not None or scores is not None or + len(output_tensors) == 1)) def _is_regression_problem(problem_type, input_tensors, output_tensors): - return (problem_type == constants.ProblemType.LINEAR_REGRESSION - and len(input_tensors) == 1 - and len(output_tensors) == 1) + return (problem_type == constants.ProblemType.LINEAR_REGRESSION and + len(input_tensors) == 1 and len(output_tensors) == 1) def get_input_alternatives(input_ops): @@ -177,9 +175,7 @@ def get_input_alternatives(input_ops): return input_alternatives, features -def get_output_alternatives( - model_fn_ops, - default_output_alternative_key=None): +def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): """Obtain all output alternatives using the model_fn output and heuristics. 
Args: @@ -218,8 +214,10 @@ def get_output_alternatives( default_outputs = {prediction_key.PredictionKey.GENERIC: default_outputs} actual_default_output_alternative_key = ( _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY) - output_alternatives = {actual_default_output_alternative_key: - (default_problem_type, default_outputs)} + output_alternatives = { + actual_default_output_alternative_key: (default_problem_type, + default_outputs) + } return output_alternatives, actual_default_output_alternative_key if default_output_alternative_key: @@ -246,13 +244,12 @@ def build_all_signature_defs(input_alternatives, output_alternatives, actual_default_output_alternative_key): """Build `SignatureDef`s from all pairs of input and output alternatives.""" - signature_def_map = { - ('%s:%s' % (input_key, output_key or 'None')): - build_standardized_signature_def( - inputs, outputs, problem_type) - for input_key, inputs in input_alternatives.items() - for output_key, (problem_type, outputs) - in output_alternatives.items()} + signature_def_map = {('%s:%s' % (input_key, output_key or 'None')): + build_standardized_signature_def(inputs, outputs, + problem_type) + for input_key, inputs in input_alternatives.items() + for output_key, (problem_type, + outputs) in output_alternatives.items()} # Add the default SignatureDef default_inputs = input_alternatives.get(DEFAULT_INPUT_ALTERNATIVE_KEY) @@ -263,8 +260,8 @@ def build_all_signature_defs(input_alternatives, output_alternatives, (default_problem_type, default_outputs) = ( output_alternatives[actual_default_output_alternative_key]) signature_def_map[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = ( - build_standardized_signature_def( - default_inputs, default_outputs, default_problem_type)) + build_standardized_signature_def(default_inputs, default_outputs, + default_problem_type)) return signature_def_map @@ -308,9 +305,8 @@ def get_timestamped_export_dir(export_dir_base): return export_dir time.sleep(1) attempts += 1 - logging.warn( - 'Export directory {} already exists; retrying (attempt {}/{})'.format( - export_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)) + logging.warn('Export directory {} already exists; retrying (attempt {}/{})'. + format(export_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)) raise RuntimeError('Failed to obtain a unique export directory name after ' '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) @@ -330,8 +326,7 @@ def get_temp_export_dir(timestamped_export_dir): """ (dirname, basename) = os.path.split(timestamped_export_dir) temp_export_dir = os.path.join( - compat.as_bytes(dirname), - compat.as_bytes('temp-{}'.format(basename))) + compat.as_bytes(dirname), compat.as_bytes('temp-{}'.format(basename))) return temp_export_dir @@ -357,8 +352,8 @@ def get_most_recent_export(export_dir_base): A gc.Path, with is just a namedtuple of (path, export_version). 
""" select_filter = gc.largest_export_versions(1) - results = select_filter(gc.get_paths(export_dir_base, - parser=_export_version_parser)) + results = select_filter( + gc.get_paths(export_dir_base, parser=_export_version_parser)) return next(iter(results or []), None) @@ -378,8 +373,8 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): keep_filter = gc.largest_export_versions(exports_to_keep) delete_filter = gc.negation(keep_filter) - for p in delete_filter(gc.get_paths(export_dir_base, - parser=_export_version_parser)): + for p in delete_filter( + gc.get_paths(export_dir_base, parser=_export_version_parser)): try: gfile.DeleteRecursively(p.path) except errors_impl.NotFoundError as e: @@ -416,10 +411,7 @@ def make_export_strategy(serving_input_fn, An ExportStrategy that can be passed to the Experiment constructor. """ - def export_fn(estimator, - export_dir_base, - checkpoint_path=None - ): + def export_fn(estimator, export_dir_base, checkpoint_path=None): """Exports the given Estimator as a SavedModel. Args: @@ -512,3 +504,128 @@ def make_parsing_export_strategy(feature_columns, assets_extra=assets_extra, as_text=as_text, exports_to_keep=exports_to_keep) + + +def _default_compare_fn(curr_best_eval_result, cand_eval_result): + """Compares two evaluation results and returns true if the 2nd one is better. + + Both evaluation results should have the values for MetricKey.LOSS, which are + used for comparison. + + Args: + curr_best_eval_result: current best eval metrics. + cand_eval_result: candidate eval metrics. + + Returns: + True if cand_eval_result is better. + + Raises: + ValueError: If input eval result is None or no loss is available. + """ + default_key = metric_key.MetricKey.LOSS + if not curr_best_eval_result or default_key not in curr_best_eval_result: + raise ValueError( + 'curr_best_eval_result cannot be empty or no loss is found in it.') + + if not cand_eval_result or default_key not in cand_eval_result: + raise ValueError( + 'cand_eval_result cannot be empty or no loss is found in it.') + + return curr_best_eval_result[default_key] > cand_eval_result[default_key] + + +class BestModelSelector(object): + """A helper that keeps track of export selection candidates.""" + + def __init__(self, compare_fn=None): + """Constructor of this class. + + Args: + compare_fn: a function that returns true if the candidate is better than + the current best model. + """ + self._best_eval_result = None + self._compare_fn = compare_fn or _default_compare_fn + + def update(self, checkpoint_path, eval_result): + """Records a given checkpoint and exports if this is the best model. + + Args: + checkpoint_path: the checkpoint path to export. + eval_result: a dictionary which is usually generated in evaluation runs. + By default, eval_results contains 'loss' field. + + Returns: + A string representing the path to the checkpoint to be exported. + A dictionary of the same type of eval_result. + + Raises: + ValueError: if checkpoint path is empty. + ValueError: if eval_results is None object. 
+ """ + if not checkpoint_path: + raise ValueError('Checkpoint path is empty.') + if eval_result is None: + raise ValueError('%s has empty evaluation results.', checkpoint_path) + + if (self._best_eval_result is None or + self._compare_fn(self._best_eval_result, eval_result)): + self._best_eval_result = eval_result + return checkpoint_path, eval_result + else: + return '', None + + +def make_best_model_export_strategy(serving_input_fn, + exports_to_keep=1, + compare_fn=None, + default_output_alternative_key=None): + """Creates an custom ExportStrategy for use with tf.contrib.learn.Experiment. + + Args: + serving_input_fn: a function that takes no arguments and returns an + `InputFnOps`. + exports_to_keep: an integer indicating how many historical best models need + to be preserved. + compare_fn: a function that select the 'best' candidate from a dictionary + of evaluation result keyed by corresponding checkpoint path. + default_output_alternative_key: the key for default serving signature for + multi-headed inference graphs. + + Returns: + An ExportStrategy that can be passed to the Experiment constructor. + """ + best_model_export_strategy = make_export_strategy( + serving_input_fn, + exports_to_keep=exports_to_keep, + default_output_alternative_key=default_output_alternative_key) + + best_model_selector = BestModelSelector(compare_fn) + + def export_fn(estimator, export_dir_base, checkpoint_path, eval_result=None): + """Exports the given Estimator as a SavedModel. + + Args: + estimator: the Estimator to export. + export_dir_base: A string containing a directory to write the exported + graph and checkpoints. + checkpoint_path: The checkpoint path to export. If None (the default), + the most recent checkpoint found within the model directory is chosen. + eval_result: placehold args matching the call signature of ExportStrategy. + + Returns: + The string path to the exported directory. 
+ """ + + export_checkpoint_path, export_eval_result = best_model_selector.update( + checkpoint_path, eval_result) + + if export_checkpoint_path and export_eval_result is not None: + checkpoint_base = os.path.basename(export_checkpoint_path) + export_dir = os.path.join(export_dir_base, checkpoint_base) + return best_model_export_strategy.export( + estimator, export_dir, export_checkpoint_path, export_eval_result) + else: + return '' + + return export_strategy.ExportStrategy('best_model', export_fn) diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py index 9e778ab72ad..66bca9c0f53 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils_test.py @@ -24,6 +24,7 @@ import time from tensorflow.contrib.layers.python.layers import feature_column as fc from tensorflow.contrib.learn.python.learn import export_strategy as export_strategy_lib from tensorflow.contrib.learn.python.learn.estimators import constants +from tensorflow.contrib.learn.python.learn.estimators import estimator as core_estimator from tensorflow.contrib.learn.python.learn.estimators import model_fn from tensorflow.contrib.learn.python.learn.utils import input_fn_utils from tensorflow.contrib.learn.python.learn.utils import saved_model_export_utils @@ -40,18 +41,43 @@ from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.util import compat +class TestEstimator(core_estimator.Estimator): + + def __init__(self, *args, **kwargs): + super(TestEstimator, self).__init__(*args, **kwargs) + self.last_exported_checkpoint = "" + self.last_exported_dir = "" + + # @Override + def export_savedmodel(self, + export_dir, + serving_input_fn, + default_output_alternative_key=None, + assets_extra=None, + as_text=False, + checkpoint_path=None): + + if not os.path.exists(export_dir): + os.makedirs(export_dir) + + open(os.path.join(export_dir, "placeholder.txt"), "a").close() + + self.last_exported_checkpoint = checkpoint_path + self.last_exported_dir = export_dir + + return export_dir + + class SavedModelExportUtilsTest(test.TestCase): def test_build_standardized_signature_def_regression(self): input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "output-1": - array_ops.placeholder( - dtypes.float32, 1, name="output-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="output-tensor-1") } problem_type = constants.ProblemType.LINEAR_REGRESSION actual_signature_def = ( @@ -61,10 +87,9 @@ class SavedModelExportUtilsTest(test.TestCase): shape = tensor_shape_pb2.TensorShapeProto( dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype = types_pb2.DataType.Value("DT_FLOAT") - expected_signature_def.inputs[ - signature_constants.REGRESS_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.REGRESS_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.REGRESS_OUTPUTS].CopyFrom( meta_graph_pb2.TensorInfo( @@ -77,13 +102,11 @@ class SavedModelExportUtilsTest(test.TestCase): """Tests classification with one output tensor.""" input_tensors = { "input-1": - 
array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "output-1": - array_ops.placeholder( - dtypes.string, 1, name="output-tensor-1") + array_ops.placeholder(dtypes.string, 1, name="output-tensor-1") } problem_type = constants.ProblemType.CLASSIFICATION actual_signature_def = ( @@ -94,14 +117,14 @@ class SavedModelExportUtilsTest(test.TestCase): dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_float = types_pb2.DataType.Value("DT_FLOAT") dtype_string = types_pb2.DataType.Value("DT_STRING") - expected_signature_def.inputs[ - signature_constants.CLASSIFY_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-1:0", dtype=dtype_string, + name="output-tensor-1:0", + dtype=dtype_string, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -112,8 +135,7 @@ class SavedModelExportUtilsTest(test.TestCase): """Tests multiple output tensors that include classes and probabilities.""" input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "classes": @@ -136,19 +158,20 @@ class SavedModelExportUtilsTest(test.TestCase): dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_float = types_pb2.DataType.Value("DT_FLOAT") dtype_string = types_pb2.DataType.Value("DT_STRING") - expected_signature_def.inputs[ - signature_constants.CLASSIFY_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-classes:0", dtype=dtype_string, + name="output-tensor-classes:0", + dtype=dtype_string, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-proba:0", dtype=dtype_float, + name="output-tensor-proba:0", + dtype=dtype_float, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -159,8 +182,7 @@ class SavedModelExportUtilsTest(test.TestCase): """Tests multiple output tensors that include classes and scores.""" input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "classes": @@ -182,19 +204,20 @@ class SavedModelExportUtilsTest(test.TestCase): dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_float = types_pb2.DataType.Value("DT_FLOAT") dtype_string = types_pb2.DataType.Value("DT_STRING") - expected_signature_def.inputs[ - signature_constants.CLASSIFY_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", 
dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_CLASSES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-classes:0", dtype=dtype_string, + name="output-tensor-classes:0", + dtype=dtype_string, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-scores:0", dtype=dtype_float, + name="output-tensor-scores:0", + dtype=dtype_float, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -205,8 +228,7 @@ class SavedModelExportUtilsTest(test.TestCase): """Tests classification without classes tensor.""" input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "probabilities": @@ -224,14 +246,14 @@ class SavedModelExportUtilsTest(test.TestCase): shape = tensor_shape_pb2.TensorShapeProto( dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_float = types_pb2.DataType.Value("DT_FLOAT") - expected_signature_def.inputs[ - signature_constants.CLASSIFY_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-proba:0", dtype=dtype_float, + name="output-tensor-proba:0", + dtype=dtype_float, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -246,8 +268,7 @@ class SavedModelExportUtilsTest(test.TestCase): """ input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "classes": @@ -268,14 +289,14 @@ class SavedModelExportUtilsTest(test.TestCase): shape = tensor_shape_pb2.TensorShapeProto( dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_float = types_pb2.DataType.Value("DT_FLOAT") - expected_signature_def.inputs[ - signature_constants.CLASSIFY_INPUTS].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs[signature_constants.CLASSIFY_INPUTS].CopyFrom( + meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs[ signature_constants.CLASSIFY_OUTPUT_SCORES].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-scores:0", dtype=dtype_float, + name="output-tensor-scores:0", + dtype=dtype_float, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -290,8 +311,7 @@ class SavedModelExportUtilsTest(test.TestCase): """ input_tensors = { "input-1": - array_ops.placeholder( - dtypes.float32, 1, name="input-tensor-1") + array_ops.placeholder(dtypes.float32, 1, name="input-tensor-1") } output_tensors = { "classes": @@ -310,17 +330,18 @@ class SavedModelExportUtilsTest(test.TestCase): dim=[tensor_shape_pb2.TensorShapeProto.Dim(size=1)]) dtype_int64 = types_pb2.DataType.Value("DT_INT64") dtype_float = types_pb2.DataType.Value("DT_FLOAT") - expected_signature_def.inputs[ - "input-1"].CopyFrom( - meta_graph_pb2.TensorInfo( - name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) + expected_signature_def.inputs["input-1"].CopyFrom( + 
meta_graph_pb2.TensorInfo( + name="input-tensor-1:0", dtype=dtype_float, tensor_shape=shape)) expected_signature_def.outputs["classes"].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-classes:0", dtype=dtype_int64, + name="output-tensor-classes:0", + dtype=dtype_int64, tensor_shape=shape)) expected_signature_def.outputs["logits"].CopyFrom( meta_graph_pb2.TensorInfo( - name="output-tensor-logits:0", dtype=dtype_float, + name="output-tensor-logits:0", + dtype=dtype_float, tensor_shape=shape)) expected_signature_def.method_name = ( @@ -379,8 +400,9 @@ class SavedModelExportUtilsTest(test.TestCase): def test_get_output_alternatives_single_no_default(self): prediction_tensor = constant_op.constant(["bogus"]) provided_output_alternatives = { - "head-1": (constants.ProblemType.LINEAR_REGRESSION, - {"output": prediction_tensor}), + "head-1": (constants.ProblemType.LINEAR_REGRESSION, { + "output": prediction_tensor + }), } model_fn_ops = model_fn.ModelFnOps( model_fn.ModeKeys.INFER, @@ -390,10 +412,11 @@ class SavedModelExportUtilsTest(test.TestCase): output_alternatives, _ = saved_model_export_utils.get_output_alternatives( model_fn_ops) - self.assertEqual({"head-1": - (constants.ProblemType.LINEAR_REGRESSION, - {"output": prediction_tensor})}, - output_alternatives) + self.assertEqual({ + "head-1": (constants.ProblemType.LINEAR_REGRESSION, { + "output": prediction_tensor + }) + }, output_alternatives) def test_get_output_alternatives_multi_no_default(self): provided_output_alternatives = { @@ -424,10 +447,11 @@ class SavedModelExportUtilsTest(test.TestCase): output_alternatives, _ = saved_model_export_utils.get_output_alternatives( model_fn_ops) - self.assertEqual( - {"default_output_alternative": (constants.ProblemType.UNSPECIFIED, { - "some_output": prediction_tensor})}, - output_alternatives) + self.assertEqual({ + "default_output_alternative": (constants.ProblemType.UNSPECIFIED, { + "some_output": prediction_tensor + }) + }, output_alternatives) def test_get_output_alternatives_empty_provided_with_default(self): prediction_tensor = constant_op.constant(["bogus"]) @@ -452,10 +476,11 @@ class SavedModelExportUtilsTest(test.TestCase): output_alternatives, _ = saved_model_export_utils.get_output_alternatives( model_fn_ops) - self.assertEqual( - {"default_output_alternative": (constants.ProblemType.UNSPECIFIED, { - "some_output": prediction_tensor})}, - output_alternatives) + self.assertEqual({ + "default_output_alternative": (constants.ProblemType.UNSPECIFIED, { + "some_output": prediction_tensor + }) + }, output_alternatives) def test_get_output_alternatives_implicit_single(self): prediction_tensor = constant_op.constant(["bogus"]) @@ -506,14 +531,14 @@ class SavedModelExportUtilsTest(test.TestCase): expected_signature_defs = { "serving_default": - signature_def_utils.regression_signature_def(input_example, - output_1), + signature_def_utils.regression_signature_def( + input_example, output_1), "default_input_alternative:head-1": - signature_def_utils.regression_signature_def(input_example, - output_1), + signature_def_utils.regression_signature_def( + input_example, output_1), "default_input_alternative:head-2": - signature_def_utils.classification_signature_def(input_example, - output_2, None), + signature_def_utils.classification_signature_def( + input_example, output_2, None), "default_input_alternative:head-3": signature_def_utils.predict_signature_def({ "default input": input_example @@ -624,17 +649,20 @@ class SavedModelExportUtilsTest(test.TestCase): (most_recent_export_dir, 
most_recent_export_version) = ( saved_model_export_utils.get_most_recent_export(export_dir_base)) - self.assertEqual(compat.as_bytes(export_dir_4), - compat.as_bytes(most_recent_export_dir)) - self.assertEqual(compat.as_bytes(export_dir_4), - os.path.join(compat.as_bytes(export_dir_base), - compat.as_bytes( - str(most_recent_export_version)))) + self.assertEqual( + compat.as_bytes(export_dir_4), compat.as_bytes(most_recent_export_dir)) + self.assertEqual( + compat.as_bytes(export_dir_4), + os.path.join( + compat.as_bytes(export_dir_base), + compat.as_bytes(str(most_recent_export_version)))) def test_make_export_strategy(self): """Only tests that an ExportStrategy instance is created.""" + def _serving_input_fn(): return array_ops.constant([1]), None + export_strategy = saved_model_export_utils.make_export_strategy( serving_input_fn=_serving_input_fn, default_output_alternative_key="default", @@ -655,14 +683,61 @@ class SavedModelExportUtilsTest(test.TestCase): real_valued_col1 = fc.real_valued_column("real_valued_column1") bucketized_col1 = fc.bucketized_column( fc.real_valued_column("real_valued_column_for_bucketization1"), [0, 4]) - feature_columns = [sparse_col, embedding_col, real_valued_col1, - bucketized_col1] + feature_columns = [ + sparse_col, embedding_col, real_valued_col1, bucketized_col1 + ] export_strategy = saved_model_export_utils.make_parsing_export_strategy( feature_columns=feature_columns) self.assertTrue( isinstance(export_strategy, export_strategy_lib.ExportStrategy)) + def test_make_best_model_export_strategy(self): + export_dir_base = tempfile.mkdtemp() + "export/" + gfile.MkDir(export_dir_base) + + test_estimator = TestEstimator() + export_strategy = saved_model_export_utils.make_best_model_export_strategy( + serving_input_fn=None, exports_to_keep=3, compare_fn=None) + + self.assertNotEqual("", + export_strategy.export(test_estimator, export_dir_base, + "fake_ckpt_0", {"loss": 100})) + self.assertNotEqual("", test_estimator.last_exported_dir) + self.assertNotEqual("", test_estimator.last_exported_checkpoint) + + self.assertEqual("", + export_strategy.export(test_estimator, export_dir_base, + "fake_ckpt_1", {"loss": 101})) + self.assertEqual(test_estimator.last_exported_dir, + os.path.join(export_dir_base, "fake_ckpt_0")) + + self.assertNotEqual("", + export_strategy.export(test_estimator, export_dir_base, + "fake_ckpt_2", {"loss": 10})) + self.assertEqual(test_estimator.last_exported_dir, + os.path.join(export_dir_base, "fake_ckpt_2")) + + self.assertEqual("", + export_strategy.export(test_estimator, export_dir_base, + "fake_ckpt_3", {"loss": 20})) + self.assertEqual(test_estimator.last_exported_dir, + os.path.join(export_dir_base, "fake_ckpt_2")) + + def test_make_best_model_export_strategy_exceptions(self): + export_dir_base = tempfile.mkdtemp() + "export/" + + test_estimator = TestEstimator() + export_strategy = saved_model_export_utils.make_best_model_export_strategy( + serving_input_fn=None, exports_to_keep=3, compare_fn=None) + + with self.assertRaises(ValueError): + export_strategy.export(test_estimator, export_dir_base, "", {"loss": 200}) + + with self.assertRaises(ValueError): + export_strategy.export(test_estimator, export_dir_base, "fake_ckpt_1", + None) + def _create_test_export_dir(export_dir_base): export_dir = saved_model_export_utils.get_timestamped_export_dir( diff --git a/tensorflow/contrib/receptive_field/BUILD b/tensorflow/contrib/receptive_field/BUILD new file mode 100644 index 00000000000..ed2f3af08cb --- /dev/null +++ 
b/tensorflow/contrib/receptive_field/BUILD @@ -0,0 +1,71 @@ +# Description: +# Contains modules to compute receptive field parameters for CNN models. + +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +exports_files(["LICENSE"]) + +load("//tensorflow:tensorflow.bzl", "py_test") + +# Transitive dependencies of this target will be included in the pip package. +py_library( + name = "receptive_field_pip", + deps = [ + ":graph_compute_order_py", + ":receptive_field_py", + ], +) + +py_library( + name = "graph_compute_order_py", + srcs = [ + "__init__.py", + "python/util/graph_compute_order.py", + ], + srcs_version = "PY2AND3", +) + +py_library( + name = "receptive_field_py", + srcs = [ + "__init__.py", + "python/util/receptive_field.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":graph_compute_order_py", + "//tensorflow/contrib/util:util_py", + "//tensorflow/python:platform", + ], +) + +py_test( + name = "receptive_field_test", + srcs = ["python/util/receptive_field_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":receptive_field_py", + "//tensorflow/contrib/framework:framework_py", + "//tensorflow/contrib/slim", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:nn", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/receptive_field/README.md b/tensorflow/contrib/receptive_field/README.md new file mode 100644 index 00000000000..b150b903b23 --- /dev/null +++ b/tensorflow/contrib/receptive_field/README.md @@ -0,0 +1,165 @@ +# Receptive field computation for convnets + +This library enables you to easily compute the receptive field parameters of +your favorite convnet. You can use it to understand how big of an input image +region your output features depend on. Better yet, using the parameters computed +by the library, you can easily find the exact image region which is used to +compute each convnet feature. + +## Basic usage + +The main function to be called is `compute_receptive_field_from_graph_def`, +which will return the receptive field, effective stride and effective padding +for both horizontal and vertical directions. + +For example, if your model is constructed using the function +`my_model_construction()`, you can use the library as follows: + +```python +import tensorflow as tf +from tensorflow.contrib import receptive_field + +# Construct graph. +g = tf.Graph() +with g.as_default(): + images = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='input_image') + my_model_construction(images) + +# Compute receptive field parameters. +rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \ + receptive_field.compute_receptive_field_from_graph_def( \ + g.as_graph_def(), 'input_image', 'my_output_endpoint') +``` + +Here's a simple example of computing the receptive field parameters for +Inception-Resnet-v2. To get this to work, be sure to checkout +[tensorflow/models](https://github.com/tensorflow/models), so that the Inception +models are available to you. 
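Before the Inception example, here is a minimal self-contained sanity check (an illustrative sketch added here, not part of the library's own docs; the toy one-layer model and the endpoint names are assumptions):

```python
import tensorflow as tf
from tensorflow.contrib import receptive_field
from tensorflow.contrib import slim

# Toy model: a single 3x3 convolution with stride 2 and VALID padding.
g = tf.Graph()
with g.as_default():
  images = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='input_image')
  net = slim.conv2d(images, 8, [3, 3], stride=2, padding='VALID', scope='conv1')
  tf.identity(net, name='my_output_endpoint')

rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \
    receptive_field.compute_receptive_field_from_graph_def(
        g.as_graph_def(), 'input_image', 'my_output_endpoint')

# A single 3x3/2 VALID convolution should report rf_x = rf_y = 3,
# eff_stride_x = eff_stride_y = 2 and eff_pad_x = eff_pad_y = 0, so output
# feature [0, 0] is centered at input pixel [1, 1].
```

The Inception-Resnet-v2 example, in contrast, needs the [tensorflow/models](https://github.com/tensorflow/models) checkout mentioned above.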
This can be done in three simple commands: + +```sh +git clone https://github.com/tensorflow/models +cd models/slim +sudo python setup.py install_lib +``` + +You can then compute the receptive field parameters for Inception-Resnet-v2 as: + +```python +from nets import inception +import tensorflow as tf +from tensorflow.contrib import receptive_field + +# Construct graph. +g = tf.Graph() +with g.as_default(): + images = tf.placeholder(tf.float32, shape=(1, None, None, 3), name='input_image') + inception.inception_resnet_v2_base(images) + +# Compute receptive field parameters. +rf_x, rf_y, eff_stride_x, eff_stride_y, eff_pad_x, eff_pad_y = \ + receptive_field.compute_receptive_field_from_graph_def( \ + g.as_graph_def(), 'input_image', 'InceptionResnetV2/Conv2d_7b_1x1/Relu') +``` + +This will give you `rf_x = rf_y = 3039`, `eff_stride_x = eff_stride_y = 32`, and +`eff_pad_x = eff_pad_y = 1482`. This means that each feature that is output at +the node `'InceptionResnetV2/Conv2d_7b_1x1/Relu'` is computed from a region +which is of size `3039x3039`. Further, by using the expressions + +```python +center_x = -eff_pad_x + feature_x*eff_stride_x + (rf_x - 1)/2 +center_y = -eff_pad_y + feature_y*eff_stride_y + (rf_y - 1)/2 +``` + +one can compute the center of the region in the input image that is used to +compute the output feature at position `[feature_x, feature_y]`. For example, +the feature at position `[0, 2]` at the output of the layer +`'InceptionResnetV2/Conv2d_7b_1x1/Relu'` is centered in the original image in +the position `[37, 101]`. + +TODO: include link to derivations and definitions of different parameters. + +## Receptive field benchmark + +As you might expect, it is straightforward to run this library on the popular +convnets, and gather their receptive fields. We provide a python script which +does exactly that, available under `python/util/examples/rf_benchmark.py`. + +To get this to work, be sure to checkout +[tensorflow/models](https://github.com/tensorflow/models) (see the 3-command +instructions for this above). Then, simply: + +```sh +cd python/util/examples +python rf_benchmark.py --csv_path /tmp/rf_benchmark_results.csv +``` + +The script will write to stdout the receptive field parameters for many variants +of several popular convnets: AlexNet, VGG, ResNet, Inception, Mobilenet. They +are also written to the file `/tmp/rf_benchmark_results.csv`. + +TODO: include here a plot for receptive field sizes of different convnets. + +TODO: include table/link to pre-computed RF parameters. + +## Compute RF parameters from a graph pbtxt + +We also provide a utility to compute the receptive field parameters directly +from a graph protobuf file. + +Have a `graph.pbtxt` file and want to compute its receptive field parameters? We +got you covered. The only prerequisite is to install +[google/protobuf](https://github.com/google/protobuf), which you probably +already have if you're using tensorflow (otherwise, follow installation +instructions [here](https://github.com/google/protobuf/tree/master/python)). + +This should work: + +```sh +cd python/util/examples +python compute_rf.py \ + --graph_path /path/to/graph.pbtxt \ + --output_path /path/to/output/rf_info.txt \ + --input_node my_input_node \ + --output_node my_output_node +``` + +Don't know how to generate a graph protobuf file? 
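If you already have the graph built in Python, one quick option (a sketch; it assumes the graph object `g` from the examples above) is to dump it yourself with `tf.train.write_graph`:

```python
import tensorflow as tf

# Assuming `g` is a tf.Graph built as in the earlier examples; this writes a
# text-format GraphDef to /tmp/graph.pbtxt.
tf.train.write_graph(g.as_graph_def(), '/tmp', 'graph.pbtxt', as_text=True)
```

For a ready-made script that does this, read on.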
Take a look at the +`write_inception_resnet_v2_graph.py` script, which shows how to save it for the +Inception-Resnet-v2 model: + +```sh +cd python/util/examples +python write_inception_resnet_v2_graph.py --graph_dir /tmp --graph_filename graph.pbtxt +``` + +This will write the Inception-Resnet-v2 graph protobuf to `/tmp/graph.pbtxt`. + +For completeness, here's how you would use this file to get the receptive field +parameters of the Inception-Resnet-v2 model: + +```sh +cd python/util/examples +python compute_rf.py \ + --graph_path /tmp/graph.pbtxt \ + --output_path /tmp/rf_info.txt \ + --input_node input_image \ + --output_node InceptionResnetV2/Conv2d_7b_1x1/Relu +``` + +This will write the receptive field parameters of the model to +`/tmp/rf_info.txt`, which will look like: + +```sh +Receptive field size (horizontal) = 3039 +Receptive field size (vertical) = 3039 +Effective stride (horizontal) = 32 +Effective stride (vertical) = 32 +Effective padding (horizontal) = 1482 +Effective padding (vertical) = 1482 +``` + +## Authors + +André Araujo (github id: andrefaraujo) and Mark Sandler (github id: +marksandler) diff --git a/tensorflow/contrib/receptive_field/__init__.py b/tensorflow/contrib/receptive_field/__init__.py new file mode 100644 index 00000000000..10745a6a53d --- /dev/null +++ b/tensorflow/contrib/receptive_field/__init__.py @@ -0,0 +1,23 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Module to compute receptive field parameters for CNN tensorflow models.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# pylint: disable=unused-import +from tensorflow.contrib.receptive_field.python.util.graph_compute_order import get_compute_order +from tensorflow.contrib.receptive_field.python.util.receptive_field import compute_receptive_field_from_graph_def +# pylint: enable=unused-import diff --git a/tensorflow/contrib/receptive_field/python/__init__.py b/tensorflow/contrib/receptive_field/python/__init__.py new file mode 100644 index 00000000000..217047f92d3 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Module to compute receptive field parameters for CNN tensorflow models.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function diff --git a/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py new file mode 100644 index 00000000000..1cf978b90a3 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/compute_rf.py @@ -0,0 +1,94 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Computes Receptive Field (RF) information given a graph protobuf. + +For an example of usage, see accompanying file compute_rf.sh +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys + +from google.protobuf import text_format + +from tensorflow.contrib import receptive_field +from tensorflow.core.framework import graph_pb2 +from tensorflow.python.platform import app +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging as logging + +cmd_args = None + + +def _load_graphdef(path): + """Helper function to load GraphDef from file. + + Args: + path: Path to pbtxt file. + + Returns: + graph_def: A GraphDef object. 
+ """ + graph_def = graph_pb2.GraphDef() + pbstr = gfile.Open(path).read() + text_format.Parse(pbstr, graph_def) + return graph_def + + +def main(unused_argv): + + graph_def = _load_graphdef(cmd_args.graph_path) + + (receptive_field_x, receptive_field_y, effective_stride_x, effective_stride_y, + effective_padding_x, effective_padding_y + ) = receptive_field.compute_receptive_field_from_graph_def( + graph_def, cmd_args.input_node, cmd_args.output_node) + + logging.info('Receptive field size (horizontal) = %s', receptive_field_x) + logging.info('Receptive field size (vertical) = %s', receptive_field_y) + logging.info('Effective stride (horizontal) = %s', effective_stride_x) + logging.info('Effective stride (vertical) = %s', effective_stride_y) + logging.info('Effective padding (horizontal) = %s', effective_padding_x) + logging.info('Effective padding (vertical) = %s', effective_padding_y) + + f = gfile.GFile('%s' % cmd_args.output_path, 'w') + f.write('Receptive field size (horizontal) = %s\n' % receptive_field_x) + f.write('Receptive field size (vertical) = %s\n' % receptive_field_y) + f.write('Effective stride (horizontal) = %s\n' % effective_stride_x) + f.write('Effective stride (vertical) = %s\n' % effective_stride_y) + f.write('Effective padding (horizontal) = %s\n' % effective_padding_x) + f.write('Effective padding (vertical) = %s\n' % effective_padding_y) + f.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--graph_path', type=str, default='', help='Graph path (pbtxt format).') + parser.add_argument( + '--output_path', + type=str, + default='', + help='Path to output text file where RF information will be written to.') + parser.add_argument( + '--input_node', type=str, default='', help='Name of input node.') + parser.add_argument( + '--output_node', type=str, default='', help='Name of output node.') + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py new file mode 100644 index 00000000000..94228dfa61b --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/rf_benchmark.py @@ -0,0 +1,460 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Computes Receptive Field (RF) information for different models. + +The receptive field (and related parameters) for the different models are +printed to stdout, and may also optionally be written to a CSV file. 
+ +For an example of usage, see rf_benchmark.sh +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import csv +import sys + +from nets import alexnet +from nets import inception +from nets import mobilenet_v1 +from nets import resnet_v1 +from nets import resnet_v2 +from nets import vgg +from tensorflow.contrib import framework +from tensorflow.contrib import receptive_field +from tensorflow.contrib import slim +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import app + +cmd_args = None + +# Input node name for all architectures. +_INPUT_NODE = 'input_image' + +# Variants of different network architectures. + +# - resnet: different versions and sizes. +_SUPPORTED_RESNET_VARIANTS = [ + 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v1_200', + 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'resnet_v2_200' +] + +# - inception_resnet_v2: default, and version with SAME padding. +_SUPPORTED_INCEPTIONRESNETV2_VARIANTS = [ + 'inception_resnet_v2', 'inception_resnet_v2-same' +] + +# - inception_v2: default, and version with no separable conv. +_SUPPORTED_INCEPTIONV2_VARIANTS = [ + 'inception_v2', 'inception_v2-no-separable-conv' +] + +# - inception_v3: default version. +_SUPPORTED_INCEPTIONV3_VARIANTS = ['inception_v3'] + +# - inception_v4: default version. +_SUPPORTED_INCEPTIONV4_VARIANTS = ['inception_v4'] + +# - alexnet_v2: default version. +_SUPPORTED_ALEXNETV2_VARIANTS = ['alexnet_v2'] + +# - vgg: vgg_a (with 11 layers) and vgg_16 (version D). +_SUPPORTED_VGG_VARIANTS = ['vgg_a', 'vgg_16'] + +# - mobilenet_v1: 100% and 75%. +_SUPPORTED_MOBILENETV1_VARIANTS = ['mobilenet_v1', 'mobilenet_v1_075'] + + +def _construct_model(model_type='resnet_v1_50'): + """Constructs model for the desired type of CNN. + + Args: + model_type: Type of model to be used. + + Returns: + end_points: A dictionary from components of the network to the corresponding + activations. + + Raises: + ValueError: If the model_type is not supported. + """ + # Placeholder input. + images = array_ops.placeholder( + dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE) + + # Construct model. 
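  # Editor's note (illustrative, not part of the original change): each branch
  # below builds one architecture on top of the variable-size placeholder above
  # and returns its `end_points` dict, mapping endpoint names such as
  # 'Mixed_6a' to activation tensors; receptive fields are later computed per
  # endpoint.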
+ if model_type == 'inception_resnet_v2': + _, end_points = inception.inception_resnet_v2_base(images) + elif model_type == 'inception_resnet_v2-same': + _, end_points = inception.inception_resnet_v2_base( + images, align_feature_maps=True) + elif model_type == 'inception_v2': + _, end_points = inception.inception_v2_base(images) + elif model_type == 'inception_v2-no-separable-conv': + _, end_points = inception.inception_v2_base( + images, use_separable_conv=False) + elif model_type == 'inception_v3': + _, end_points = inception.inception_v3_base(images) + elif model_type == 'inception_v4': + _, end_points = inception.inception_v4_base(images) + elif model_type == 'alexnet_v2': + _, end_points = alexnet.alexnet_v2(images) + elif model_type == 'vgg_a': + _, end_points = vgg.vgg_a(images) + elif model_type == 'vgg_16': + _, end_points = vgg.vgg_16(images) + elif model_type == 'mobilenet_v1': + _, end_points = mobilenet_v1.mobilenet_v1_base(images) + elif model_type == 'mobilenet_v1_075': + _, end_points = mobilenet_v1.mobilenet_v1_base( + images, depth_multiplier=0.75) + elif model_type == 'resnet_v1_50': + _, end_points = resnet_v1.resnet_v1_50( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_101': + _, end_points = resnet_v1.resnet_v1_101( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_152': + _, end_points = resnet_v1.resnet_v1_152( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v1_200': + _, end_points = resnet_v1.resnet_v1_200( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_50': + _, end_points = resnet_v2.resnet_v2_50( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_101': + _, end_points = resnet_v2.resnet_v2_101( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_152': + _, end_points = resnet_v2.resnet_v2_152( + images, num_classes=None, is_training=False, global_pool=False) + elif model_type == 'resnet_v2_200': + _, end_points = resnet_v2.resnet_v2_200( + images, num_classes=None, is_training=False, global_pool=False) + else: + raise ValueError('Unsupported model_type %s.' % model_type) + + return end_points + + +def _get_desired_end_point_keys(model_type='resnet_v1_50'): + """Gets list of desired end point keys for a type of CNN. + + Args: + model_type: Type of model to be used. + + Returns: + desired_end_point_types: A list containing the desired end-points. + + Raises: + ValueError: If the model_type is not supported. 
+ """ + if model_type in _SUPPORTED_RESNET_VARIANTS: + blocks = ['block1', 'block2', 'block3', 'block4'] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in blocks] + elif model_type in _SUPPORTED_INCEPTIONRESNETV2_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', + 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', + 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1' + ] + elif model_type in _SUPPORTED_INCEPTIONV2_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1', 'Conv2d_2c_3x3', + 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c', 'Mixed_4a', 'Mixed_4b', + 'Mixed_4c', 'Mixed_4d', 'Mixed_4e', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c' + ] + elif model_type in _SUPPORTED_INCEPTIONV3_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'MaxPool_3a_3x3', + 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', 'Mixed_5b', + 'Mixed_5c', 'Mixed_5d', 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', + 'Mixed_6e', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c' + ] + elif model_type in _SUPPORTED_INCEPTIONV4_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', 'Mixed_3a', + 'Mixed_4a', 'Mixed_5a', 'Mixed_5b', 'Mixed_5c', 'Mixed_5d', 'Mixed_5e', + 'Mixed_6a', 'Mixed_6b', 'Mixed_6c', 'Mixed_6d', 'Mixed_6e', 'Mixed_6f', + 'Mixed_6g', 'Mixed_6h', 'Mixed_7a', 'Mixed_7b', 'Mixed_7c', 'Mixed_7d' + ] + elif model_type in _SUPPORTED_ALEXNETV2_VARIANTS: + ep = ['conv1', 'pool1', 'conv2', 'conv3', 'conv4', 'conv5', 'pool5'] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in ep] + elif model_type in _SUPPORTED_VGG_VARIANTS: + ep = [ + 'conv1/conv1_1', 'pool1', 'conv2/conv2_1', 'pool2', 'conv3/conv3_1', + 'conv3/conv3_2', 'pool3', 'conv4/conv4_1', 'conv4/conv4_2', 'pool4', + 'conv5/conv5_1', 'conv5/conv5_2', 'pool5' + ] + desired_end_point_keys = ['%s/%s' % (model_type, i) for i in ep] + elif model_type in _SUPPORTED_MOBILENETV1_VARIANTS: + desired_end_point_keys = [ + 'Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', + 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5_pointwise', + 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', + 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', + 'Conv2d_12_pointwise', 'Conv2d_13_pointwise' + ] + else: + raise ValueError('Unsupported model_type %s.' % model_type) + + return desired_end_point_keys + + +def _model_graph_def(model_type='resnet_v1_50', arg_sc=None): + """Constructs a model graph, returning GraphDef and end-points. + + Args: + model_type: Type of model to be used. + arg_sc: Optional arg scope to use in constructing the graph. + + Returns: + graph_def: GraphDef of constructed graph. + end_points: A dictionary from components of the network to the corresponding + activations. + """ + if arg_sc is None: + arg_sc = {} + g = ops.Graph() + with g.as_default(): + with framework.arg_scope(arg_sc): + end_points = _construct_model(model_type) + + return g.as_graph_def(), end_points + + +def _model_rf(graphdef, + end_points, + desired_end_point_keys, + model_type='resnet_v1_50', + csv_writer=None): + """Computes receptive field information for a given CNN model. + + The information will be printed to stdout. If the RF parameters are the same + for the horizontal and vertical directions, it will be printed only once. + Otherwise, they are printed once for the horizontal and once for the vertical + directions. 
+ + Args: + graphdef: GraphDef of given model. + end_points: A dictionary from components of the model to the corresponding + activations. + desired_end_point_keys: List of desired end points for which receptive field + information will be computed. + model_type: Type of model to be used, used only for printing purposes. + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for desired_end_point_key in desired_end_point_keys: + print('- %s:' % desired_end_point_key) + output_node_with_colon = end_points[desired_end_point_key].name + pos = output_node_with_colon.rfind(':') + output_node = output_node_with_colon[:pos] + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y + ) = receptive_field.compute_receptive_field_from_graph_def( + graphdef, _INPUT_NODE, output_node) + # If values are the same in horizontal/vertical directions, just report one + # of them. Otherwise, report both. + if (receptive_field_x == receptive_field_y) and ( + effective_stride_x == effective_stride_y) and ( + effective_padding_x == effective_padding_y): + print('Receptive field size = %5s, effective stride = %5s, effective ' + 'padding = %5s' % (str(receptive_field_x), str(effective_stride_x), + str(effective_padding_x))) + else: + print('Receptive field size: horizontal = %5s, vertical = %5s. ' + 'Effective stride: horizontal = %5s, vertical = %5s. Effective ' + 'padding: horizontal = %5s, vertical = %5s' % + (str(receptive_field_x), str(receptive_field_y), + str(effective_stride_x), str(effective_stride_y), + str(effective_padding_x), str(effective_padding_y))) + if csv_writer is not None: + csv_writer.writerow({ + 'CNN': model_type, + 'end_point': desired_end_point_key, + 'RF size hor': str(receptive_field_x), + 'RF size ver': str(receptive_field_y), + 'effective stride hor': str(effective_stride_x), + 'effective stride ver': str(effective_stride_y), + 'effective padding hor': str(effective_padding_x), + 'effective padding ver': str(effective_padding_y) + }) + + +def _process_model_rf(model_type='resnet_v1_50', csv_writer=None, arg_sc=None): + """Contructs model graph and desired end-points, and compute RF. + + The computed RF parameters are printed to stdout by the _model_rf function. + + Args: + model_type: Type of model to be used. + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + arg_sc: Optional arg scope to use in constructing the graph. + + """ + print('********************%s' % model_type) + graphdef, end_points = _model_graph_def(model_type, arg_sc) + desired_end_point_keys = _get_desired_end_point_keys(model_type) + _model_rf(graphdef, end_points, desired_end_point_keys, model_type, + csv_writer) + + +def _resnet_rf(csv_writer=None): + """Computes RF and associated parameters for resnet models. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_RESNET_VARIANTS: + arg_sc = resnet_v1.resnet_arg_scope() + _process_model_rf(model_type, csv_writer, arg_sc) + + +def _inception_resnet_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_resnet_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. 
+ """ + for model_type in _SUPPORTED_INCEPTIONRESNETV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v3_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v3 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV3_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _inception_v4_rf(csv_writer=None): + """Computes RF and associated parameters for the inception_v4 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_INCEPTIONV4_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _alexnet_v2_rf(csv_writer=None): + """Computes RF and associated parameters for the alexnet_v2 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_ALEXNETV2_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _vgg_rf(csv_writer=None): + """Computes RF and associated parameters for the vgg model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_VGG_VARIANTS: + _process_model_rf(model_type, csv_writer) + + +def _mobilenet_v1_rf(csv_writer=None): + """Computes RF and associated parameters for the mobilenet_v1 model. + + The computed values are written to stdout. + + Args: + csv_writer: A CSV writer for RF parameters, which is used if it is not None. + """ + for model_type in _SUPPORTED_MOBILENETV1_VARIANTS: + with slim.arg_scope( + [slim.batch_norm, slim.dropout], is_training=False) as arg_sc: + _process_model_rf(model_type, csv_writer, arg_sc) + + +def main(unused_argv): + # Configure CSV file which will be written, if desired. + if cmd_args.csv_path: + csv_file = open(cmd_args.csv_path, 'w') + field_names = [ + 'CNN', 'end_point', 'RF size hor', 'RF size ver', + 'effective stride hor', 'effective stride ver', 'effective padding hor', + 'effective padding ver' + ] + rf_writer = csv.DictWriter(csv_file, fieldnames=field_names) + rf_writer.writeheader() + else: + rf_writer = None + + # Compute RF parameters for each network architecture. + _alexnet_v2_rf(rf_writer) + _vgg_rf(rf_writer) + _inception_v2_rf(rf_writer) + _inception_v3_rf(rf_writer) + _inception_v4_rf(rf_writer) + _inception_resnet_v2_rf(rf_writer) + _mobilenet_v1_rf(rf_writer) + _resnet_rf(rf_writer) + + # Close CSV file, if it was opened. 
+ if cmd_args.csv_path: + csv_file.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--csv_path', + type=str, + default='', + help="""\ + Path to CSV file that will be written with RF parameters.If empty, no + file will be written.\ + """) + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py new file mode 100644 index 00000000000..793ae163d80 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/examples/write_inception_resnet_v2_graph.py @@ -0,0 +1,61 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Simple script to write Inception-ResNet-v2 model to graph file. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import sys + +from nets import inception +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import graph_io +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.platform import app + +cmd_args = None + + +def main(unused_argv): + # Model definition. + g = ops.Graph() + with g.as_default(): + images = array_ops.placeholder( + dtypes.float32, shape=(1, None, None, 3), name='input_image') + inception.inception_resnet_v2_base(images) + + graph_io.write_graph(g.as_graph_def(), cmd_args.graph_dir, + cmd_args.graph_filename) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.register('type', 'bool', lambda v: v.lower() == 'true') + parser.add_argument( + '--graph_dir', + type=str, + default='/tmp', + help='Directory where graph will be saved.') + parser.add_argument( + '--graph_filename', + type=str, + default='graph.pbtxt', + help='Filename of graph that will be saved.') + cmd_args, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py new file mode 100644 index 00000000000..8af4be16d6c --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/graph_compute_order.py @@ -0,0 +1,88 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library to compute order of computations in a graph. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections + + +class GraphDefHelper(object): + """Helper class to collect node names and definitions. + + Example: + b = GraphDefHelper(graph_def) + # Prints node that produces given output. + print b.output_of['conv/foo/bar'] + """ + + def __init__(self, gd): + self.output_of = {} + for each in gd.node: + self.output_of[each.name] = each + + +# pylint: disable=invalid-name +_NodeEntry = collections.namedtuple('NodeEntry', field_names=['order', 'node']) + + +def _get_computed_nodes(g, output, seen): + """Traverses the graph in topological order. + + Args: + g: GraphDefHelper object. + output: current node. + seen: map of nodes we've already traversed. + Returns: + order in topological sort for 'output'. + """ + if output in seen: + return seen[output].order + node_def = g.output_of.get(output, None) + if node_def is None: + seen[output] = _NodeEntry(0, None) + return 0 + + r = 0 + for each in node_def.input: + # Parses name of input node. + if each.startswith('^'): + each = each[1:] + each = each.split(':')[0] + # Recursively computes ordering. + new_v = _get_computed_nodes(g, each, seen) + r = max(r, new_v + 1) + + seen[output] = _NodeEntry(r, node_def) + + return seen[output].order + + +def get_compute_order(graph_def): + """Computes order of computation for a given graph. + + Args: + graph_def: GraphDef object. + Returns: + map: name -> {order, node} + """ + helper = GraphDefHelper(graph_def) + seen = collections.defaultdict(_NodeEntry) + for each in graph_def.node: + _get_computed_nodes(helper, each.name, seen) + return seen diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field.py b/tensorflow/contrib/receptive_field/python/util/receptive_field.py new file mode 100644 index 00000000000..db190a1a416 --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field.py @@ -0,0 +1,485 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functions to compute receptive field of a fully-convolutional network. 
+ +Please refer to the following g3doc for detailed explanation on how this +computation is performed, and why it is important: +g3doc/photos/vision/features/delf/g3doc/rf_computation.md +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math +from tensorflow.contrib.receptive_field.python.util import graph_compute_order +from tensorflow.contrib.util import make_ndarray +from tensorflow.python.platform import tf_logging as logging + +# White-listed layer operations, which do not affect the receptive field +# computation. +_UNCHANGED_RF_LAYER_OPS = [ + "Softplus", "Relu", "BiasAdd", "Mul", "Add", "Const", "Identity", + "VariableV2", "Sub", "Rsqrt", "ConcatV2" +] + +# Different ways in which padding modes may be spelled. +_VALID_PADDING = ["VALID", b"VALID"] +_SAME_PADDING = ["SAME", b"SAME"] + + +def _stride_size(node): + """Computes stride size given a TF node. + + Args: + node: Tensorflow node (NodeDef proto). + + Returns: + stride_x: Stride size for horizontal direction (integer). + stride_y: Stride size for vertical direction (integer). + """ + strides_attr = node.attr["strides"] + logging.vlog(4, "strides_attr = %s", strides_attr) + stride_y = strides_attr.list.i[1] + stride_x = strides_attr.list.i[2] + return stride_x, stride_y + + +def _conv_kernel_size(node, name_to_order_node): + """Computes kernel size given a TF convolution or pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + + Raises: + ValueError: If the weight layer node is invalid. + """ + weights_layer_read_name = node.input[1] + if not weights_layer_read_name.endswith("/read"): + raise ValueError( + "Weight layer's name input to conv layer does not end with '/read'") + weights_layer_param_name = weights_layer_read_name[:-5] + weights_node = name_to_order_node[weights_layer_param_name].node + if weights_node.op != "VariableV2": + raise ValueError("Weight layer is not of type VariableV2") + shape = weights_node.attr["shape"] + logging.vlog(4, "weight shape = %s", shape) + kernel_size_y = shape.shape.dim[0].size + kernel_size_x = shape.shape.dim[1].size + return kernel_size_x, kernel_size_y + + +def _padding_size_conv_pool(node, kernel_size, stride): + """Computes padding size given a TF convolution or pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + kernel_size: Kernel size of node (integer). + stride: Stride size of node (integer). + + Returns: + padding: Padding size (integer). + + Raises: + ValueError: If padding is invalid. + """ + # In this case, we need to carefully consider the different TF padding modes. + # The padding depends on kernel size, and may depend on input size. If it + # depends on input size, we raise an exception. 
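  # Clarifying note (added for exposition): despite the comment above, the code
  # below does not raise when the padding depends on the input size; it returns
  # None and logs a warning so that alignment checks can be skipped. For the
  # cases it does handle, SAME padding resolves to floor((k - 1) / 2), e.g.
  # k = 3, stride = 1 -> 1;  k = 5, stride = 1 -> 2;  k = 2, stride = 2 -> 0,
  # while VALID padding is always 0.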
+ padding_attr = node.attr["padding"] + logging.vlog(4, "padding_attr = %s", padding_attr) + if padding_attr.s in _VALID_PADDING: + padding = 0 + elif padding_attr.s in _SAME_PADDING: + if kernel_size == 1: + padding = 0 + elif stride == 1: + padding = int(math.floor((float(kernel_size) - 1) / 2)) + elif stride == 2 and kernel_size % 2 == 0: + padding = int(math.floor((float(kernel_size) - 1) / 2)) + else: + padding = None + logging.warning( + "Padding depends on input size, which means that the effective " + "padding may be different depending on the input image " + "dimensionality. In this case, alignment check will be skipped.") + else: + raise ValueError("Invalid padding operation %s" % padding_attr.s) + return padding + + +def _pool_kernel_size(node): + """Computes kernel size given a TF pooling node. + + Args: + node: Tensorflow node (NodeDef proto). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + + Raises: + ValueError: If pooling is invalid. + """ + ksize = node.attr["ksize"] + kernel_size_y = ksize.list.i[1] + kernel_size_x = ksize.list.i[2] + if ksize.list.i[0] != 1: + raise ValueError("pool ksize for first dim is not 1") + if ksize.list.i[3] != 1: + raise ValueError("pool ksize for last dim is not 1") + return kernel_size_x, kernel_size_y + + +def _padding_size_pad_layer(node, name_to_order_node): + """Computes padding size given a TF padding node. + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + padding_x: Padding size for horizontal direction (integer). + padding_y: Padding size for vertical direction (integer). + + Raises: + ValueError: If padding layer is invalid. + """ + paddings_layer_name = node.input[1] + if not paddings_layer_name.endswith("/paddings"): + raise ValueError("Padding layer name does not end with '/paddings'") + paddings_node = name_to_order_node[paddings_layer_name].node + if paddings_node.op != "Const": + raise ValueError("Padding op is not Const") + value = paddings_node.attr["value"] + t = make_ndarray(value.tensor) + padding_y = t[1][0] + padding_x = t[2][0] + if t[0][0] != 0: + raise ValueError("padding is not zero for first tensor dim") + if t[3][0] != 0: + raise ValueError("padding is not zero for last tensor dim") + return padding_x, padding_y + + +def _get_layer_params(node, name_to_order_node): + """Gets layer parameters relevant for RF computation. + + Currently, only these nodes are supported: + - Conv2D + - DepthwiseConv2dNative + - Pad + - MaxPool + - AvgPool + - all nodes listed in _UNCHANGED_RF_LAYER_OPS + + Args: + node: Tensorflow node (NodeDef proto). + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + kernel_size_x: Kernel size for horizontal direction (integer). + kernel_size_y: Kernel size for vertical direction (integer). + stride_x: Stride size for horizontal direction (integer). + stride_y: Stride size for vertical direction (integer). + padding_x: Padding size for horizontal direction (integer). + padding_y: Padding size for vertical direction (integer). + + Raises: + ValueError: If layer op is unknown. 
+ """ + logging.vlog(3, "node.op = %s", node.op) + logging.vlog(4, "node = %s", node) + if node.op == "Conv2D" or node.op == "DepthwiseConv2dNative": + stride_x, stride_y = _stride_size(node) + kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_order_node) + # Compute the padding for this node separately for each direction. + padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x) + padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y) + elif node.op == "Pad": + # Kernel and stride are simply 1 in this case. + kernel_size_x = 1 + kernel_size_y = 1 + stride_x = 1 + stride_y = 1 + padding_x, padding_y = _padding_size_pad_layer(node, name_to_order_node) + elif node.op == "MaxPool" or node.op == "AvgPool": + stride_x, stride_y = _stride_size(node) + kernel_size_x, kernel_size_y = _pool_kernel_size(node) + # Compute the padding for this node separately for each direction. + padding_x = _padding_size_conv_pool(node, kernel_size_x, stride_x) + padding_y = _padding_size_conv_pool(node, kernel_size_y, stride_y) + elif node.op in _UNCHANGED_RF_LAYER_OPS: + # These nodes do not modify the RF parameters. + kernel_size_x = 1 + kernel_size_y = 1 + stride_x = 1 + stride_y = 1 + padding_x = 0 + padding_y = 0 + else: + raise ValueError("Unknown layer op: %s" % node.op) + return kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y + + +def _reverse_sort_by_order(name_to_order_node): + """Sorts map of name_to_order_node nodes in reverse order. + + The output is such that the nodes in name_to_order_node are sorted in + descending order of the "order" field. + + Args: + name_to_order_node: Map from name to {order, node}. Output of + graph_compute_order.get_compute_order(). + + Returns: + sorted_name_to_order_node: Sorted version of the input, in descending order. + """ + return sorted(name_to_order_node.items(), key=lambda x: -x[1].order) + + +def _get_rf_size_node_input(stride, kernel_size, rf_size_output): + """Computes RF size at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + kernel_size: Kernel size of given layer (integer). + rf_size_output: RF size at output of given layer (integer). + + Returns: + rf_size_input: RF size at input of given layer (integer). + """ + return stride * rf_size_output + kernel_size - stride + + +def _get_effective_stride_node_input(stride, effective_stride_output): + """Computes effective stride at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + effective_stride_output: Effective stride at output of given layer + (integer). + + Returns: + effective_stride_input: Effective stride at input of given layer + (integer). + """ + return stride * effective_stride_output + + +def _get_effective_padding_node_input(stride, padding, + effective_padding_output): + """Computes effective padding at the input of a given layer. + + Args: + stride: Stride of given layer (integer). + padding: Padding of given layer (integer). + effective_padding_output: Effective padding at output of given layer + (integer). + + Returns: + effective_padding_input: Effective padding at input of given layer + (integer). + """ + return stride * effective_padding_output + padding + + +def compute_receptive_field_from_graph_def(graph_def, input_node, output_node): + """Computes receptive field (RF) parameters from a GraphDef object. + + Args: + graph_def: GraphDef object. + input_node: Name of the input node from graph. + output_node: Name of the output node from graph. 
+ + Returns: + rf_size_x: Receptive field size of network in the horizontal direction, with + respect to specified input and output. + rf_size_y: Receptive field size of network in the vertical direction, with + respect to specified input and output. + effective_stride_x: Effective stride of network in the horizontal direction, + with respect to specified input and output. + effective_stride_y: Effective stride of network in the vertical direction, + with respect to specified input and output. + effective_padding_x: Effective padding of network in the horizontal + direction, with respect to specified input and output. + effective_padding_y: Effective padding of network in the vertical + direction, with respect to specified input and output. + + Raises: + ValueError: If network is not aligned or if either input or output nodes + cannot be found. For network criterion alignment, see + photos/vision/features/delf/g3doc/rf_computation.md + """ + # Computes order of computation for a given graph. + name_to_order_node = graph_compute_order.get_compute_order( + graph_def=graph_def) + + # Sort in reverse topological order. + order = _reverse_sort_by_order(name_to_order_node) + + # Dictionaries to keep track of receptive field, effective stride and + # effective padding of different nodes. + rf_sizes_x = {} + rf_sizes_y = {} + effective_strides_x = {} + effective_strides_y = {} + effective_paddings_x = {} + effective_paddings_y = {} + + # Initialize dicts for output_node. + rf_sizes_x[output_node] = 1 + rf_sizes_y[output_node] = 1 + effective_strides_x[output_node] = 1 + effective_strides_y[output_node] = 1 + effective_paddings_x[output_node] = 0 + effective_paddings_y[output_node] = 0 + + # Flag to denote if we found output node yet. If we have not, we skip nodes + # until the output node is found. + found_output_node = False + + # Flag to denote if padding is undefined. This happens when SAME padding mode + # is used in conjunction with stride and kernel sizes which make it such that + # the padding to be applied would depend on the input size. In this case, + # alignment checks are skipped, and the effective padding is None. + undefined_padding = False + + for _, (o, node) in order: + if node: + logging.vlog(3, "%10d %-100s %-20s" % (o, node.name[:90], node.op)) + else: + continue + + # When we find input node, we can stop. + if node.name == input_node: + break + + # Loop until we find the output node. All nodes before finding the output + # one are irrelevant, so they can be skipped. + if not found_output_node: + if node.name == output_node: + found_output_node = True + + if found_output_node: + if node.name not in rf_sizes_x: + assert node.name not in rf_sizes_y, ("Node %s is in rf_sizes_y, but " + "not in rf_sizes_x" % node.name) + # In this case, node is not relevant since it's not part of the + # computation we're interested in. + logging.vlog(3, "Irrelevant node %s, skipping it...", node.name) + continue + + # Get params for this layer. + kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, padding_y = ( + _get_layer_params(node, name_to_order_node)) + logging.vlog(3, "kernel_size_x = %s, kernel_size_y = %s, " + "stride_x = %s, stride_y = %s, " + "padding_x = %s, padding_y = %s" % + (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x, + padding_y)) + if padding_x is None or padding_y is None: + undefined_padding = True + + # Get parameters at input of this layer which may or may not be propagated + # to the input layers. 
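      # Worked example (illustrative): if this node is a 3x3 convolution with
      # stride 2 and padding 1, and the values recorded at its output so far
      # are rf = 1, stride = 1, padding = 0, then the input-side values
      # computed below are rf_in = 2*1 + 3 - 2 = 3, stride_in = 2*1 = 2 and
      # pad_in = 2*0 + 1 = 1.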
+ rf_size_input_x = _get_rf_size_node_input(stride_x, kernel_size_x, + rf_sizes_x[node.name]) + rf_size_input_y = _get_rf_size_node_input(stride_y, kernel_size_y, + rf_sizes_y[node.name]) + effective_stride_input_x = _get_effective_stride_node_input( + stride_x, effective_strides_x[node.name]) + effective_stride_input_y = _get_effective_stride_node_input( + stride_y, effective_strides_y[node.name]) + if not undefined_padding: + effective_padding_input_x = _get_effective_padding_node_input( + stride_x, padding_x, effective_paddings_x[node.name]) + effective_padding_input_y = _get_effective_padding_node_input( + stride_y, padding_y, effective_paddings_y[node.name]) + else: + effective_padding_input_x = None + effective_padding_input_y = None + + # Loop over this node's inputs and potentially propagate information down. + for inp_name in node.input: + logging.vlog(4, "inp_name = %s", inp_name) + inp_node = name_to_order_node[inp_name].node + logging.vlog(4, "inp_node = \n%s", inp_node) + if inp_node.name in rf_sizes_x: + assert inp_node.name in rf_sizes_y, ( + "Node %s is in rf_sizes_x, but " + "not in rf_sizes_y" % inp_node.name) + # This node was already discovered through a previous path, so we need + # to make sure that graph is aligned. This alignment check is skipped + # if the padding is not defined, since in this case alignment cannot + # be checked. + if not undefined_padding: + if effective_strides_x[inp_node.name] != effective_stride_input_x: + raise ValueError( + "Graph is not aligned since effective stride from different " + "paths is different in horizontal direction") + if effective_strides_y[inp_node.name] != effective_stride_input_y: + raise ValueError( + "Graph is not aligned since effective stride from different " + "paths is different in vertical direction") + if (rf_sizes_x[inp_node.name] - 1 + ) / 2 - effective_paddings_x[inp_node.name] != ( + rf_size_input_x - 1) / 2 - effective_padding_input_x: + raise ValueError( + "Graph is not aligned since center shift from different " + "paths is different in horizontal direction") + if (rf_sizes_y[inp_node.name] - 1 + ) / 2 - effective_paddings_y[inp_node.name] != ( + rf_size_input_y - 1) / 2 - effective_padding_input_y: + raise ValueError( + "Graph is not aligned since center shift from different " + "paths is different in vertical direction") + # Keep track of path with largest RF, for both directions. + if rf_sizes_x[inp_node.name] < rf_size_input_x: + rf_sizes_x[inp_node.name] = rf_size_input_x + effective_strides_x[inp_node.name] = effective_stride_input_x + effective_paddings_x[inp_node.name] = effective_padding_input_x + if rf_sizes_y[inp_node.name] < rf_size_input_y: + rf_sizes_y[inp_node.name] = rf_size_input_y + effective_strides_y[inp_node.name] = effective_stride_input_y + effective_paddings_y[inp_node.name] = effective_padding_input_y + else: + assert inp_node.name not in rf_sizes_y, ( + "Node %s is in rf_sizes_y, but " + "not in rf_sizes_x" % inp_node.name) + # In this case, it is the first time we encounter this node. So we + # propagate the RF parameters. 
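        # Descriptive note (added for exposition): these first-visit values act
        # as the reference for this input node; if another path reaches the
        # same node later, the branch above compares its effective stride and
        # center shift against them and raises if the graph is misaligned.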
+ rf_sizes_x[inp_node.name] = rf_size_input_x + rf_sizes_y[inp_node.name] = rf_size_input_y + effective_strides_x[inp_node.name] = effective_stride_input_x + effective_strides_y[inp_node.name] = effective_stride_input_y + effective_paddings_x[inp_node.name] = effective_padding_input_x + effective_paddings_y[inp_node.name] = effective_padding_input_y + + if not found_output_node: + raise ValueError("Output node was not found") + if input_node not in rf_sizes_x: + raise ValueError("Input node was not found") + return (rf_sizes_x[input_node], rf_sizes_y[input_node], + effective_strides_x[input_node], effective_strides_y[input_node], + effective_paddings_x[input_node], effective_paddings_y[input_node]) diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py new file mode 100644 index 00000000000..2771389250b --- /dev/null +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py @@ -0,0 +1,225 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for receptive_fields module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib import slim +from tensorflow.contrib.receptive_field.python.util import receptive_field +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import nn +from tensorflow.python.platform import test + + +def create_test_network_1(): + """Aligned network for test. + + The graph corresponds to the example from the second figure in + go/cnn-rf-computation#arbitrary-computation-graphs + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]]) + l2 = slim.conv2d(l2_pad, 1, [3, 3], stride=2, scope='L2', padding='VALID') + l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_2(): + """Aligned network for test. + + The graph corresponds to a variation to the example from the second figure in + go/cnn-rf-computation#arbitrary-computation-graphs. Layers 2 and 3 are changed + to max-pooling operations. Since the functionality is the same as convolution, + the network is aligned and the receptive field size is the same as from the + network created using create_test_network_1(). + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. 
+ x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2_pad = array_ops.pad(x, [[0, 0], [1, 0], [1, 0], [0, 0]]) + l2 = slim.max_pool2d(l2_pad, [3, 3], stride=2, scope='L2', padding='VALID') + l3 = slim.max_pool2d(l2, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_3(): + """Misaligned network for test. + + The graph corresponds to the example from the first figure in + go/cnn-rf-computation#arbitrary-computation-graphs + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1_pad = array_ops.pad(x, [[0, 0], [2, 1], [2, 1], [0, 0]]) + l1 = slim.conv2d(l1_pad, 1, [5, 5], stride=2, scope='L1', padding='VALID') + # Right branch. + l2 = slim.conv2d(x, 1, [3, 3], stride=1, scope='L2', padding='VALID') + l3 = slim.conv2d(l2, 1, [3, 3], stride=1, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_4(): + """Misaligned network for test. + + The graph corresponds to a variation from the example from the second figure + in go/cnn-rf-computation#arbitrary-computation-graphs. Layer 2 uses 'SAME' + padding, which makes its padding dependent on the input image dimensionality. + In this case, the effective padding will be undetermined, and the utility is + not able to check the network alignment. + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Left branch. + l1 = slim.conv2d(x, 1, [1, 1], stride=4, scope='L1', padding='VALID') + # Right branch. + l2 = slim.conv2d(x, 1, [3, 3], stride=2, scope='L2', padding='SAME') + l3 = slim.conv2d(l2, 1, [1, 1], stride=2, scope='L3', padding='VALID') + # Addition. + nn.relu(l1 + l3, name='output') + return g + + +def create_test_network_5(): + """Single-path network for testing non-square kernels. + + The graph is similar to the right branch of the graph from + create_test_network_1(), except that the kernel sizes are changed to be + non-square. + + Returns: + g: Tensorflow graph object (Graph proto). + """ + g = ops.Graph() + with g.as_default(): + # An 8x8 test image. + x = array_ops.placeholder(dtypes.float32, (1, 8, 8, 1), name='input_image') + # Two convolutional layers, where the first one has non-square kernel. + l1 = slim.conv2d(x, 1, [3, 5], stride=2, scope='L1', padding='VALID') + l2 = slim.conv2d(l1, 1, [3, 1], stride=2, scope='L2', padding='VALID') + # ReLU. 
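+    # Expected receptive field for this stack (a sketch using the standard
+    # backward recurrence rf_in = stride * rf_out + kernel - stride):
+    #   horizontal: L2 (kernel 1, stride 2): 2*1 + 1 - 2 = 1;
+    #               L1 (kernel 5, stride 2): 2*1 + 5 - 2 = 5.
+    #   vertical:   L2 (kernel 3, stride 2): 2*1 + 3 - 2 = 3;
+    #               L1 (kernel 3, stride 2): 2*3 + 3 - 2 = 7.
+    # These are the values asserted in testComputeRFFromGraphDefNonSquareRF.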
+ nn.relu(l2, name='output') + return g + + +class RfUtilsTest(test.TestCase): + + def testComputeRFFromGraphDefAligned(self): + graph_def = create_test_network_1().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 1) + self.assertEqual(effective_padding_y, 1) + + def testComputeRFFromGraphDefAligned2(self): + graph_def = create_test_network_2().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 1) + self.assertEqual(effective_padding_y, 1) + + def testComputeRFFromGraphDefUnaligned(self): + graph_def = create_test_network_3().as_graph_def() + input_node = 'input_image' + output_node = 'output' + with self.assertRaises(ValueError): + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node) + + def testComputeRFFromGraphDefUnaligned2(self): + graph_def = create_test_network_4().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 3) + self.assertEqual(receptive_field_y, 3) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, None) + self.assertEqual(effective_padding_y, None) + + def testComputeRFFromGraphDefNonSquareRF(self): + graph_def = create_test_network_5().as_graph_def() + input_node = 'input_image' + output_node = 'output' + (receptive_field_x, receptive_field_y, effective_stride_x, + effective_stride_y, effective_padding_x, effective_padding_y) = ( + receptive_field.compute_receptive_field_from_graph_def( + graph_def, input_node, output_node)) + self.assertEqual(receptive_field_x, 5) + self.assertEqual(receptive_field_y, 7) + self.assertEqual(effective_stride_x, 4) + self.assertEqual(effective_stride_y, 4) + self.assertEqual(effective_padding_x, 0) + self.assertEqual(effective_padding_y, 0) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/summary/BUILD b/tensorflow/contrib/summary/BUILD new file mode 100644 index 00000000000..bc305022642 --- /dev/null +++ b/tensorflow/contrib/summary/BUILD @@ -0,0 +1,59 @@ +licenses(["notice"]) # Apache 2.0 + +exports_files([ + "LICENSE", +]) + +load( + "//tensorflow:tensorflow.bzl", + "py_test", + "tf_gen_op_wrapper_py", +) + +tf_gen_op_wrapper_py( + name = "gen_summary_ops", + out = "gen_summary_ops.py", + deps = ["//tensorflow/core:summary_ops_op_lib"], +) + +py_test( + name = "summary_ops_test", + srcs = ["summary_ops_test.py"], + srcs_version = 
"PY2AND3", + deps = [ + ":summary_ops", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:test", + ], +) + +py_library( + name = "summary_ops", + srcs = ["summary_ops.py"], + srcs_version = "PY2AND3", + visibility = ["//tensorflow:internal"], + deps = [ + ":gen_summary_ops", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:summary_op_util", + "//tensorflow/python:training", + "//tensorflow/python/eager:context", + ], +) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py new file mode 100644 index 00000000000..05e627adf1c --- /dev/null +++ b/tensorflow/contrib/summary/summary_ops.py @@ -0,0 +1,159 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""Operations to emit summaries.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.summary import gen_summary_ops +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import summary_op_util +from tensorflow.python.training import training_util + + +# Name for a collection which is expected to have at most a single boolean +# Tensor. If this tensor is True the summary ops will record summaries. +_SHOULD_RECORD_SUMMARIES_NAME = "ShouldRecordSummaries" + + +def should_record_summaries(): + """Returns boolean Tensor which is true if summaries should be recorded.""" + should_record_collection = ops.get_collection(_SHOULD_RECORD_SUMMARIES_NAME) + if not should_record_collection: + return constant_op.constant(False) + if len(should_record_collection) != 1: + raise ValueError( + "More than one tensor specified for whether summaries " + "should be recorded: %s" % should_record_collection) + return should_record_collection[0] + + +# TODO(apassos) consider how to handle local step here. 
+def record_summaries_every_n_global_steps(n): + """Sets the should_record_summaries Tensor to true if global_step % n == 0.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [training_util.get_global_step() % n == 0] + + +def always_record_summaries(): + """Sets the should_record_summaries Tensor to always true.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [constant_op.constant(True)] + + +def never_record_summaries(): + """Sets the should_record_summaries Tensor to always false.""" + collection_ref = ops.get_collection_ref(_SHOULD_RECORD_SUMMARIES_NAME) + collection_ref[:] = [constant_op.constant(False)] + + +def create_summary_file_writer(logdir, + max_queue=None, + flush_secs=None, + filename_suffix=None): + """Creates a summary file writer in the current context.""" + if max_queue is None: + max_queue = constant_op.constant(10) + if flush_secs is None: + flush_secs = constant_op.constant(120) + if filename_suffix is None: + filename_suffix = constant_op.constant("") + resource = gen_summary_ops.summary_writer() + gen_summary_ops.create_summary_file_writer(resource, logdir, max_queue, + flush_secs, filename_suffix) + context.context().summary_writer_resource = resource + + +def _nothing(): + """Convenient else branch for when summaries do not record.""" + return + + +def generic(name, tensor, metadata, family=None): + """Writes a tensor summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_summary(context.context().summary_writer_resource, + training_util.get_global_step(), tensor, + tag, metadata, name=scope) + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def scalar(name, tensor, family=None): + """Writes a scalar summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_scalar_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def histogram(name, tensor, family=None): + """Writes a histogram summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_histogram_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def image(name, tensor, bad_color=None, max_images=3, family=None): + """Writes an image summary if possible.""" + + def record(): + if bad_color is None: + bad_color_ = constant_op.constant([255, 0, 0, 255], dtype=dtypes.uint8) + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_image_summary( + context.context().summary_writer_resource, + training_util.get_global_step(), tag, tensor, bad_color_, max_images, + name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) + + +def audio(name, tensor, sample_rate, max_outputs, family=None): + """Writes an audio summary if possible.""" + + def record(): + with summary_op_util.summary_scope( + name, family, values=[tensor]) as (tag, scope): + gen_summary_ops.write_audio_summary( + context.context().summary_writer_resource, + 
training_util.get_global_step(), + tag, + tensor, + sample_rate=sample_rate, + max_outputs=max_outputs, + name=scope) + + return control_flow_ops.cond(should_record_summaries(), record, _nothing) diff --git a/tensorflow/contrib/summary/summary_ops_test.py b/tensorflow/contrib/summary/summary_ops_test.py new file mode 100644 index 00000000000..56c1a16f7f0 --- /dev/null +++ b/tensorflow/contrib/summary/summary_ops_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tempfile + +from tensorflow.contrib.summary import summary_ops +from tensorflow.python.eager import test +from tensorflow.python.framework import test_util +from tensorflow.python.platform import gfile +from tensorflow.python.training import training_util + + +class TargetTest(test_util.TensorFlowTestCase): + + def testShouldRecordSummary(self): + self.assertFalse(summary_ops.should_record_summaries().numpy()) + summary_ops.always_record_summaries() + self.assertTrue(summary_ops.should_record_summaries().numpy()) + + def testSummaryOps(self): + training_util.get_or_create_global_step() + logdir = tempfile.mkdtemp() + summary_ops.create_summary_file_writer(logdir, max_queue=0) + summary_ops.always_record_summaries() + summary_ops.generic('tensor', 1, '') + summary_ops.scalar('scalar', 2.0) + summary_ops.histogram('histogram', [1.0]) + summary_ops.image('image', [[[[1.0]]]]) + summary_ops.audio('audio', [[1.0]], 1.0, 1) + # The working condition of the ops is tested in the C++ test so we just + # test here that we're calling them correctly. + self.assertTrue(gfile.Exists(logdir)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 7c883ec9266..6748a765623 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -102,10 +102,12 @@ def _increase_eval_step_op(iterations_per_loop): use_locking=True) -def _tpu_job(run_config): +def _tpu_job(run_config, mode): # The tpu job is determined by the run_config. Right now, this method is # required as tpu_config is not part of the RunConfig. - return None if run_config.master in ['', 'local'] else 'tpu_worker' + master = (run_config.evaluation_master if mode == model_fn_lib.ModeKeys.EVAL + else run_config.master) + return None if master in ['', 'local'] else 'tpu_worker' def _is_running_on_cpu(use_tpu, mode, eval_batch_size): @@ -265,9 +267,9 @@ class TPUInfeedOutfeedSessionHook(session_run_hook.SessionRunHook): dequeue. 
""" - def __init__(self, run_config, enqueue_fn, dequeue_ops=None): + def __init__(self, run_config, mode, enqueue_fn, dequeue_ops=None): self._iterations = run_config.tpu_config.iterations_per_loop - self._tpu_job = _tpu_job(run_config) + self._tpu_job = _tpu_job(run_config, mode) self._enqueue_fn = enqueue_fn self._dequeue_ops = dequeue_ops @@ -899,7 +901,7 @@ class _EvalMetrics(object): """ num_shards = run_config.tpu_config.num_shards - job = _tpu_job(run_config) + job = _tpu_job(run_config, model_fn_lib.ModeKeys.EVAL) job_device = '' if job is None else ('/job:%s' % job) # For each i, dequeue_ops[i] is a list containing the tensors from all @@ -978,18 +980,20 @@ class TPUEstimator(estimator_lib.Estimator): Example (MNIST): ``` + # The metric Fn which runs on CPU. + def metric_fn(labels, logits): + predictions = tf.argmax(logits, 1) + return { + 'accuracy': tf.metrics.precision( + labels=labels, predictions=predictions), + } + + # Your model Fn which runs on TPU. def model_fn(features, labels, mode, config, params): ... logits = ... if mode = tf.estimator.ModeKeys.EVAL: - def metric_fn(labels, logits): - predictions = tf.argmax(logits, 1) - return { - 'precision': tf.metrics.precision( - labels=labels, predictions=predictions), - } - return tpu_estimator.TPUEstimatorSpec( mode=mode, loss=loss, @@ -1162,7 +1166,7 @@ class TPUEstimator(estimator_lib.Estimator): with ops.device('/device:CPU:0'): return input_fn(**kwargs) - job = _tpu_job(config) + job = _tpu_job(config, mode) def placement_function(index): if job is None: return '/replica:0/task:0/device:CPU:0' @@ -1190,13 +1194,14 @@ class TPUEstimator(estimator_lib.Estimator): # TODO(b/64607814): Ensure batch_axis works with nested structures. def _create_infeed_enqueue_ops_and_dequeue_fn(inputs_holder, run_config, - batch_axis): + batch_axis, mode): """Utility to convert input_fn to enqueue and dequeue fns for TPU. Args: inputs_holder: An `_InputsHolder` holding features and labels. run_config: A `RunConfig` instance. batch_axis: A python list of batch dimensions. 
+ mode: ModeKeys Returns: A tuple of (dequeue_fn, enqueue_fn) @@ -1239,7 +1244,7 @@ def _create_infeed_enqueue_ops_and_dequeue_fn(inputs_holder, run_config, return infeed_queue.generate_enqueue_ops( sharded_inputs, tpu_ordinal_function=tpu_ordinal_function) else: - job = _tpu_job(run_config) + job = _tpu_job(run_config, mode) def placement_function(index): if job is None: return '/replica:0/task:0/device:CPU:0' @@ -1271,12 +1276,12 @@ def _augment_model_fn(model_fn, train_batch_size, eval_batch_size, use_tpu, num_shards=config.tpu_config.num_shards) dequeue_fn, enqueue_fn = _create_infeed_enqueue_ops_and_dequeue_fn( - inputs, config, batch_axis) + inputs, config, batch_axis, mode) if mode == model_fn_lib.ModeKeys.TRAIN: loss = _train_on_tpu_system(model_fn_wrapper, dequeue_fn) hooks = [ - TPUInfeedOutfeedSessionHook(config, enqueue_fn), + TPUInfeedOutfeedSessionHook(config, mode, enqueue_fn), training.LoggingTensorHook( {'loss': array_ops.identity(loss), 'step': training.get_global_step()}, @@ -1318,7 +1323,7 @@ def _augment_model_fn(model_fn, train_batch_size, eval_batch_size, use_tpu, eval_metric_ops.to_metric_metric_ops_for_tpu( config, dummy_update_op)) hooks = [ - TPUInfeedOutfeedSessionHook(config, enqueue_fn, eval_update_ops), + TPUInfeedOutfeedSessionHook(config, mode, enqueue_fn, eval_update_ops), ] return model_fn_lib.EstimatorSpec( diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index de9eb057e48..35394eeb877 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -122,6 +122,7 @@ load( "tf_additional_gpu_tracer_cuda_deps", "tf_pyclif_proto_library", "tf_jspb_proto_library", + "tf_nano_proto_library", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -212,6 +213,15 @@ tf_jspb_proto_library( deps = [":protos_all_cc"], ) +tf_nano_proto_library( + name = "protos_all_nano_proto", + field_style = "accessors", + generate_equals = 1, + generate_intdefs = 1, + visibility = ["//visibility:public"], + deps = [":protos_all_cc"], +) + exports_files([ "framework/types.proto", ]) @@ -556,6 +566,7 @@ tf_gen_op_libs( "state_ops", "stateless_random_ops", "string_ops", + "summary_ops", "training_ops", ], ) @@ -766,6 +777,7 @@ cc_library( "//tensorflow/core/kernels:state", "//tensorflow/core/kernels:stateless_random_ops", "//tensorflow/core/kernels:string", + "//tensorflow/core/kernels:summary_kernels", "//tensorflow/core/kernels:training_ops", "//tensorflow/core/kernels:word2vec_kernels", ] + tf_additional_cloud_kernel_deps() + if_not_windows([ @@ -3079,7 +3091,6 @@ cc_test( srcs = ["example/example_parser_configuration_test.cc"], data = [":example_parser_configuration_testdata"], deps = [ - ":core", ":core_cpu", ":core_cpu_internal", ":direct_session_internal", diff --git a/tensorflow/core/example/feature_util.cc b/tensorflow/core/example/feature_util.cc index 6f3cc6c6c5d..f0593ede82f 100644 --- a/tensorflow/core/example/feature_util.cc +++ b/tensorflow/core/example/feature_util.cc @@ -18,77 +18,129 @@ limitations under the License. 
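+// Illustrative usage of the key-based accessors defined in this file (a
+// sketch mirroring feature_util_test.cc; the keys are arbitrary):
+//
+//   Example example;
+//   GetFeatureValues<float>("tag", &example)->Add(3.14);
+//   if (HasFeature<float>("tag", example)) {
+//     float v = GetFeatureValues<float>("tag", example).Get(0);
+//   }
+//
+//   SequenceExample se;
+//   AppendFeatureValues({1.0f, 2.5f}, GetFeatureList("ratings", &se)->Add());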
namespace tensorflow { namespace internal { - -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example) { - ::tensorflow::Features* features = example->mutable_features(); - return (*features->mutable_feature())[name]; +Feature& ExampleFeature(const string& name, Example* example) { + return *GetFeature(name, example); } -} // namespace internal +} // namespace internal template <> -bool ExampleHasFeature(const string& name, - const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature<>(const string& key, const Features& features) { + return (features.feature().find(key) != features.feature().end()); +} + +template <> +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kInt64List); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kFloatList); } template <> -bool ExampleHasFeature(const string& name, const Example& example) { - auto it = example.features().feature().find(name); - return (it != example.features().feature().end()) && +bool HasFeature(const string& key, const Features& features) { + auto it = features.feature().find(key); + return (it != features.feature().end()) && (it->second.kind_case() == Feature::KindCase::kBytesList); } +bool HasFeatureList(const string& key, + const SequenceExample& sequence_example) { + auto& feature_list = sequence_example.feature_lists().feature_list(); + return (feature_list.find(key) != feature_list.end()); +} + template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).int64_list().value(); + const Feature& feature) { + return feature.int64_list().value(); } template <> protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example) { - return internal::ExampleFeature(name, example) - .mutable_int64_list() - ->mutable_value(); + Feature* feature) { + return feature->mutable_int64_list()->mutable_value(); } template <> const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).float_list().value(); + const Feature& feature) { + return feature.float_list().value(); } template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_float_list() - ->mutable_value(); +protobuf::RepeatedField* GetFeatureValues(Feature* feature) { + return feature->mutable_float_list()->mutable_value(); } template <> const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example) { - return example.features().feature().at(name).bytes_list().value(); + const Feature& feature) { + return feature.bytes_list().value(); } template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example) { - return internal::ExampleFeature(name, example) - .mutable_bytes_list() - ->mutable_value(); +protobuf::RepeatedPtrField* 
GetFeatureValues(Feature* feature) { + return feature->mutable_bytes_list()->mutable_value(); } +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example) { + return sequence_example.feature_lists().feature_list().at(key).feature(); +} + +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example) { + return (*sequence_example->mutable_feature_lists() + ->mutable_feature_list())[feature_list_key] + .mutable_feature(); +} + +template <> +Features* GetFeatures(Features* proto) { + return proto; +} + +template <> +Features* GetFeatures(Example* proto) { + return proto->mutable_features(); +} + +template <> +const Features& GetFeatures(const Features& proto) { + return proto; +} + +template <> +const Features& GetFeatures(const Example& proto) { + return proto.features(); +} + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues( + Feature* feature); + +template <> +const protobuf::RepeatedField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedField* GetFeatureValues(Feature* feature); + +template <> +const protobuf::RepeatedPtrField& GetFeatureValues( + const Feature& feature); + +template <> +protobuf::RepeatedPtrField* GetFeatureValues(Feature* feature); } // namespace tensorflow diff --git a/tensorflow/core/example/feature_util.h b/tensorflow/core/example/feature_util.h index 4004411cb17..a87c2c9a57c 100644 --- a/tensorflow/core/example/feature_util.h +++ b/tensorflow/core/example/feature_util.h @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// A set of lightweight wrappers which simplify access to Example features. +// A set of lightweight wrappers which simplify access to Feature protos. // // TensorFlow Example proto uses associative maps on top of oneof fields. +// SequenceExample proto uses associative map of FeatureList. // So accessing feature values is not very convenient. // // For example, to read a first value of integer feature "tag": @@ -42,9 +43,59 @@ limitations under the License. // (RepeatedPtrField for byte list). So refer to its documentation of // RepeatedField for full list of supported methods. // -// NOTE: It is also important to mention that due to the nature of oneof proto -// fields setting a feature of one type automatically clears all values stored -// as another type with the same feature name. +// NOTE: Due to the nature of oneof proto fields setting a feature of one type +// automatically clears all values stored as another type with the same feature +// key. +// +// This library also has tools to work with SequenceExample protos. 
+// +// To get a value from SequenceExample.context: +// int id = GetFeatureValues("tag", se.context()).Get(0); +// To add a value to the context: +// GetFeatureValues("tag", se.mutable_context())->Add(42); +// +// To add values to feature_lists: +// AppendFeatureValues({4.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// AppendFeatureValues({5.0, 3.0}, +// GetFeatureList("movie_ratings", &se)->Add()); +// This will create a feature list keyed as "images" with two features: +// feature_lists { +// feature_list { +// key: "images" +// value { +// feature { float_list { value: [4.0] } } +// feature { float_list { value: [5.0, 3.0] } } +// } +// } } +// +// Functions exposed by this library: +// HasFeature<[FeatureType]>(key, proto) -> bool +// Returns true if a feature with the specified key, and optionally +// FeatureType, belongs to the Features or Example proto. +// HasFeatureList(key, sequence_example) -> bool +// Returns true if SequenceExample has a feature_list with the key. +// GetFeatureValues(key, proto) -> RepeatedField +// Returns values for the specified key and the FeatureType. +// Supported types for the proto: Example, Features. +// GetFeatureList(key, sequence_example) -> RepeatedPtrField +// Returns Feature protos associated with a key. +// AppendFeatureValues(begin, end, feature) +// AppendFeatureValues(container or initializer_list, feature) +// Copies values into a Feature. +// AppendFeatureValues(begin, end, key, proto) +// AppendFeatureValues(container or initializer_list, key, proto) +// Copies values into Features and Example protos with the specified key. +// +// Auxiliary functions, it is unlikely you'll need to use them directly: +// GetFeatures(proto) -> Features +// A convenience function to get Features proto. +// Supported types for the proto: Example, Features. +// GetFeature(key, proto) -> Feature* +// Returns a Feature proto for the specified key, creates a new if +// necessary. Supported types for the proto: Example, Features. +// GetFeatureValues(feature) -> RepeatedField +// Returns values of the feature for the FeatureType. #ifndef TENSORFLOW_EXAMPLE_FEATURE_H_ #define TENSORFLOW_EXAMPLE_FEATURE_H_ @@ -62,10 +113,11 @@ namespace tensorflow { namespace internal { +// DEPRECATED: Use GetFeature instead. +// TODO(gorban): Update all clients in a followup CL. // Returns a reference to a feature corresponding to the name. // Note: it will create a new Feature if it is missing in the example. -::tensorflow::Feature& ExampleFeature(const string& name, - ::tensorflow::Example* example); +Feature& ExampleFeature(const string& name, Example* example); // Specializations of RepeatedFieldTrait define a type of RepeatedField // corresponding to a selected feature type. @@ -127,89 +179,135 @@ struct FeatureTrait< } // namespace internal -// Returns true if feature with the specified name belongs to the example proto. -// Doesn't check feature type. Note that specialized versions return false if -// the feature has a wrong type. -template -bool ExampleHasFeature(const string& name, const Example& example) { - return example.features().feature().find(name) != - example.features().feature().end(); -} +// Returns true if sequence_example has a feature_list with the specified key. +bool HasFeatureList(const string& key, const SequenceExample& sequence_example); + +// A family of template functions to return mutable Features proto from a +// container proto. Supported ProtoTypes: Example, Features. 
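+// e.g. Features* features = GetFeatures(&example);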
+template +Features* GetFeatures(ProtoType* proto); + +template +const Features& GetFeatures(const ProtoType& proto); // Base declaration of a family of template functions to return a read only -// repeated field corresponding to a feature with the specified name. +// repeated field of feature values. template const typename internal::RepeatedFieldTrait::Type& -GetFeatureValues(const string& name, const Example& example); +GetFeatureValues(const Feature& feature); -// Base declaration of a family of template functions to return a mutable -// repeated field corresponding to a feature with the specified name. +// Returns a read only repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +const typename internal::RepeatedFieldTrait::Type& +GetFeatureValues(const string& key, const ProtoType& proto) { + return GetFeatureValues(GetFeatures(proto).feature().at(key)); +} + +// Returns a mutable repeated field of a feature values. template typename internal::RepeatedFieldTrait::Type* GetFeatureValues( - const string& name, Example* example); + Feature* feature); + +// Returns a mutable repeated field corresponding to a feature with the +// specified name and FeatureType. Supported ProtoTypes: Example, Features. +template +typename internal::RepeatedFieldTrait::Type* GetFeatureValues( + const string& key, ProtoType* proto) { + ::tensorflow::Feature& feature = + (*GetFeatures(proto)->mutable_feature())[key]; + return GetFeatureValues(&feature); +} + +// Returns a Feature proto for the specified key, creates a new if necessary. +// Supported types for the proto: Example, Features. +template +Feature* GetFeature(const string& key, ProtoType* proto) { + return &(*GetFeatures(proto)->mutable_feature())[key]; +} + +// Returns a repeated field with features corresponding to a feature_list key. +const protobuf::RepeatedPtrField& GetFeatureList( + const string& key, const SequenceExample& sequence_example); + +// Returns a mutable repeated field with features corresponding to a +// feature_list key. It will create a new FeatureList if necessary. +protobuf::RepeatedPtrField* GetFeatureList( + const string& feature_list_key, SequenceExample* sequence_example); -// Copies elements from the range, defined by [first, last) into a feature. template void AppendFeatureValues(IteratorType first, IteratorType last, - const string& name, Example* example) { + Feature* feature) { using FeatureType = typename internal::FeatureTrait< typename std::iterator_traits::value_type>::Type; - std::copy(first, last, protobuf::RepeatedFieldBackInserter( - GetFeatureValues(name, example))); + std::copy(first, last, + protobuf::RepeatedFieldBackInserter( + GetFeatureValues(feature))); +} + +template +void AppendFeatureValues(std::initializer_list container, + Feature* feature) { + AppendFeatureValues(container.begin(), container.end(), feature); +} + +template +void AppendFeatureValues(const ContainerType& container, Feature* feature) { + using IteratorType = typename ContainerType::const_iterator; + AppendFeatureValues(container.begin(), container.end(), + feature); +} + +// Copies elements from the range, defined by [first, last) into the feature +// obtainable from the (proto, key) combination. +template +void AppendFeatureValues(IteratorType first, IteratorType last, + const string& key, ProtoType* proto) { + AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto))); } // Copies all elements from the container into a feature. 
-template -void AppendFeatureValues(const ContainerType& container, const string& name, - Example* example) { +template +void AppendFeatureValues(const ContainerType& container, const string& key, + ProtoType* proto) { using IteratorType = typename ContainerType::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), key, + proto); } -// Copies all elements from the initializer list into a feature. -template +// Copies all elements from the initializer list into a Feature contained by +// Features or Example proto. +template void AppendFeatureValues(std::initializer_list container, - const string& name, Example* example) { + const string& key, ProtoType* proto) { using IteratorType = typename std::initializer_list::const_iterator; - AppendFeatureValues(container.begin(), container.end(), name, - example); + AppendFeatureValues(container.begin(), container.end(), key, + proto); } -template <> -bool ExampleHasFeature(const string& name, - const Example& example); +// Returns true if a feature with the specified key belongs to the Features. +// The template parameter pack accepts zero or one template argument - which +// is FeatureType. If the FeatureType not specified (zero template arguments) +// the function will not check the feature type. Otherwise it will return false +// if the feature has a wrong type. +template +bool HasFeature(const string& key, const Features& features); -template <> -bool ExampleHasFeature(const string& name, const Example& example); +// Returns true if a feature with the specified key belongs to the Example. +// Doesn't check feature type if used without FeatureType, otherwise the +// specialized versions return false if the feature has a wrong type. +template +bool HasFeature(const string& key, const Example& example) { + return HasFeature(key, GetFeatures(example)); +}; -template <> -bool ExampleHasFeature(const string& name, const Example& example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues( - const string& name, Example* example); - -template <> -const protobuf::RepeatedField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedField* GetFeatureValues(const string& name, - Example* example); - -template <> -const protobuf::RepeatedPtrField& GetFeatureValues( - const string& name, const Example& example); - -template <> -protobuf::RepeatedPtrField* GetFeatureValues(const string& name, - Example* example); +// DEPRECATED: use HasFeature instead. +// TODO(gorban): update all clients in a followup CL. +template +bool ExampleHasFeature(const string& key, const Example& example) { + return HasFeature(key, example); +} } // namespace tensorflow #endif // TENSORFLOW_EXAMPLE_FEATURE_H_ diff --git a/tensorflow/core/example/feature_util_test.cc b/tensorflow/core/example/feature_util_test.cc index eb7b90af1b2..cd32dee306d 100644 --- a/tensorflow/core/example/feature_util_test.cc +++ b/tensorflow/core/example/feature_util_test.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ - #include "tensorflow/core/example/feature_util.h" #include @@ -38,6 +37,16 @@ TEST(GetFeatureValuesInt64Test, ReadsASingleValue) { EXPECT_EQ(42, tag.Get(0)); } +TEST(GetFeatureValuesInt64Test, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_int64_list()->add_value(42); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + TEST(GetFeatureValuesInt64Test, WritesASingleValue) { Example example; @@ -48,25 +57,33 @@ TEST(GetFeatureValuesInt64Test, WritesASingleValue) { EXPECT_EQ(42, example.features().feature().at("tag").int64_list().value(0)); } +TEST(GetFeatureValuesInt64Test, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(42); + + ASSERT_EQ(1, feature.int64_list().value_size()); + EXPECT_EQ(42, feature.int64_list().value(0)); +} + TEST(GetFeatureValuesInt64Test, CheckUntypedFieldExistence) { Example example; - - EXPECT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(0); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); } TEST(GetFeatureValuesInt64Test, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(3.14); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); GetFeatureValues("tag", &example)->Add(42); - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ(42, tag_ro.Get(0)); @@ -87,6 +104,16 @@ TEST(GetFeatureValuesInt64Test, CopyIterableToAField) { EXPECT_EQ(3, tag_ro.Get(2)); } +TEST(GetFeatureValuesFloatTest, ReadsASingleValueFromFeature) { + Feature feature; + feature.mutable_float_list()->add_value(3.14); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_NEAR(3.14, values.Get(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -99,6 +126,15 @@ TEST(GetFeatureValuesFloatTest, ReadsASingleValue) { EXPECT_NEAR(3.14, tag.Get(0), kTolerance); } +TEST(GetFeatureValuesFloatTest, WritesASingleValueToFeature) { + Feature feature; + + GetFeatureValues(&feature)->Add(3.14); + + ASSERT_EQ(1, feature.float_list().value_size()); + EXPECT_NEAR(3.14, feature.float_list().value(0), kTolerance); +} + TEST(GetFeatureValuesFloatTest, WritesASingleValue) { Example example; @@ -114,6 +150,20 @@ TEST(GetFeatureValuesFloatTest, WritesASingleValue) { TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { Example example; + GetFeatureValues("tag", &example)->Add(42); + ASSERT_FALSE(HasFeature("tag", example)); + + GetFeatureValues("tag", &example)->Add(3.14); + + EXPECT_TRUE(HasFeature("tag", example)); + auto tag_ro = GetFeatureValues("tag", example); + ASSERT_EQ(1, tag_ro.size()); + EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); +} + +TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistenceForDeprecatedMethod) { + Example example; + GetFeatureValues("tag", &example)->Add(42); ASSERT_FALSE(ExampleHasFeature("tag", example)); @@ -125,6 +175,16 @@ TEST(GetFeatureValuesFloatTest, CheckTypedFieldExistence) { EXPECT_NEAR(3.14, tag_ro.Get(0), kTolerance); } +TEST(GetFeatureValuesStringTest, ReadsASingleValueFromFeature) { + Feature feature; + 
feature.mutable_bytes_list()->add_value("FOO"); + + auto values = GetFeatureValues(feature); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ("FOO", values.Get(0)); +} + TEST(GetFeatureValuesStringTest, ReadsASingleValue) { Example example; (*example.mutable_features()->mutable_feature())["tag"] @@ -137,6 +197,15 @@ TEST(GetFeatureValuesStringTest, ReadsASingleValue) { EXPECT_EQ("FOO", tag.Get(0)); } +TEST(GetFeatureValuesStringTest, WritesASingleValueToFeature) { + Feature feature; + + *GetFeatureValues(&feature)->Add() = "FOO"; + + ASSERT_EQ(1, feature.bytes_list().value_size()); + EXPECT_EQ("FOO", feature.bytes_list().value(0)); +} + TEST(GetFeatureValuesStringTest, WritesASingleValue) { Example example; @@ -148,15 +217,15 @@ TEST(GetFeatureValuesStringTest, WritesASingleValue) { example.features().feature().at("tag").bytes_list().value(0)); } -TEST(GetFeatureValuesBytesTest, CheckTypedFieldExistence) { +TEST(GetFeatureValuesStringTest, CheckTypedFieldExistence) { Example example; GetFeatureValues("tag", &example)->Add(42); - ASSERT_FALSE(ExampleHasFeature("tag", example)); + ASSERT_FALSE(HasFeature("tag", example)); *GetFeatureValues("tag", &example)->Add() = "FOO"; - EXPECT_TRUE(ExampleHasFeature("tag", example)); + EXPECT_TRUE(HasFeature("tag", example)); auto tag_ro = GetFeatureValues("tag", example); ASSERT_EQ(1, tag_ro.size()); EXPECT_EQ("FOO", tag_ro.Get(0)); @@ -228,5 +297,146 @@ TEST(AppendFeatureValuesTest, StringVariablesUsingInitializerList) { EXPECT_EQ("BAZ", tag_ro.Get(2)); } +TEST(SequenceExampleTest, ReadsASingleValueFromContext) { + SequenceExample se; + (*se.mutable_context()->mutable_feature())["tag"] + .mutable_int64_list() + ->add_value(42); + + auto values = GetFeatureValues("tag", se.context()); + + ASSERT_EQ(1, values.size()); + EXPECT_EQ(42, values.Get(0)); +} + +TEST(SequenceExampleTest, WritesASingleValueToContext) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + ASSERT_EQ(1, se.context().feature().at("tag").int64_list().value_size()); + EXPECT_EQ(42, se.context().feature().at("tag").int64_list().value(0)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesToContextSingleArg) { + SequenceExample se; + + AppendFeatureValues({1.1, 2.2, 3.3}, "tag", se.mutable_context()); + + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(3, tag_ro.size()); + EXPECT_NEAR(1.1, tag_ro.Get(0), kTolerance); + EXPECT_NEAR(2.2, tag_ro.Get(1), kTolerance); + EXPECT_NEAR(3.3, tag_ro.Get(2), kTolerance); +} + +TEST(SequenceExampleTest, CheckTypedFieldExistence) { + SequenceExample se; + + GetFeatureValues("tag", se.mutable_context())->Add(3.14); + ASSERT_FALSE(HasFeature("tag", se.context())); + + GetFeatureValues("tag", se.mutable_context())->Add(42); + + EXPECT_TRUE(HasFeature("tag", se.context())); + auto tag_ro = GetFeatureValues("tag", se.context()); + ASSERT_EQ(1, tag_ro.size()); + EXPECT_EQ(42, tag_ro.Get(0)); +} + +TEST(SequenceExampleTest, ReturnsExistingFeatureLists) { + SequenceExample se; + (*se.mutable_feature_lists()->mutable_feature_list())["tag"] + .mutable_feature() + ->Add(); + + auto feature = GetFeatureList("tag", se); + + ASSERT_EQ(1, feature.size()); +} + +TEST(SequenceExampleTest, CreatesNewFeatureLists) { + SequenceExample se; + + GetFeatureList("tag", &se)->Add(); + + EXPECT_EQ(1, se.feature_lists().feature_list().at("tag").feature_size()); +} + +TEST(SequenceExampleTest, CheckFeatureListExistence) { + SequenceExample se; + ASSERT_FALSE(HasFeatureList("tag", se)); + + GetFeatureList("tag", &se)->Add(); + + 
ASSERT_TRUE(HasFeatureList("tag", se)); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithInitializerList) { + SequenceExample se; + + AppendFeatureValues({1, 2, 3}, "ids", se.mutable_context()); + AppendFeatureValues({"cam1-0", "cam2-0"}, + GetFeatureList("images", &se)->Add()); + AppendFeatureValues({"cam1-1", "cam2-2"}, + GetFeatureList("images", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "context {\n" + " feature {\n" + " key: \"ids\"\n" + " value {\n" + " int64_list {\n" + " value: 1\n" + " value: 2\n" + " value: 3\n" + " }\n" + " }\n" + " }\n" + "}\n" + "feature_lists {\n" + " feature_list {\n" + " key: \"images\"\n" + " value {\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-0\"\n" + " value: \"cam2-0\"\n" + " }\n" + " }\n" + " feature {\n" + " bytes_list {\n" + " value: \"cam1-1\"\n" + " value: \"cam2-2\"\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + +TEST(SequenceExampleTest, AppendFeatureValuesWithVectors) { + SequenceExample se; + + std::vector readings{1.0, 2.5, 5.0}; + AppendFeatureValues(readings, GetFeatureList("movie_ratings", &se)->Add()); + + EXPECT_EQ(se.DebugString(), + "feature_lists {\n" + " feature_list {\n" + " key: \"movie_ratings\"\n" + " value {\n" + " feature {\n" + " float_list {\n" + " value: 1\n" + " value: 2.5\n" + " value: 5\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}\n"); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/framework/cancellation.cc b/tensorflow/core/framework/cancellation.cc index 1cbed62939f..9da4828bbad 100644 --- a/tensorflow/core/framework/cancellation.cc +++ b/tensorflow/core/framework/cancellation.cc @@ -23,7 +23,9 @@ namespace tensorflow { const CancellationToken CancellationManager::kInvalidToken = -1; CancellationManager::CancellationManager() - : is_cancelling_(false), is_cancelled_(0), next_cancellation_token_(0) {} + : is_cancelling_(false), + is_cancelled_(false), + next_cancellation_token_(0) {} void CancellationManager::StartCancel() { gtl::FlatMap callbacks_to_run; diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index c2d3f37ab30..b788d6b7778 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/framework/function.h" +#include #include #include #include @@ -271,12 +272,17 @@ class FunctionInstantiationHelper { int nid = -1; const string node_name = input.substr(1); const string node_colon = node_name + ":"; - for (const auto& p : index_) { - if (p.first == node_name || - tensorflow::StringPiece(p.first).starts_with(node_colon)) { - nid = p.second.nid; + const string node_colon_bound = node_name + ";"; + // index_ is a map sorted lexicographically, so the key we are looking for + // must lie in the range [node_name, node_colon_bound). + auto it = index_.lower_bound(node_name); + while (it != index_.end() && it->first <= node_colon_bound) { + if (it->first == node_name || + tensorflow::StringPiece(it->first).starts_with(node_colon)) { + nid = it->second.nid; break; } + ++it; } if (nid == -1) { return errors::InvalidArgument("input[", i, "] == '", input, @@ -421,7 +427,7 @@ class FunctionInstantiationHelper { GetFunctionSignature get_function_; InstantiationResult& result_; // A small index for all names that can be used as a node's input arguments. - std::unordered_map index_; + std::map index_; // This contains information about a node in the new graph including the node // names and input nodes' indexes. 
struct NodeInfo { diff --git a/tensorflow/core/framework/variant_op_registry.cc b/tensorflow/core/framework/variant_op_registry.cc index 11756c356aa..9cc7530459e 100644 --- a/tensorflow/core/framework/variant_op_registry.cc +++ b/tensorflow/core/framework/variant_op_registry.cc @@ -88,7 +88,17 @@ bool DecodeUnaryVariant(Variant* variant) { if (decode_fn == nullptr) { return false; } - return (*decode_fn)(variant); + const string type_name = variant->TypeName(); + bool decoded = (*decode_fn)(variant); + if (!decoded) return false; + if (variant->TypeName() != type_name) { + LOG(ERROR) << "DecodeUnaryVariant: Variant type_name before decoding was: " + << type_name + << " but after decoding was: " << variant->TypeName() + << ". Treating this as a failure."; + return false; + } + return true; } // Add some basic registrations for use by others, e.g., for testing. @@ -101,15 +111,59 @@ string MaybeRemoveTFPrefix(const StringPiece& str) { } // namespace #define REGISTER_VARIANT_DECODE_TYPE(T) \ - REGISTER_UNARY_VARIANT_DECODE_FUNCTION(T, MaybeRemoveTFPrefix(TF_STR(T))); + REGISTER_UNARY_VARIANT_DECODE_FUNCTION(T, TF_STR(T)); // No encode/decode registered for std::complex<> and Eigen::half // objects yet. -TF_CALL_INTEGRAL_TYPES(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_float(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_double(REGISTER_VARIANT_DECODE_TYPE); -TF_CALL_bool(REGISTER_VARIANT_DECODE_TYPE); +REGISTER_VARIANT_DECODE_TYPE(int); +REGISTER_VARIANT_DECODE_TYPE(float); +REGISTER_VARIANT_DECODE_TYPE(bool); +REGISTER_VARIANT_DECODE_TYPE(double); #undef REGISTER_VARIANT_DECODE_TYPE +// Special casing ZerosLikeFn per device. +UnaryVariantOpRegistry::VariantZerosLikeFn* +UnaryVariantOpRegistry::GetZerosLikeFn(const string& device, + const string& type_name) { + auto found = zeros_like_fns.find(std::make_pair(device, type_name)); + if (found == zeros_like_fns.end()) return nullptr; + return &found->second; +} + +void UnaryVariantOpRegistry::RegisterZerosLikeFn( + const string& device, const string& type_name, + const VariantZerosLikeFn& zeros_like_fn) { + CHECK(!type_name.empty()) << "Need a valid name for UnaryVariantZerosLike"; + VariantZerosLikeFn* existing = GetZerosLikeFn(device, type_name); + CHECK_EQ(existing, nullptr) + << "Unary VariantZerosLikeFn for type_name: " << type_name + << " already registered for device type: " << device; + zeros_like_fns.insert( + std::pair, VariantZerosLikeFn>( + std::make_pair(device, type_name), zeros_like_fn)); +} + +namespace { + +template +Status ZerosLikeVariantPrimitiveType(OpKernelContext* ctx, const T& t, + T* t_out) { + *t_out = T(0); + return Status::OK(); +} +} // namespace + +#define REGISTER_VARIANT_ZEROS_LIKE_TYPE(T) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION( \ + DEVICE_CPU, T, TF_STR(T), ZerosLikeVariantPrimitiveType); + +// No zeros_like registered for std::complex<> or Eigen::half objects yet. +REGISTER_VARIANT_ZEROS_LIKE_TYPE(int); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(float); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(double); +REGISTER_VARIANT_ZEROS_LIKE_TYPE(bool); + +#undef REGISTER_VARIANT_ZEROS_LIKE_TYPE + } // namespace tensorflow diff --git a/tensorflow/core/framework/variant_op_registry.h b/tensorflow/core/framework/variant_op_registry.h index 389b049fa01..37e54f82c0f 100644 --- a/tensorflow/core/framework/variant_op_registry.h +++ b/tensorflow/core/framework/variant_op_registry.h @@ -19,11 +19,13 @@ limitations under the License. 
#include #include +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/framework/variant.h" #include "tensorflow/core/framework/variant_encode_decode.h" namespace tensorflow { +class OpKernelContext; // A global UnaryVariantOpRegistry is used to hold callback functions // for different variant types. To be used by ShapeOp, RankOp, and // SizeOp, decoding, etc. @@ -32,6 +34,8 @@ class UnaryVariantOpRegistry { public: typedef std::function VariantShapeFn; typedef std::function VariantDecodeFn; + typedef std::function + VariantZerosLikeFn; // Add a shape lookup function to the registry. void RegisterShapeFn(const string& type_name, const VariantShapeFn& shape_fn); @@ -46,11 +50,29 @@ class UnaryVariantOpRegistry { // Returns nullptr if no decode function was found for the given TypeName. VariantDecodeFn* GetDecodeFn(const string& type_name); + // Add a zeros-like function to the registry. + void RegisterZerosLikeFn(const string& device, const string& type_name, + const VariantZerosLikeFn& zeros_like_fn); + + // Returns nullptr if no zeros-like function was found for the given + // device and TypeName. + VariantZerosLikeFn* GetZerosLikeFn(const string& device, + const string& type_name); + static UnaryVariantOpRegistry* Global(); private: std::unordered_map shape_fns; std::unordered_map decode_fns; + // Map std::pair to function. + struct PairHash { + template + std::size_t operator()(const std::pair& x) const { + return std::hash()(x.first) ^ std::hash()(x.second); + } + }; + std::unordered_map, VariantZerosLikeFn, PairHash> + zeros_like_fns; }; // Gets a TensorShape from a Tensor containing a scalar Variant. @@ -72,6 +94,28 @@ Status GetUnaryVariantShape(const Tensor& variant_tensor, TensorShape* shape); // bool DecodeUnaryVariant(Variant* variant); +// Sets *z_out = zeros_like(v). The variant v must have a registered +// ZerosLike function for the given Device. Returns an Internal error +// if v does not have a registered zeros_like function for this device, or if +// ZerosLike fails. +// +// REQUIRES: +// v_out is not null. 
+// +template +Status CreateZerosLikeVariant(OpKernelContext* ctx, const Variant& v, + Variant* v_out) { + const string& device = DeviceName::value; + UnaryVariantOpRegistry::VariantZerosLikeFn* zeros_like_fn = + UnaryVariantOpRegistry::Global()->GetZerosLikeFn(device, v.TypeName()); + if (zeros_like_fn == nullptr) { + return errors::Internal( + "No unary variant zeros_like function found for Variant type_name: ", + v.TypeName(), " for device type: ", device); + } + return (*zeros_like_fn)(ctx, v, v_out); +} + namespace variant_op_registry_fn_registration { template @@ -120,6 +164,34 @@ class UnaryVariantDecodeRegistration { } }; +template +class UnaryVariantZerosLikeRegistration { + typedef std::function + LocalVariantZerosLikeFn; + + public: + UnaryVariantZerosLikeRegistration( + const string& device, const string& type_name, + const LocalVariantZerosLikeFn& zeros_like_fn) { + auto wrapped_fn = [type_name, zeros_like_fn](OpKernelContext* ctx, + const Variant& v, + Variant* v_out) -> Status { + CHECK_NOTNULL(v_out); + *v_out = T(); + if (v.get() == nullptr) { + return errors::Internal( + "VariantZerosLikeFn: Could not access object, type_name: ", + type_name); + } + const T& t = *v.get(); + T* t_out = v_out->get(); + return zeros_like_fn(ctx, t, t_out); + }; + UnaryVariantOpRegistry::Global()->RegisterZerosLikeFn(device, type_name, + wrapped_fn); + } +}; + }; // namespace variant_op_registry_fn_registration // Register a unary shape variant function with the signature: @@ -151,6 +223,26 @@ class UnaryVariantDecodeRegistration { T> \ register_unary_variant_op_decoder_fn_##ctr(type_name) +// Register a unary zeros_like variant function with the signature: +// Status ZerosLikeFn(OpKernelContext* ctx, const T& t, T* t_out); +// to Variants having TypeName type_name, for device string device. +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(device, T, type_name, \ + zeros_like_function) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ_HELPER( \ + __COUNTER__, device, T, type_name, zeros_like_function) + +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ_HELPER( \ + ctr, device, T, type_name, zeros_like_function) \ + REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ(ctr, device, T, type_name, \ + zeros_like_function) + +#define REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION_UNIQ( \ + ctr, device, T, type_name, zeros_like_function) \ + static variant_op_registry_fn_registration:: \ + UnaryVariantZerosLikeRegistration \ + register_unary_variant_op_decoder_fn_##ctr(device, type_name, \ + zeros_like_function) + } // end namespace tensorflow #endif // TENSORFLOW_FRAMEWORK_VARIANT_OP_REGISTRY_H_ diff --git a/tensorflow/core/framework/variant_op_registry_test.cc b/tensorflow/core/framework/variant_op_registry_test.cc index 86fef53dbe6..4e79180217a 100644 --- a/tensorflow/core/framework/variant_op_registry_test.cc +++ b/tensorflow/core/framework/variant_op_registry_test.cc @@ -15,13 +15,25 @@ limitations under the License. 
#include +#define EIGEN_USE_THREADS + +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif + #include "tensorflow/core/framework/variant_op_registry.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { +typedef Eigen::ThreadPoolDevice CPUDevice; +typedef Eigen::GpuDevice GPUDevice; + namespace { struct VariantValue { @@ -33,7 +45,24 @@ struct VariantValue { *s = TensorShape({-0xdeadbeef}); return Status::OK(); } + static Status CPUZerosLikeFn(OpKernelContext* ctx, const VariantValue& v, + VariantValue* v_out) { + if (v.early_exit) { + return errors::InvalidArgument("early exit zeros_like!"); + } + v_out->zeros_like_set = 1; // CPU + return Status::OK(); + } + static Status GPUZerosLikeFn(OpKernelContext* ctx, const VariantValue& v, + VariantValue* v_out) { + if (v.early_exit) { + return errors::InvalidArgument("early exit zeros_like!"); + } + v_out->zeros_like_set = 2; // GPU + return Status::OK(); + } bool early_exit; + int zeros_like_set; }; REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, "TEST VariantValue", @@ -41,6 +70,14 @@ REGISTER_UNARY_VARIANT_SHAPE_FUNCTION(VariantValue, "TEST VariantValue", REGISTER_UNARY_VARIANT_DECODE_FUNCTION(VariantValue, "TEST VariantValue"); +REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(DEVICE_CPU, VariantValue, + "TEST VariantValue", + VariantValue::CPUZerosLikeFn); + +REGISTER_UNARY_VARIANT_ZEROS_LIKE_FUNCTION(DEVICE_GPU, VariantValue, + "TEST VariantValue", + VariantValue::GPUZerosLikeFn); + } // namespace TEST(VariantOpShapeRegistryTest, TestBasic) { @@ -101,4 +138,67 @@ TEST(VariantOpDecodeRegistryTest, TestDuplicate) { "fjfjfj already registered"); } +TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) { + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetZerosLikeFn( + DEVICE_CPU, "YOU SHALL NOT PASS"), + nullptr); + + VariantValue vv_early_exit{true /* early_exit */, 0 /* zeros_like_set */}; + Variant v = vv_early_exit; + Variant v_out = VariantValue(); + + OpKernelContext* null_context_pointer = nullptr; + Status s0 = + CreateZerosLikeVariant(null_context_pointer, v, &v_out); + EXPECT_FALSE(s0.ok()); + EXPECT_TRUE( + StringPiece(s0.error_message()).contains("early exit zeros_like")); + + VariantValue vv_ok{false /* early_exit */, 0 /* zeros_like_set */}; + v = vv_ok; + TF_EXPECT_OK( + CreateZerosLikeVariant(null_context_pointer, v, &v_out)); + VariantValue* vv_out = CHECK_NOTNULL(v_out.get()); + EXPECT_EQ(vv_out->zeros_like_set, 1); // CPU +} + +#if GOOGLE_CUDA +TEST(VariantOpZerosLikeRegistryTest, TestBasicGPU) { + EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetZerosLikeFn( + DEVICE_GPU, "YOU SHALL NOT PASS"), + nullptr); + + VariantValue vv_early_exit{true /* early_exit */, 0 /* zeros_like_set */}; + Variant v = vv_early_exit; + Variant v_out = VariantValue(); + + OpKernelContext* null_context_pointer = nullptr; + Status s0 = + CreateZerosLikeVariant(null_context_pointer, v, &v_out); + EXPECT_FALSE(s0.ok()); + EXPECT_TRUE( + StringPiece(s0.error_message()).contains("early exit zeros_like")); + + VariantValue vv_ok{false /* early_exit */, 0 /* zeros_like_set */}; + v = vv_ok; + TF_EXPECT_OK( + CreateZerosLikeVariant(null_context_pointer, v, &v_out)); + VariantValue* vv_out = CHECK_NOTNULL(v_out.get()); + EXPECT_EQ(vv_out->zeros_like_set, 2); // GPU +} +#endif // GOOGLE_CUDA + +TEST(VariantOpZerosLikeRegistryTest, TestDuplicate) 
{ + UnaryVariantOpRegistry registry; + UnaryVariantOpRegistry::VariantZerosLikeFn f; + + registry.RegisterZerosLikeFn(DEVICE_CPU, "fjfjfj", f); + EXPECT_DEATH(registry.RegisterZerosLikeFn(DEVICE_CPU, "fjfjfj", f), + "fjfjfj already registered"); + + registry.RegisterZerosLikeFn(DEVICE_GPU, "fjfjfj", f); + EXPECT_DEATH(registry.RegisterZerosLikeFn(DEVICE_GPU, "fjfjfj", f), + "fjfjfj already registered"); +} + } // namespace tensorflow diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 7d938365c5a..a274c799704 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -523,6 +523,17 @@ Status Graph::IsValidNode(const Node* node) const { return Status::OK(); } +Status Graph::IsValidOutputTensor(const Node* node, int idx) const { + TF_RETURN_IF_ERROR(IsValidNode(node)); + if (idx >= node->num_outputs()) { + return errors::InvalidArgument("Node '", node->name(), "' (type: '", + node->op_def().name(), + "', num of outputs: ", node->num_outputs(), + ") does not have ", "output ", idx); + } + return Status::OK(); +} + Node* Graph::AllocateNode(std::shared_ptr props, const Node* cost_node) { Node* node = nullptr; @@ -572,7 +583,7 @@ int Graph::InternDeviceName(const string& device_name) { } string Edge::DebugString() const { - return strings::Printf("Edge %d %s:%d -> %s:%d", id_, src_->name().c_str(), + return strings::Printf("[id=%d %s:%d -> %s:%d]", id_, src_->name().c_str(), src_output_, dst_->name().c_str(), dst_input_); } diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 51ede642d27..25875185e47 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -519,6 +519,10 @@ class Graph { // Returns OK if `node` is non-null and belongs to this graph Status IsValidNode(const Node* node) const; + // Returns OK if IsValidNode(`node`) and `idx` is less than + // node->num_outputs() + Status IsValidOutputTensor(const Node* node, int idx) const; + // TODO(josh11b): uint64 hash() const; private: diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 0ab6aff250b..1b1c88f2df4 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -396,6 +396,18 @@ Status GraphProperties::InferStatically() { } input_properties.push_back(properties); } + for (const auto& edge : node->in_edges()) { + if (!edge->src()->IsConstant()) { + continue; + } + const int input_id = edge->dst_input(); + if (input_id >= input_properties.size()) { + continue; + } + const NodeDef& node = edge->src()->def(); + const TensorProto& raw_val = node.attr().at("value").tensor(); + *input_properties[input_id].mutable_value() = raw_val; + } input_properties_[node->name()] = input_properties; // TODO(bsteiner): share this code with the input processing above. diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index 954c5ead8fc..461e58cf736 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -345,6 +345,15 @@ TEST_F(GraphPropertiesTest, MergeWithoutLoops) { EXPECT_EQ(DT_FLOAT, prop.dtype()); EXPECT_EQ(expected_outputs[i], PropToString(prop)); } + + // The "Less" node should be fed by 2 int32 scalar constant values. 
+ const auto props = properties.GetInputProperties("Less"); + EXPECT_EQ(2, props.size()); + for (int i = 0; i < props.size(); ++i) { + EXPECT_EQ(DT_INT32, props[i].dtype()); + EXPECT_TRUE(props[i].has_value()); + EXPECT_EQ("int32: []", PropToString(props[i])); + } } TEST_F(GraphPropertiesTest, WhileLoop) { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 0893a012047..174ccde8b7a 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -259,19 +259,13 @@ cc_library( cc_library( name = "conv_ops_gpu_hdrs", hdrs = ["conv_ops_gpu.h"], - deps = [ - ":eigen_helpers", - "//third_party/eigen3", - ], + deps = ["//third_party/eigen3"], ) cc_library( name = "gpu_util_hdrs", hdrs = ["gpu_utils.h"], - deps = [ - ":eigen_helpers", - "//third_party/eigen3", - ], + deps = ["//third_party/eigen3"], ) tf_cc_test( @@ -2587,8 +2581,9 @@ tf_kernel_library( tf_kernel_library( name = "reduction_ops", + srcs = ["reduction_ops_gpu_kernels.h"], prefix = "reduction_ops", - deps = MATH_DEPS, + deps = MATH_DEPS + if_cuda(["@cub_archive//:cub"]), ) tf_kernel_library( @@ -3070,14 +3065,16 @@ tf_kernel_library( tf_kernel_library( name = "l2loss_op", prefix = "l2loss_op", + #srcs = ["reduction_ops_gpu_kernels.h"], deps = [ + ":reduction_ops", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_grad", "//tensorflow/core:nn_ops_op_lib", - "//third_party/eigen3", - ], + ] + if_cuda(["@cub_archive//:cub"]), ) tf_cuda_cc_test( @@ -4668,6 +4665,8 @@ filegroup( "whole_file_read_ops.*", "sample_distorted_bounding_box_op.*", "ctc_loss_op.*", + "summary_interface.*", + "summary_kernels.*", "spectrogram_convert_test_data.cc", "sql_dataset_ops.cc", # Excluded due to experimental status: @@ -5957,6 +5956,43 @@ tf_kernel_library( ], ) +cc_library( + name = "summary_interface", + srcs = ["summary_interface.cc"], + hdrs = ["summary_interface.h"], + deps = [ + "//tensorflow/compiler/xla:util", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:proto_text", + "//tensorflow/core:protos_all_cc", + ], +) + +cc_test( + name = "summary_interface_test", + srcs = ["summary_interface_test.cc"], + deps = [ + ":summary_interface", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + +tf_kernel_library( + name = "summary_kernels", + srcs = ["summary_kernels.cc"], + deps = [ + ":summary_interface", + "//tensorflow/core:framework", + "//tensorflow/core:summary_ops_op_lib", + ], +) + # ----------------------------------------------------------------------------- # Google-internal targets. These must be at the end for syncrepo. diff --git a/tensorflow/core/kernels/bias_op_gpu.cu.cc b/tensorflow/core/kernels/bias_op_gpu.cu.cc index e07ca5e0c4c..ddc2d457b0e 100644 --- a/tensorflow/core/kernels/bias_op_gpu.cu.cc +++ b/tensorflow/core/kernels/bias_op_gpu.cu.cc @@ -142,9 +142,9 @@ __global__ void BiasGradNCHW_SharedAtomics(const T* output_backprop, int group_size) { // Initialize the shared memory. 
typedef typename AccumulatorType::type AccT; - __shared__ AccT s_data[32]; - int32 s_data_size = sizeof(s_data) / sizeof(T); - for (int32 index = threadIdx.x; index < s_data_size; index += blockDim.x) { + const int32 kSDataSize = 32; + __shared__ AccT s_data[kSDataSize]; + for (int32 index = threadIdx.x; index < kSDataSize; index += blockDim.x) { s_data[index] = AccT(0); } __syncthreads(); diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index b4b37dd4b8e..cdc11452827 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -17,6 +17,10 @@ limitations under the License. #define EIGEN_USE_THREADS +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif + #include "tensorflow/core/kernels/constant_op.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" @@ -26,13 +30,14 @@ limitations under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/kernels/bounds_check.h" #include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/platform/macros.h" #ifdef TENSORFLOW_USE_SYCL #include "tensorflow/core/common_runtime/sycl/sycl_util.h" -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL namespace tensorflow { @@ -40,9 +45,8 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx) : OpKernel(ctx), tensor_(ctx->output_type(0)) { const TensorProto* proto = nullptr; OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto)); - OP_REQUIRES_OK(ctx, - ctx->device()->MakeTensorFromProto( - *proto, AllocatorAttributes(), &tensor_)); + OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto( + *proto, AllocatorAttributes(), &tensor_)); OP_REQUIRES( ctx, ctx->output_type(0) == tensor_.dtype(), errors::InvalidArgument("Type mismatch between value (", @@ -85,9 +89,9 @@ REGISTER_KERNEL(GPU, bool); #endif #ifdef TENSORFLOW_USE_SYCL -#define REGISTER_SYCL_KERNEL(D, TYPE) \ - REGISTER_KERNEL_BUILDER( \ - Name("Const").Device(DEVICE_##D).TypeConstraint("dtype"), \ +#define REGISTER_SYCL_KERNEL(D, TYPE) \ + REGISTER_KERNEL_BUILDER( \ + Name("Const").Device(DEVICE_##D).TypeConstraint("dtype"), \ ConstantOp); REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); @@ -194,18 +198,18 @@ struct FillFunctor { void operator()(const SYCLDevice& d, typename TTypes::Flat out, typename TTypes::ConstScalar in) { #if !defined(EIGEN_HAS_INDEX_LIST) - Eigen::array rank1{1}; + Eigen::array rank1{1}; #else - Eigen::IndexList> rank1; + Eigen::IndexList > rank1; #endif - const int size = out.dimension(0); - Eigen::array broadcast_dims{size}; + const int size = out.dimension(0); + Eigen::array broadcast_dims{size}; - To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims); + To32Bit(out).device(d) = in.reshape(rank1).broadcast(broadcast_dims); } }; -} -#endif // TENSORFLOW_USE_SYCL +} // namespace functor +#endif // TENSORFLOW_USE_SYCL #define REGISTER_KERNEL(D, TYPE) \ REGISTER_KERNEL_BUILDER(Name("Fill") \ @@ -273,11 +277,23 @@ class ZerosLikeOp : public OpKernel { void Compute(OpKernelContext* ctx) override { const Tensor& input = ctx->input(0); - Tensor* out = nullptr; - OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output( - {0}, 0, input.shape(), &out)); - functor::SetZeroFunctor f; - f(ctx->eigen_device(), out->flat()); + const Device& d = ctx->eigen_device(); + if (std::is_same::value) { + OP_REQUIRES(ctx, 
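Referring back to the BiasGradNCHW_SharedAtomics change above: the old bound sizeof(s_data) / sizeof(T) is wrong whenever the accumulator type is wider than T, so the zero-initialization loop could write past the 32-element shared array; the fix replaces it with the compile-time constant kSDataSize. A standalone check of the arithmetic (uint16_t stands in for Eigen::half, float for its accumulator type; illustrative only):

#include <cstdint>
#include <iostream>

int main() {
  using T = std::uint16_t;   // stand-in for Eigen::half (2 bytes)
  using AccT = float;        // stand-in for the accumulator type (4 bytes)

  AccT s_data[32];           // the shared-memory buffer holds 32 AccT values
  (void)s_data;              // only used in sizeof below

  // Old bound: divides the buffer size by sizeof(T), not sizeof(AccT).
  std::size_t old_bound = sizeof(s_data) / sizeof(T);     // 128 / 2 == 64
  // New bound: the fixed kSDataSize constant.
  std::size_t new_bound = sizeof(s_data) / sizeof(AccT);  // 128 / 4 == 32

  std::cout << old_bound << " vs " << new_bound << "\n";  // 64 vs 32
  // With the old bound, indices 32..63 would be written out of range.
  return 0;
}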
input.dims() == 0, + errors::InvalidArgument( + "ZerosLike of non-unary Variant not supported.")); + const Variant& v = input.scalar()(); + Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({})); + Variant* out_v = &(out.scalar()()); + OP_REQUIRES_OK(ctx, CreateZerosLikeVariant(ctx, v, out_v)); + ctx->set_output(0, out); + } else { + Tensor* out = nullptr; + OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output( + {0}, 0, input.shape(), &out)); + functor::SetZeroFunctor f; + f(d, out->flat()); + } } }; @@ -288,6 +304,7 @@ class ZerosLikeOp : public OpKernel { #define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU) TF_CALL_POD_STRING_TYPES(REGISTER_CPU); +REGISTER_CPU(Variant); #undef REGISTER_CPU #ifdef TENSORFLOW_USE_SYCL @@ -315,6 +332,14 @@ REGISTER_KERNEL_BUILDER(Name("ZerosLike") .TypeConstraint("T") .HostMemory("y"), ZerosLikeOp); +// TODO(ebrevdo): Once rendezvous has been properly set up for +// Variants, we'll no longer need a HostMemory attribute for this case. +REGISTER_KERNEL_BUILDER(Name("ZerosLike") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .HostMemory("x") + .HostMemory("y"), + ZerosLikeOp); #endif // GOOGLE_CUDA #undef REGISTER_KERNEL diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h index 4bb0b7f3b41..8de8f1b2650 100644 --- a/tensorflow/core/kernels/conv_2d.h +++ b/tensorflow/core/kernels/conv_2d.h @@ -225,13 +225,13 @@ struct PadInput { const std::array& padding_right, typename TTypes::Tensor out, TensorFormat format) { - Eigen::array, NDIMS> padding; - padding[GetTensorDimIndex(format, 'N')] = std::make_pair(0, 0); + Eigen::array, NDIMS> padding; + padding[GetTensorDimIndex(format, 'N')] = {0, 0}; for (int i = 0; i < NDIMS - 2; ++i) { - padding[GetTensorDimIndex(format, '0' + i)] = - std::make_pair(padding_left[i], padding_right[i]); + padding[GetTensorDimIndex(format, '0' + i)] = { + padding_left[i], padding_right[i]}; } - padding[GetTensorDimIndex(format, 'C')] = std::make_pair(0, 0); + padding[GetTensorDimIndex(format, 'C')] = {0, 0}; out.device(d) = in.pad(padding); } }; diff --git a/tensorflow/core/kernels/cuda_solvers.cc b/tensorflow/core/kernels/cuda_solvers.cc index 3a8ccfe6b74..5c6b5eec829 100644 --- a/tensorflow/core/kernels/cuda_solvers.cc +++ b/tensorflow/core/kernels/cuda_solvers.cc @@ -30,10 +30,13 @@ #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" +#include "tensorflow/core/platform/cuda.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +using ::perftools::gputools::cuda::ScopedActivateExecutorContext; + namespace tensorflow { namespace { @@ -148,7 +151,12 @@ Status CudaSolver::CopyLapackInfoToHostAsync( // This callback checks that all batch items in all calls were processed // successfully and passes status to the info_checker_callback accordingly. 
auto wrapped_info_checker_callback = - [info_checker_callback](std::vector host_lapack_infos) { + [](OpKernelContext* context, + std::function&)> + info_checker_callback, + std::vector host_lapack_infos) { + auto stream = context->op_device_context()->stream(); + ScopedActivateExecutorContext scoped_activation{stream->parent()}; Status status; for (const auto& host_lapack_info : host_lapack_infos) { for (int i = 0; i < host_lapack_info.size() && status.ok(); ++i) { @@ -166,8 +174,10 @@ Status CudaSolver::CopyLapackInfoToHostAsync( } info_checker_callback(status, host_lapack_infos); }; + auto cb = - std::bind(wrapped_info_checker_callback, std::move(host_lapack_infos)); + std::bind(wrapped_info_checker_callback, context_, + std::move(info_checker_callback), std::move(host_lapack_infos)); auto stream = context_->op_device_context()->stream(); context_->device()->tensorflow_gpu_device_info()->event_mgr->ThenExecute( stream, std::move(cb)); diff --git a/tensorflow/core/kernels/debug_ops_test.cc b/tensorflow/core/kernels/debug_ops_test.cc index 89bcbc9c373..37c94865942 100644 --- a/tensorflow/core/kernels/debug_ops_test.cc +++ b/tensorflow/core/kernels/debug_ops_test.cc @@ -573,7 +573,8 @@ TEST_F(DebugNumericSummaryOpTest, UInt8Success) { TEST_F(DebugNumericSummaryOpTest, BoolSuccess) { TF_ASSERT_OK(Init(DT_BOOL)); - AddInputFromArray(TensorShape({2, 3}), {0, 0, 1, 1, 1, 0}); + AddInputFromArray(TensorShape({2, 3}), + {false, false, true, true, true, false}); TF_ASSERT_OK(RunOpKernel()); Tensor expected(allocator(), DT_DOUBLE, TensorShape({16})); diff --git a/tensorflow/core/kernels/group_by_window_dataset_op.cc b/tensorflow/core/kernels/group_by_window_dataset_op.cc index a53e9456ad2..a4f9608b1fa 100644 --- a/tensorflow/core/kernels/group_by_window_dataset_op.cc +++ b/tensorflow/core/kernels/group_by_window_dataset_op.cc @@ -36,20 +36,14 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { graph_def_version_(ctx->graph_def_version()) { OP_REQUIRES_OK(ctx, ctx->GetAttr("key_func", &key_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("reduce_func", &reduce_func_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("window_size_func", &window_size_func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_types", &output_types_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("output_shapes", &output_shapes_)); } void MakeDataset(OpKernelContext* ctx, DatasetBase* input, DatasetBase** output) override { - int64 window_size = 0; - OP_REQUIRES_OK( - ctx, ParseScalarArgument(ctx, "window_size", &window_size)); - OP_REQUIRES( - ctx, window_size > 0, - errors::InvalidArgument("Window size must be greater than zero.")); - - // Get captured inputs for the key and reduce functions. + // Get captured inputs for the key, reduce, and window_size functions. 
OpInputList key_func_other_argument_inputs; OP_REQUIRES_OK(ctx, ctx->input_list("key_func_other_arguments", &key_func_other_argument_inputs)); @@ -67,6 +61,16 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { for (const Tensor& t : reduce_func_other_argument_inputs) { reduce_func_other_arguments.push_back(t); } + OpInputList window_size_func_other_argument_inputs; + OP_REQUIRES_OK(ctx, + ctx->input_list("window_size_func_other_arguments", + &window_size_func_other_argument_inputs)); + std::vector window_size_func_other_arguments; + window_size_func_other_arguments.reserve( + window_size_func_other_argument_inputs.size()); + for (const Tensor& t : window_size_func_other_argument_inputs) { + window_size_func_other_arguments.push_back(t); + } // TODO(mrry): Refactor CapturedFunction to share the runtime // state between multiple functions? std::unique_ptr captured_key_func; @@ -79,24 +83,30 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { ctx, CapturedFunction::Create(ctx, reduce_func_, graph_def_version_, std::move(reduce_func_other_arguments), &captured_reduce_func)); + std::unique_ptr captured_window_size_func; + OP_REQUIRES_OK(ctx, CapturedFunction::Create( + ctx, window_size_func_, graph_def_version_, + std::move(window_size_func_other_arguments), + &captured_window_size_func)); - *output = new Dataset(input, window_size, std::move(captured_key_func), - std::move(captured_reduce_func), output_types_, - output_shapes_); + *output = new Dataset( + input, std::move(captured_key_func), std::move(captured_reduce_func), + std::move(captured_window_size_func), output_types_, output_shapes_); } private: class Dataset : public DatasetBase { public: - Dataset(const DatasetBase* input, int64 window_size, + Dataset(const DatasetBase* input, std::unique_ptr captured_key_func, std::unique_ptr captured_reduce_func, + std::unique_ptr captured_window_size_func, const DataTypeVector& output_types, const std::vector& output_shapes) : input_(input), - window_size_(window_size), captured_key_func_(std::move(captured_key_func)), captured_reduce_func_(std::move(captured_reduce_func)), + captured_window_size_func_(std::move(captured_window_size_func)), output_types_(output_types), output_shapes_(output_shapes) { input_->Ref(); @@ -182,10 +192,44 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { } const int64 key = key_func_output[0].scalar()(); + if (window_sizes_.find(key) == window_sizes_.end()) { + // Run window_size function + FunctionLibraryRuntime::Options opts2; + opts2.step_id = CapturedFunction::generate_step_id(); + opts2.runner = ctx->runner(); + ScopedStepContainer step_container2( + opts2.step_id, [this, ctx](const string& name) { + dataset() + ->captured_window_size_func_->resource_manager() + ->Cleanup(name) + .IgnoreError(); + }); + opts2.step_container = &step_container2; + + // Run the window size function on the key to identify its + // window size. + std::vector window_size_func_output; + TF_RETURN_IF_ERROR(dataset()->captured_window_size_func_->Run( + opts2, key_func_output, &window_size_func_output)); + + if (window_size_func_output.size() != 1 || + window_size_func_output[0].dtype() != DT_INT64 || + window_size_func_output[0].NumElements() != 1) { + // TODO(mrry): Support non-int64 window sizes. 
+ return errors::InvalidArgument( + "`window_size_func` must return a scalar int64."); + } + const int64 window_size = + window_size_func_output[0].scalar()(); + window_sizes_[key] = window_size; + } + + const int64 window_size = window_sizes_[key]; + std::vector>& group = groups_[key]; group.push_back(std::move(next_input_element)); - if (group.size() == dataset()->window_size_) { + if (group.size() == window_size) { TF_RETURN_IF_ERROR(StartFlushingGroup(ctx, key)); break; } @@ -297,6 +341,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { bool end_of_input_ GUARDED_BY(mu_) = false; std::map>> groups_ GUARDED_BY(mu_); std::unique_ptr current_group_iterator_ GUARDED_BY(mu_); + std::map window_sizes_ GUARDED_BY(mu_); }; // A resource name for the temporary window dataset that is @@ -304,9 +349,9 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { static constexpr const char* kWindowResourceName = "__window_dataset"; const DatasetBase* const input_; - const int64 window_size_; const std::unique_ptr captured_key_func_; const std::unique_ptr captured_reduce_func_; + const std::unique_ptr captured_window_size_func_; const DataTypeVector output_types_; const std::vector output_shapes_; }; @@ -316,6 +361,7 @@ class GroupByWindowDatasetOp : public UnaryDatasetOpKernel { std::vector output_shapes_; const NameAttrList* key_func_; const NameAttrList* reduce_func_; + const NameAttrList* window_size_func_; }; REGISTER_KERNEL_BUILDER(Name("GroupByWindowDataset").Device(DEVICE_CPU), diff --git a/tensorflow/core/kernels/l2loss_op.cc b/tensorflow/core/kernels/l2loss_op.cc index 9875cd027d5..f8ed9351579 100644 --- a/tensorflow/core/kernels/l2loss_op.cc +++ b/tensorflow/core/kernels/l2loss_op.cc @@ -27,10 +27,9 @@ limitations under the License. namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; -typedef Eigen::GpuDevice GPUDevice; -template -class L2LossOp : public OpKernel { +template +class L2LossOp : public OpKernel { public: explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} @@ -42,8 +41,9 @@ class L2LossOp : public OpKernel { Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape({}), &output)); - functor::L2Loss()(context->eigen_device(), - input.flat(), output->scalar()); + const CPUDevice& d = context->eigen_device(); + output->scalar().device(d) = + (input.flat().square() * static_cast(0.5)).sum(); } }; @@ -57,33 +57,4 @@ REGISTER_KERNEL(double); REGISTER_KERNEL(Eigen::half); #undef REGISTER_KERNEL -#if GOOGLE_CUDA -// Forward declarations of the functor specializations for GPU. -namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void L2Loss::operator()(const GPUDevice& d, \ - typename TTypes::ConstTensor input, \ - typename TTypes::Scalar output); \ - extern template struct L2Loss; - -DECLARE_GPU_SPEC(float); -DECLARE_GPU_SPEC(double); -DECLARE_GPU_SPEC(Eigen::half); -#undef DECLARE_GPU_SPEC -} // namespace functor - -// Registration of the GPU implementations. 
-#define REGISTER_GPU_KERNEL(T) \ - REGISTER_KERNEL_BUILDER( \ - Name("L2Loss").Device(DEVICE_GPU).TypeConstraint("T"), \ - L2LossOp); - -REGISTER_GPU_KERNEL(float); -REGISTER_GPU_KERNEL(double); -REGISTER_GPU_KERNEL(Eigen::half); -#undef REGISTER_GPU_KERNEL - -#endif // GOOGLE_CUDA - } // namespace tensorflow diff --git a/tensorflow/core/kernels/l2loss_op.h b/tensorflow/core/kernels/l2loss_op.h index f7204cefdd4..4953aa237cd 100644 --- a/tensorflow/core/kernels/l2loss_op.h +++ b/tensorflow/core/kernels/l2loss_op.h @@ -15,25 +15,19 @@ limitations under the License. #ifndef TENSORFLOW_KERNELS_L2LOSS_OP_H_ #define TENSORFLOW_KERNELS_L2LOSS_OP_H_ -// Functor definition for L2LossOp, must be compilable by nvcc. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { -namespace functor { -// Functor used by L2LossOp to do the computations. template -struct L2Loss { - void operator()(const Device& d, typename TTypes::ConstTensor input, - typename TTypes::Scalar output) { - // We flatten the input tensor and reduce on dimension 0, producing - // a single number which is Mul(Sum(x^2), 0.5). - output.device(d) = (input.square() * static_cast(0.5)).sum(); - } +struct L2LossOp : public OpKernel { + explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) {} }; -} // namespace functor } // namespace tensorflow #endif // TENSORFLOW_KERNELS_L2LOSS_OP_H_ diff --git a/tensorflow/core/kernels/l2loss_op_gpu.cu.cc b/tensorflow/core/kernels/l2loss_op_gpu.cu.cc index 420df370865..73b6472254c 100644 --- a/tensorflow/core/kernels/l2loss_op_gpu.cu.cc +++ b/tensorflow/core/kernels/l2loss_op_gpu.cu.cc @@ -21,12 +21,55 @@ limitations under the License. #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/kernels/reduction_ops_common.h" +#include "tensorflow/core/kernels/reduction_ops_gpu_kernels.h" + namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; -template struct functor::L2Loss; -template struct functor::L2Loss; -template struct functor::L2Loss; + +// TODO(eriche): can add specialization for half2 +template +struct squareHalf { + __host__ __device__ T operator()(const T& x) const { + return static_cast(0.5) * x * x; + } +}; + +template +class L2LossOp : public OpKernel { + public: + explicit L2LossOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + // The input tensor can be of any number of dimensions, even though it's + // 2D in most typical applications. + const Tensor& input = context->input(0); + // The output is a single number. + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({}), &output)); + typedef cub::TransformInputIterator, T*> inputIterType; + inputIterType input_itr((T*)input.flat().data(), squareHalf()); + typedef const Eigen::array::Tensor::Index, 1>& ReductionAxes; + + Constants constants; + functor::ReduceImpl( + context, (T*)output->flat().data(), input_itr, 1, + input.flat().size(), 1, 1, 0, constants.kZero, cub::Sum(), T(0)); + } +}; + +// Registration of the GPU implementations. 
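The GPU kernel above computes L2Loss(x) = sum_i 0.5 * x_i^2 by wrapping the input pointer in a transform iterator (squareHalf) and handing it to a single sum reduction, so the square-and-halve happens on the fly as elements are read. A standalone CPU reference of the same fused pattern, assuming C++17 std::transform_reduce (illustrative only):

#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  const std::vector<float> x = {1.f, 2.f, 3.f};

  // Fuse the per-element transform 0.5 * v * v into the reduction itself,
  // analogous to the cub::TransformInputIterator + Sum reduce above.
  float l2 = std::transform_reduce(
      x.begin(), x.end(), 0.f, std::plus<float>(),
      [](float v) { return 0.5f * v * v; });

  std::cout << l2 << "\n";  // 0.5 * (1 + 4 + 9) = 7
  return 0;
}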
+#define REGISTER_GPU_KERNEL(T) \ + REGISTER_KERNEL_BUILDER( \ + Name("L2Loss").Device(DEVICE_GPU).TypeConstraint("T"), \ + L2LossOp); + +REGISTER_GPU_KERNEL(float); +REGISTER_GPU_KERNEL(double); +REGISTER_GPU_KERNEL(Eigen::half); +#undef REGISTER_GPU_KERNEL } // namespace tensorflow diff --git a/tensorflow/core/kernels/ops_util.cc b/tensorflow/core/kernels/ops_util.cc index 130939263be..efacd05dd39 100644 --- a/tensorflow/core/kernels/ops_util.cc +++ b/tensorflow/core/kernels/ops_util.cc @@ -37,11 +37,6 @@ Eigen::PaddingType BrainPadding2EigenPadding(Padding padding) { Status GetBroadcastSize(const int index, const int in_size, const int ksize, const int stride, const int pad_size, int* bindex, int* bsize) { - // Cannot have strides larger than the patch size. - if (stride > ksize) { - return errors::InvalidArgument( - "stride must be less than or equal to kernel size"); - } // Cannot have index beyond the input size. if (index * stride > in_size) { return errors::InvalidArgument( diff --git a/tensorflow/core/kernels/ops_util_test.cc b/tensorflow/core/kernels/ops_util_test.cc index 42ffef6735b..9d53882deef 100644 --- a/tensorflow/core/kernels/ops_util_test.cc +++ b/tensorflow/core/kernels/ops_util_test.cc @@ -173,12 +173,6 @@ TEST_F(OpsUtilTest, Get2dOutputSizeVerbose) { VerifyGet2dOutputVerboseSizeValues(pad_struct2, error::OK); } -// Test stride > ksize fails with INVALID_ARGUMENT. -TEST_F(OpsUtilTest, GetBroadcastTest3_1_2_0) { - bcast_struct bcast = {{0, 3, 1, 2, 0}, {0, 3}}; - VerifyBoundaries(bcast, error::INVALID_ARGUMENT); -} - // Test index * stride > in_size fails with INVALID_ARGUMENT. TEST_F(OpsUtilTest, GetBroadcastTestBadIndex) { bcast_struct bcast = {{2, 3, 1, 2, 0}, {0, 3}}; @@ -281,6 +275,38 @@ TEST_F(OpsUtilTest, GetBroadcastTest3_3_3_2) { } } +// in_size = 3, ksize = 1, stride = 2, pad_size = 0 +TEST_F(OpsUtilTest, GetBroadcastTest3_1_2_0) { + bcast_struct bcast[] = { + {{0, 3, 1, 2, 0}, {0, 1}}, + {{1, 3, 1, 2, 0}, {2, 1}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + +// in_size = 3, ksize = 2, stride = 3, pad_size = 0 +TEST_F(OpsUtilTest, GetBroadcastTest3_2_3_0) { + bcast_struct bcast[] = { + {{0, 3, 2, 3, 0}, {0, 2}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + +// in_size = 3, ksize = 2, stride = 3, pad_size = 1 +TEST_F(OpsUtilTest, GetBroadcastTest3_2_3_1) { + bcast_struct bcast[] = { + {{0, 3, 2, 3, 1}, {0, 1}}, + {{1, 3, 2, 3, 1}, {2, 1}}, + }; + for (size_t i = 0; i < sizeof(bcast) / sizeof(bcast[0]); ++i) { + VerifyBcastValues(bcast[i]); + } +} + TEST_F(OpsUtilTest, SanitizeThreadSuffix) { EXPECT_EQ("_aBc123_-___", SanitizeThreadSuffix("/aBc123_- /")); } diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index 6e8b09d0500..6196c5ed93e 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -146,9 +146,9 @@ class PadOp : public OpKernel { Tensor* output) { CHECK_EQ(Dims, paddings.dimension(0)); CHECK_EQ(2, paddings.dimension(1)); - Eigen::array, Dims> paddings_array; + Eigen::array, Dims> paddings_array; for (int i = 0; i < Dims; ++i) { - paddings_array[i] = std::make_pair(paddings(i, 0), paddings(i, 1)); + paddings_array[i] = {paddings(i, 0), paddings(i, 1)}; } functor::Pad functor; functor(context->eigen_device(), output->tensor(), input, @@ -180,7 +180,7 @@ namespace functor { void Pad::operator()( \ const GPUDevice& d, typename TTypes::Tensor output, \ typename 
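On the ops_util change above: GetBroadcastSize no longer rejects stride > ksize, and the reworked GetBroadcastTest3_1_2_0 plus the two new tests pin down the (bindex, bsize) windows it now returns. The sketch below is a simplified re-implementation of that clipping written only to reproduce the expected pairs in those tests; it is not the actual ops_util.cc code.

#include <algorithm>
#include <iostream>

// Simplified sketch: start the patch at index * stride - pad_size and clip
// it to [0, in_size).
bool BroadcastWindow(int index, int in_size, int ksize, int stride,
                     int pad_size, int* bindex, int* bsize) {
  if (index * stride > in_size) return false;   // index past the input
  *bindex = index * stride;
  *bsize = ksize;
  if (*bindex < pad_size) {                     // clip on the left
    *bsize += *bindex - pad_size;
    *bindex = 0;
  } else {
    *bindex -= pad_size;
  }
  if (*bindex + *bsize > in_size) {             // clip on the right
    *bsize = std::min(in_size - *bindex, *bsize);
  }
  return true;
}

int main() {
  int bindex, bsize;
  // in_size = 3, ksize = 1, stride = 2, pad_size = 0 (stride > ksize is fine).
  BroadcastWindow(0, 3, 1, 2, 0, &bindex, &bsize);
  std::cout << bindex << "," << bsize << " ";    // 0,1
  BroadcastWindow(1, 3, 1, 2, 0, &bindex, &bsize);
  std::cout << bindex << "," << bsize << "\n";   // 2,1
  // in_size = 3, ksize = 2, stride = 3, pad_size = 1.
  BroadcastWindow(0, 3, 2, 3, 1, &bindex, &bsize);
  std::cout << bindex << "," << bsize << " ";    // 0,1
  BroadcastWindow(1, 3, 2, 3, 1, &bindex, &bsize);
  std::cout << bindex << "," << bsize << "\n";   // 2,1
  return 0;
}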
TTypes::ConstTensor input, \ - Eigen::array, Dims> paddings, T pad_value); \ + Eigen::array, Dims> paddings, T pad_value); \ extern template struct Pad; #define DECLARE_GPU_SPECS(T) \ diff --git a/tensorflow/core/kernels/pad_op.h b/tensorflow/core/kernels/pad_op.h index 6a973833e2d..95a7c9a3ae5 100644 --- a/tensorflow/core/kernels/pad_op.h +++ b/tensorflow/core/kernels/pad_op.h @@ -31,7 +31,7 @@ struct Pad { // See pad_op.cc for details. void operator()(const Device& d, typename TTypes::Tensor output, typename TTypes::ConstTensor input, - Eigen::array, Dims> paddings, + Eigen::array, Dims> paddings, T pad_value) { if (Eigen::internal::is_same::value && (output.size() <= std::numeric_limits::max())) { @@ -47,7 +47,7 @@ struct Pad { // In the scalar case we simply copy the input. void operator()(const Device& d, typename TTypes::Tensor output, typename TTypes::ConstTensor input, - Eigen::array, 0>, T) { + Eigen::array, 0>, T) { output.device(d) = input; } }; diff --git a/tensorflow/core/kernels/reduction_ops.h b/tensorflow/core/kernels/reduction_ops.h index 5db9e6032e0..e43d2828f30 100644 --- a/tensorflow/core/kernels/reduction_ops.h +++ b/tensorflow/core/kernels/reduction_ops.h @@ -20,6 +20,7 @@ limitations under the License. #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" namespace tensorflow { @@ -67,7 +68,7 @@ void FillIdentityEigenImpl(const Device& d, OUT_T out, const Reducer& reducer) { template struct ReduceFunctor { template - static void Reduce(const Device& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Reducer& reducer); diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 553f8895232..71af9d88dc1 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -190,24 +190,24 @@ class ReductionOp : public OpKernel { Functor::FillIdentity(d, tmp_out.flat(), reducer); } else if ((helper.ndims() == 1) && helper.reduce_first_axis()) { // Reduce to a scalar. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZero, reducer); } else if ((helper.ndims() == 2) && helper.reduce_first_axis()) { // Can be viewed as a reduction of a matrix along 1st dimension. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZero, reducer); } else if ((helper.ndims() == 2) && !helper.reduce_first_axis()) { // Can be viewed as a reduction of a matrix along 2nd dimension. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kOne, reducer); } else if ((helper.ndims() == 3) && helper.reduce_first_axis()) { // Can be viewed as a reduction of a 3D tensor along 1st and 3rd // dimensions. - Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kZeroTwo, reducer); } else if ((helper.ndims() == 3) && !helper.reduce_first_axis()) { // Can be viewed as a reduction of a 3D tensor along 2nd dimension. 
- Functor::Reduce(d, helper.out(&tmp_out), helper.in(data), + Functor::Reduce(ctx, helper.out(&tmp_out), helper.in(data), constants.kOne, reducer); } else { // If we don't hit one of the cases above, transpose the data so that @@ -223,7 +223,7 @@ class ReductionOp : public OpKernel { const int64 unreduced = tmp_out.NumElements(); const int64 reduced = shuffled.NumElements() / unreduced; const Tensor& const_shuffled = shuffled; - Functor::Reduce(d, tmp_out.flat(), + Functor::Reduce(ctx, tmp_out.flat(), const_shuffled.shaped({unreduced, reduced}), constants.kOne, reducer); } @@ -258,9 +258,10 @@ namespace functor { template struct ReduceFunctorBase { template - static void Reduce(const Device& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Reducer& reducer) { + const Device& d = ctx->eigen_device(); ReduceEigenImpl(d, out, in, reduction_axes, reducer); } diff --git a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc index ec4490db83f..8fd9165eb9f 100644 --- a/tensorflow/core/kernels/reduction_ops_gpu.cu.cc +++ b/tensorflow/core/kernels/reduction_ops_gpu.cu.cc @@ -17,8 +17,7 @@ limitations under the License. #define EIGEN_USE_GPU -#include "tensorflow/core/framework/numeric_types.h" -#include "tensorflow/core/kernels/reduction_ops.h" +#include "tensorflow/core/kernels/reduction_ops_gpu_kernels.h" namespace tensorflow { namespace functor { @@ -33,15 +32,27 @@ typedef TTypes::Tensor::Index Index; template struct ReduceFunctor { template - static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, - const Reducer& reducer) { - ReduceEigenImpl(d, To32Bit(out), To32Bit(in), reduction_axes, reducer); + const Reducer& reducer); +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::SumReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Sum(), T(0)); } template static void FillIdentity(const GPUDevice& d, OUT_T out, - const Reducer& reducer) { + const Eigen::internal::SumReducer& reducer) { FillIdentityEigenImpl(d, To32Bit(out), reducer); } }; @@ -49,19 +60,30 @@ struct ReduceFunctor { template struct ReduceFunctor> { template - static void Reduce(const GPUDevice& d, OUT_T out, IN_T in, + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, const ReductionAxes& reduction_axes, const Eigen::internal::MeanReducer& reducer) { - typedef typename IN_T::Index Index; - // Eigen sum reductions are much faster on GPU than mean reductions: - // Simply trigger them by computing the sum of the weighted inputs. 
- Index num_coeffs_to_reduce = 1; - for (int i = 0; i < Eigen::internal::array_size::value; - ++i) { - num_coeffs_to_reduce *= in.dimension(reduction_axes[i]); - } - T scale = T(1.0 / num_coeffs_to_reduce); - out.device(d) = (in * scale).sum(reduction_axes); + int divisor = 1; + if (out.rank() == 0) + divisor = in.size(); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 0) + divisor = in.dimension(0); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 1) + divisor = in.dimension(1); + else if (out.rank() == 1 && in.rank() == 3 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) + divisor = in.dimension(0) * in.dimension(2); + else if (out.rank() == 2 && in.rank() == 3 && reduction_axes[0] == 1) + divisor = in.dimension(1); + + DividesBy div_op(static_cast(divisor)); + TransformOutputIterator> itr((T*)out.data(), div_op); + ReduceImpl>, T*, + ReductionAxes>(ctx, itr, (T*)in.data(), in.rank(), + in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), + reduction_axes, cub::Sum(), T(0)); } template @@ -71,15 +93,159 @@ struct ReduceFunctor> { } }; +template <> +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MeanReducer& reducer) { + float divisor = 1.f; + if (out.rank() == 0) + divisor = in.size(); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 0) + divisor = in.dimension(0); + else if (out.rank() == 1 && in.rank() == 2 && reduction_axes[0] == 1) + divisor = in.dimension(1); + else if (out.rank() == 1 && in.rank() == 3 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) + divisor = in.dimension(0) * in.dimension(2); + else if (out.rank() == 2 && in.rank() == 3 && reduction_axes[0] == 1) + divisor = in.dimension(1); + DividesBy div_op(divisor); + + typedef cub::TransformInputIterator + inputIterType; + inputIterType input_itr((Eigen::half*)in.data(), HalfToFloat()); + + typedef TransformOutputIterator> + outputIterType; + outputIterType itr((Eigen::half*)out.data(), div_op); + + ReduceImpl( + ctx, itr, input_itr, in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Sum(), 0.f); + } + + template + static void FillIdentity( + const GPUDevice& d, OUT_T out, + const Eigen::internal::MeanReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MaxReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Max(), std::numeric_limits::lowest()); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::MaxReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::MinReducer& reducer) { + ReduceImpl( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? 
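The Mean specializations above reuse the sum reduction and fold the division into the output path: the divisor is the product of the reduced extents, and DividesBy is applied through a TransformOutputIterator as each result is written (for Eigen::half the input is additionally widened to float on the way in via HalfToFloat). A standalone sketch of the divide-on-write idea for a column mean of a row-major matrix (plain C++; illustrative only):

#include <iostream>
#include <vector>

// Divide-on-write output functor, in the spirit of DividesBy +
// TransformOutputIterator above.
struct DividesBy {
  float divisor;
  float operator()(float x) const { return x / divisor; }
};

int main() {
  const int rows = 2, cols = 3;
  const std::vector<float> in = {1, 2, 3,
                                 4, 5, 6};   // row-major [rows, cols]
  std::vector<float> out(cols, 0.f);

  // Reducing a rank-2 tensor along axis 0 leaves `cols` outputs, and the
  // divisor is the reduced extent, i.e. `rows`.
  DividesBy div{static_cast<float>(rows)};

  for (int c = 0; c < cols; ++c) {
    float sum = 0.f;
    for (int r = 0; r < rows; ++r) sum += in[r * cols + c];
    out[c] = div(sum);                       // transform applied on write
  }

  for (float v : out) std::cout << v << " "; // 2.5 3.5 4.5
  std::cout << "\n";
  return 0;
}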
in.dimension(2) : 1, out.rank(), reduction_axes, + cub::Min(), std::numeric_limits::max()); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::MinReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template +struct ReduceFunctor> { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::ProdReducer& reducer) { + ReduceImpl, T*, T*, ReductionAxes>( + ctx, (T*)out.data(), (T*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, + Prod(), T(1)); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::ProdReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template <> +struct ReduceFunctor { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::AndReducer& reducer) { + ReduceImpl( + ctx, (bool*)out.data(), (bool*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, And(), + true); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::AndReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + +template <> +struct ReduceFunctor { + template + static void Reduce(OpKernelContext* ctx, OUT_T out, IN_T in, + const ReductionAxes& reduction_axes, + const Eigen::internal::OrReducer& reducer) { + ReduceImpl( + ctx, (bool*)out.data(), (bool*)in.data(), in.rank(), in.dimension(0), + in.rank() >= 2 ? in.dimension(1) : 1, + in.rank() >= 3 ? in.dimension(2) : 1, out.rank(), reduction_axes, Or(), + false); + } + + template + static void FillIdentity(const GPUDevice& d, OUT_T out, + const Eigen::internal::OrReducer& reducer) { + FillIdentityEigenImpl(d, To32Bit(out), reducer); + } +}; + // T: the data type // REDUCER: the reducer functor // NUM_AXES: the number of axes to reduce // IN_DIMS: the number of dimensions of the input tensor -#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ - template void ReduceFunctor::Reduce( \ - const GPUDevice& d, TTypes::Tensor out, \ - TTypes::ConstTensor in, \ - const Eigen::array& reduction_axes, \ +#define DEFINE(T, REDUCER, IN_DIMS, NUM_AXES) \ + template void ReduceFunctor::Reduce( \ + OpKernelContext* ctx, TTypes::Tensor out, \ + TTypes::ConstTensor in, \ + const Eigen::array& reduction_axes, \ const REDUCER& reducer); #define DEFINE_IDENTITY(T, REDUCER) \ diff --git a/tensorflow/core/kernels/reduction_ops_gpu_kernels.h b/tensorflow/core/kernels/reduction_ops_gpu_kernels.h new file mode 100644 index 00000000000..ce471c672c7 --- /dev/null +++ b/tensorflow/core/kernels/reduction_ops_gpu_kernels.h @@ -0,0 +1,713 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "external/cub_archive/cub/device/device_reduce.cuh" +#include "external/cub_archive/cub/device/device_segmented_reduce.cuh" +#include "external/cub_archive/cub/iterator/counting_input_iterator.cuh" +#include "external/cub_archive/cub/iterator/transform_input_iterator.cuh" +#include "external/cub_archive/cub/warp/warp_reduce.cuh" +#include "cuda/include/cuComplex.h" +#include "tensorflow/core/framework/numeric_types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/reduction_ops.h" +#include "tensorflow/core/lib/core/bits.h" +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" +#include "tensorflow/core/util/permutation_input_iterator.h" +#include "tensorflow/core/util/transform_output_iterator.h" + +#include + +namespace tensorflow { +namespace functor { + +typedef Eigen::GpuDevice GPUDevice; + +template +struct Prod { + __host__ __device__ T operator()(const T& a, const T& b) const { + return a * b; + } +}; + +// needed to work around a compiler bug in nvcc - it doesn't seem to like +// the overloaded multiply op for std::complex +template <> +struct Prod> { + __host__ __device__ std::complex operator()( + const std::complex& a, const std::complex& b) const { + auto result = cuCmulf(make_cuComplex(a.real(), a.imag()), + make_cuComplex(b.real(), b.imag())); + return std::complex(result.x, result.y); + } +}; + +template <> +struct Prod> { + __host__ __device__ std::complex operator()( + const std::complex& a, const std::complex& b) const { + auto result = cuCmul(make_cuDoubleComplex(a.real(), a.imag()), + make_cuDoubleComplex(b.real(), b.imag())); + return std::complex(result.x, result.y); + } +}; + +template +struct DividesBy { + T divisor; + + __host__ __device__ explicit DividesBy(T divisor) : divisor(divisor) {} + + __host__ __device__ outT operator()(const T& x) const { return x / divisor; } +}; + +// needed to work around a compiler bug in nvcc - it doesn't seem to like +// the overloaded ops for std::complex +template <> +struct DividesBy> { + cuFloatComplex divisor; + + __host__ __device__ explicit DividesBy(std::complex divisor) + : divisor(make_cuComplex(divisor.real(), divisor.imag())) {} + + // implements + __host__ __device__ std::complex operator()( + const std::complex& x) const { + auto result = cuCdivf(make_cuComplex(x.real(), x.imag()), divisor); + return std::complex(result.x, result.y); + } +}; + +template <> +struct DividesBy> { + cuDoubleComplex divisor; + + __host__ __device__ explicit DividesBy(std::complex divisor) + : divisor(make_cuDoubleComplex(divisor.real(), divisor.imag())) {} + + // implements + __host__ __device__ std::complex operator()( + const std::complex& x) const { + auto result = cuCdiv(make_cuDoubleComplex(x.real(), x.imag()), divisor); + return std::complex(result.x, result.y); + } +}; + +template <> +struct DividesBy { + float divisor; + + __host__ __device__ explicit DividesBy(float divisor) : divisor(divisor) {} + + __host__ __device__ Eigen::half operator()(const float& x) const { + return Eigen::half(x / divisor); + } +}; + +struct HalfToFloat { + __host__ __device__ float operator()(const Eigen::half& x) const { + return Eigen::half_impl::half_to_float(x); + } +}; + +struct FloatToHalf { + __host__ __device__ 
Eigen::half operator()(const float& x) const { + return Eigen::half_impl::float_to_half_rtne(x); + } +}; + +struct And { + __host__ __device__ bool operator()(const bool& a, const bool& b) const { + return a && b; + } +}; + +struct Or { + __host__ __device__ bool operator()(const bool& a, const bool& b) const { + return a || b; + } +}; + +// each block does a grid strided loop and reduces its values locally +// the case of one block is used for low latency small reductions to scalars +template +__global__ void BlockReduceKernel( + T in, outT out, int num_elems, Op op, + typename std::iterator_traits::value_type initVal) { + const int bid = blockIdx.x; + const int tid = threadIdx.x; + + const int gid = bid * blockDim.x + tid; + const int stride = blockDim.x * gridDim.x; + + typedef typename std::iterator_traits::value_type value_type; + + value_type sum = initVal; + if (gid < num_elems) { + sum = in[gid]; + for (int pos = gid + stride; pos < num_elems; pos += stride) { + sum = op(sum, in[pos]); + } + } + + typedef cub::BlockReduce BlockReduce; + + __shared__ typename BlockReduce::TempStorage temp_storage; + + // only include input values in the reduction + // + // elements: ----------------- + // grid: |====|====|====|====|====| + const int num_elements_to_reduce = + max(min(num_elems - bid * blockDim.x, num_threads), 0); + + sum = BlockReduce(temp_storage) + .template Reduce(sum, op, num_elements_to_reduce); + + if (tid == 0) out[bid] = sum; +} + +// maps a warp to each row +template +__global__ void RowReduceKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + const int row = (blockIdx.x * blockDim.x + threadIdx.x) / 32; + const int lane = threadIdx.x % 32; + + if (num_cols == 1) { + int gid = threadIdx.x + blockIdx.x * blockDim.x; + if (gid < num_rows) out[gid] = in[gid]; + return; + } + + value_type sum = initVal; + int col = lane; + + if (row < num_rows && col < num_cols) { + sum = in[row * num_cols + col]; + col += 32; + for (; col < num_cols; col += 32) { + sum = op(sum, in[row * num_cols + col]); + } + } + + typedef cub::WarpReduce WarpReduce; + + __shared__ typename WarpReduce::TempStorage temp_storage; + + sum = WarpReduce(temp_storage).template Reduce(sum, op, min(num_cols, 32)); + + if (row < num_rows && lane == 0) out[row] = sum; +} + +// Works only if there are <= 16 columns +// each warps sums over multiple rows at once +template +__global__ void ColumnReduceMax16ColumnsKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + int rows_per_warp = 32 / num_cols; + + const int lane = threadIdx.x % 32; + const int lane_row = lane / num_cols; + + const int start_row_warp = + rows_per_warp * (blockIdx.y * blockDim.y + threadIdx.y); + const int start_row_lane = start_row_warp + lane_row; + int row = start_row_lane; + int col = lane % num_cols; + + value_type sum = initVal; + if (row * num_cols + col < num_rows * num_cols) + sum = in[row * num_cols + col]; + + __shared__ value_type partial_sums[32][33]; + + row += rows_per_warp * gridDim.y * blockDim.y; + for (; row < num_rows; row += rows_per_warp * gridDim.y * blockDim.y) { + int global_pos = row * num_cols + col; + if (global_pos < (num_rows * num_cols)) + sum = op(sum, in[row * num_cols + col]); + } + + const int rows_in_this_warp = min(rows_per_warp, num_rows - start_row_warp); + 
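In BlockReduceKernel above, each thread walks the input with a grid-wide stride, accumulates a private partial value, and the per-thread partials are then combined with cub::BlockReduce, with thread 0 writing the block result. A single-threaded C++ simulation of that decomposition (thread and block indices become plain loop variables; illustrative only, no CUDA required):

#include <iostream>
#include <numeric>
#include <vector>

int main() {
  const int num_blocks = 2, num_threads = 4;        // tiny "grid"
  const int stride = num_blocks * num_threads;
  std::vector<int> in(10);
  std::iota(in.begin(), in.end(), 1);               // 1..10, sum = 55

  int total = 0;
  for (int bid = 0; bid < num_blocks; ++bid) {
    std::vector<int> partials(num_threads, 0);
    for (int tid = 0; tid < num_threads; ++tid) {
      const int gid = bid * num_threads + tid;
      // Grid-stride loop: each "thread" covers gid, gid + stride, ...
      for (int pos = gid; pos < static_cast<int>(in.size()); pos += stride) {
        partials[tid] += in[pos];
      }
    }
    // Stand-in for cub::BlockReduce: fold the per-thread partials and let
    // "thread 0" write the block result.
    total += std::accumulate(partials.begin(), partials.end(), 0);
  }

  std::cout << total << "\n";                       // 55
  return 0;
}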
// not the most efficient way to do this sum + for (int i = 1; i < rows_in_this_warp; ++i) { + value_type tmp = + cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); + if (lane < num_cols) sum = op(sum, tmp); + } + + if (lane < num_cols) partial_sums[lane][threadIdx.y] = sum; + + __syncthreads(); + + if (threadIdx.y == 0 && threadIdx.x < num_cols) { + value_type s = partial_sums[threadIdx.x][0]; + + if (blockDim.y > 1) { + for (int row = 1; row < blockDim.y; ++row) { + s = op(s, partial_sums[threadIdx.x][row]); + } + } + + out[col * gridDim.y + blockIdx.y] = s; + } +} + +// Maps each block to a column range 32 wide +template +__global__ void ColumnReduceKernel( + T in, outT out, int num_rows, int num_cols, Op op, + typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * 32 + threadIdx.x; + + value_type sum = initVal; + if (row < num_rows && col < num_cols) + sum = in[row * num_cols + col]; + + __shared__ value_type partial_sums[32][33]; + + row += gridDim.y * blockDim.y; + + if (col < num_cols) { + for (; row < num_rows; row += gridDim.y * blockDim.y) { + sum = op(sum, in[row * num_cols + col]); + } + } + + partial_sums[threadIdx.x][threadIdx.y] = sum; + + __syncthreads(); + + if (threadIdx.y == 0 && col < num_cols) { + value_type s = partial_sums[threadIdx.x][0]; + + // only include input values in the reduction + // elem block_rows + // - = + // - = + // # # block boundary + // - = + // - = + // # # block boundary + // - = + // = + const int numRowsThisBlock = + min(blockDim.y, num_rows - blockIdx.y * blockDim.y); + + for (int row = 1; row < numRowsThisBlock; ++row) { + s = op(s, partial_sums[threadIdx.x][row]); + } + + out[col * gridDim.y + blockIdx.y] = s; + } +} + +// does multiple warp size segmented reductions in parallel +// segments cannot cross warp boundaries (mainly used for reducing the segments +// that come from the Max16Columns column reduction kernel) +template +__global__ void CleanupSegments( + T partial_sums, outT out, int num_rows, int num_cols, int segment_size, + Op op, typename std::iterator_traits::value_type initVal) { + typedef typename std::iterator_traits::value_type value_type; + const int tid = threadIdx.x + blockIdx.x * blockDim.x; + + value_type val = initVal; + if (tid < segment_size * num_cols) + val = partial_sums[tid]; + + typedef cub::WarpReduce WarpReduce; + + __shared__ typename WarpReduce::TempStorage temp_storage; + + const bool head_flag = (threadIdx.x % segment_size) == 0; + value_type sum = + WarpReduce(temp_storage).HeadSegmentedReduce(val, head_flag, op); + + if (head_flag && tid < segment_size * num_cols) { + out[tid / segment_size] = sum; + } +} + +// assigns one thread to a column +template +__global__ void ColumnReduceSimpleKernel(T in, outT out, int num_planes, + int num_rows, int num_cols, Op op) { + typedef typename std::iterator_traits::value_type value_type; + const int gid = threadIdx.x + blockIdx.x * blockDim.x; + const int elems_per_plane = num_rows * num_cols; + + const int plane = gid / num_cols; + const int col = gid % num_cols; + + if (plane >= num_planes) return; + + if (num_rows == 1) { + out[plane * elems_per_plane + col] = in[plane * elems_per_plane + col]; + return; + } + + value_type sum = op(in[plane * elems_per_plane + col], + in[plane * elems_per_plane + num_cols + col]); + for (int row = 2; row < num_rows; ++row) { + sum = op(sum, in[plane * elems_per_plane + row * 
num_cols + col]); + } + + out[plane * num_cols + col] = sum; +} + +struct RowOffset { + __host__ __device__ explicit RowOffset(const int& cols) : cols_(cols) {} + + __host__ __device__ int operator()(const int& x) const { return cols_ * x; } + + int cols_; +}; + +struct GatherOp { + __host__ __device__ GatherOp(const int& extent_x, const int& extent_y, + const int& extent_z, bool kOne) + : extent_x_(extent_x), + extent_y_(extent_y), + extent_z_(extent_z), + kOne_(kOne) { + if (kOne_) + group_size_ = extent_y_; + else + group_size_ = extent_x_ * extent_z_; + } + + __host__ __device__ int operator()(const int& ind) const { + const int group = kOne_ ? ind / group_size_ : ind % group_size_; + const int offset = kOne_ ? ind % group_size_ : ind / group_size_; + + const int x = group / extent_z_; + const int z = group % extent_z_; + + return x * extent_y_ * extent_z_ + z + offset * extent_z_; + } + + int extent_x_; + int extent_y_; + int extent_z_; + bool kOne_; + int group_size_; +}; + +template +void LaunchScalarReduction(OpKernelContext* ctx, OUT_T out, IN_T in, + int in_size, Op op, T init, + const cudaStream_t& cu_stream) { + // handle situations where low latency is important better than CUB + if (in_size <= 4096) { + const int num_blocks = 1; + const int num_threads = 256; + BlockReduceKernel + <<>>(in, out, in_size, op, init); + return; + } else if (in_size <= 1 << 19) { + const int num_threads = 256; + const int num_blocks = min(32, Eigen::divup(in_size, num_threads)); + // it seems like tailoring this to the GPU + // would be more effective, but all attempts + // at making this a multiple of the number of + // multiprocessors have lead to lower perf + // in general + // TODO(eriche) investigate this more + + Tensor temp_storage; + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(num_blocks * sizeof(T))}), + &temp_storage)); + + BlockReduceKernel + <<>>( + in, (T*)temp_storage.flat().data(), in_size, op, init); + + // take care that we only reduce blocks that had some valid elements in them + // TODO(eriche): CUB currently has a bug in HeadSegmentedReduce that + // requires it to be used with a full warp. Can reduce 32 -> num_blocks + // when this is fixed. + CleanupSegments<<<1, 32, 0, cu_stream>>>( + (T*)temp_storage.flat().data(), out, 1, 1, num_blocks, op, + init); + return; + } + std::size_t temp_storage_bytes = 0; + + Tensor temp_storage; + // written as a loop because it reduces clutter + // first pass allocates memory, second launches kernel(s) + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceReduce::Reduce( + i == 0 ? 
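GatherOp above turns a segmented sum into an x-and-z reduction of a row-major [x, y, z] tensor: in the kOne = false case, consecutive gathered indices enumerate every (x, z) pair for a fixed y, so each segment of length x*z reduces away exactly the x and z extents. A standalone check of that index arithmetic on a tiny 2x3x2 example (illustrative only; same formula as GatherOp, with the terms reordered):

#include <iostream>

struct Gather {
  int X, Y, Z;
  int group_size() const { return X * Z; }
  int operator()(int ind) const {
    const int group = ind % group_size();   // which (x, z) pair
    const int offset = ind / group_size();  // which y (the kept axis)
    const int x = group / Z;
    const int z = group % Z;
    return x * Y * Z + offset * Z + z;      // row-major [X, Y, Z] offset
  }
};

int main() {
  const Gather g{2, 3, 2};                  // extent_x = 2, extent_y = 3, extent_z = 2
  // Segment y collects all input offsets whose middle coordinate equals y.
  for (int y = 0; y < g.Y; ++y) {
    for (int i = 0; i < g.group_size(); ++i) {
      std::cout << g(y * g.group_size() + i) << " ";  // y = 0: 0 1 6 7
    }
    std::cout << "\n";
  }
  return 0;
}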
nullptr : temp_storage.flat().data(), + temp_storage_bytes, in, out, in_size, op, init, cu_stream); + + OP_REQUIRES( + ctx, success == 0, + errors::Internal("CUB reduce error", cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void LaunchRowReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int num_rows, + int num_cols, Op op, T init, + const cudaStream_t& cu_stream) { + if (num_cols < 1024) { + const int threads_per_block = 128; + const int warps_per_block = threads_per_block / 32; + int num_blocks = (num_rows + warps_per_block - 1) / warps_per_block; + + RowReduceKernel<<>>( + in, out, num_rows, num_cols, op, init); + return; + } + + // setup segment offsets with counting and transform iterator + RowOffset row_offset_op(num_cols); + cub::CountingInputIterator counting_iter(0); + cub::TransformInputIterator> + transform_iter(counting_iter, row_offset_op); + + std::size_t temp_storage_bytes = 0; + Tensor temp_storage; + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceSegmentedReduce::Reduce( + i == 0 ? nullptr : temp_storage.flat().data(), + temp_storage_bytes, in, out, num_rows, transform_iter, + transform_iter + 1, op, init, cu_stream); + + OP_REQUIRES(ctx, success == 0, + errors::Internal("CUB segmented reduce error", + cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void LaunchColumnReduction_LTE16Cols(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, T init, + const cudaStream_t& cu_stream) { + int rows_per_warp = 32 / extent_y; + dim3 block_dim(32, min(Eigen::divup(extent_x, rows_per_warp), 32), 1); + dim3 grid_dim(1, + Eigen::divup(static_cast(extent_x), + rows_per_warp * block_dim.y), + 1); + + grid_dim.y = min((int)grid_dim.y, 32); + + if (grid_dim.y > 2 && grid_dim.y < 32) { + int log2 = Log2Floor(grid_dim.y); + grid_dim.y = 1 << log2; + } + + if (grid_dim.y == 1) { + ColumnReduceMax16ColumnsKernel<<>>( + in, out, extent_x, extent_y, op, init); + } else { + Tensor temp_storage; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DT_INT8, + TensorShape({static_cast( + sizeof(T) * extent_y * grid_dim.y)}), + &temp_storage)); + ColumnReduceMax16ColumnsKernel<<>>( + in, (T*)temp_storage.flat().data(), extent_x, extent_y, op, + init); + + dim3 new_grid_dim((grid_dim.y * extent_y + 31) / 32, 1, 1); + dim3 num_threads(128, 1, 1); + CleanupSegments<<>>( + (T*)temp_storage.flat().data(), out, extent_x, extent_y, + grid_dim.y, op, init); + } +} + +template +void LaunchColumnReduction_LTE4096Cols(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, + T init, const cudaStream_t& cu_stream) { + dim3 block_dim(32, min(extent_x, 32), 1); + dim3 grid_dim((extent_y + 31) / 32, 1, 1); + + if (grid_dim.x < 16) grid_dim.y = min((extent_x + 31) / 32, 32); + + if (grid_dim.y > 2 && grid_dim.y < 32) { + int log2 = Log2Floor(grid_dim.y); + grid_dim.y = 1 << log2; + } + + if (grid_dim.y == 1) { + ColumnReduceKernel<<>>( + in, out, extent_x, extent_y, op, init); + } else { + Tensor temp_storage; + OP_REQUIRES_OK(ctx, + ctx->allocate_temp(DT_INT8, + TensorShape({static_cast( + sizeof(T) * extent_y * grid_dim.y)}), + &temp_storage)); + + ColumnReduceKernel<<>>( + in, (T*)temp_storage.flat().data(), extent_x, extent_y, op, + init); + + dim3 new_grid_dim((grid_dim.y * 
extent_y + 31) / 32, 1, 1); + dim3 num_threads(128, 1, 1); + CleanupSegments<<>>( + (T*)temp_storage.flat().data(), out, extent_x, extent_y, + grid_dim.y, op, init); + } +} + +template +void LaunchColumnReduction(OpKernelContext* ctx, OUT_T out, IN_T in, + int extent_x, int extent_y, Op op, T init, + const cudaStream_t& cu_stream) { + if (extent_y <= 16) { + LaunchColumnReduction_LTE16Cols(ctx, out, in, extent_x, extent_y, op, init, + cu_stream); + } else if (extent_y <= 4096) { + LaunchColumnReduction_LTE4096Cols(ctx, out, in, extent_x, extent_y, op, + init, cu_stream); + } else { + int threads_per_block = 128; + int num_blocks = Eigen::divup(extent_y, threads_per_block); + + ColumnReduceSimpleKernel<<>>( + in, out, 1, extent_x, extent_y, op); + } +} + +template +void Launch3DYReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int extent_x, + int extent_y, int extent_z, Op op, T init, + const cudaStream_t& cu_stream) { + int threads_per_block = 128; + int num_blocks = + (extent_x * extent_z + threads_per_block - 1) / threads_per_block; + + // TODO(eriche): this won't be very good in the case of small x + // small z and large y. + ColumnReduceSimpleKernel<<>>( + in, out, extent_x, extent_y, extent_z, op); +} + +template +void Launch3DXZReduction(OpKernelContext* ctx, OUT_T out, IN_T in, int extent_x, + int extent_y, int extent_z, Op op, T init, + const cudaStream_t& cu_stream) { + // setup segment offsets with counting and transform iterator + RowOffset row_offset_op(extent_x * extent_z); + cub::CountingInputIterator counting_iter(0); + cub::TransformInputIterator> + transform_iter(counting_iter, row_offset_op); + + GatherOp gather_op(extent_x, extent_y, extent_z, false); + typedef cub::TransformInputIterator> + gatherIterType; + gatherIterType gather_iter(counting_iter, gather_op); + + PermutationInputIterator permute_iter(in, + gather_iter); + + std::size_t temp_storage_bytes = 0; + Tensor temp_storage; + + for (int i = 0; i < 2; ++i) { + auto success = cub::DeviceSegmentedReduce::Reduce( + i == 0 ? 
nullptr : temp_storage.flat().data(), + temp_storage_bytes, permute_iter, out, extent_y, transform_iter, + transform_iter + 1, op, init, cu_stream); + + OP_REQUIRES(ctx, success == 0, + errors::Internal("CUB segmented reduce error", + cudaGetErrorString(success))); + + if (i == 0) + OP_REQUIRES_OK( + ctx, + ctx->allocate_temp( + DT_INT8, TensorShape({static_cast(temp_storage_bytes)}), + &temp_storage)); + } +} + +template +void ReduceImpl(OpKernelContext* ctx, OUT_T out, IN_T in, int in_rank, + int in_dim0, int in_dim1, int in_dim2, int out_rank, + const ReductionAxes& reduction_axes, Op op, T init) { + const cudaStream_t& cu_stream = GetCudaStream(ctx); + if (out_rank == 0) { + const int in_size = in_dim0 * in_dim1 * in_dim2; + LaunchScalarReduction(ctx, out, in, in_size, op, init, cu_stream); + } else if (in_rank == 2 && out_rank == 1 && + reduction_axes[0] == 1) { // row reduction + LaunchRowReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream); + } else if (in_rank == 2 && out_rank == 1 && + reduction_axes[0] == 0) { // column reduction + LaunchColumnReduction(ctx, out, in, in_dim0, in_dim1, op, init, cu_stream); + } else if (in_rank == 3 && out_rank == 2 && reduction_axes[0] == 1) { + Launch3DYReduction(ctx, out, in, in_dim0, in_dim1, in_dim2, op, init, + cu_stream); + } else if (in_rank == 3 && out_rank == 1 && reduction_axes[0] == 0 && + reduction_axes[1] == 2) { + Launch3DXZReduction(ctx, out, in, in_dim0, in_dim1, in_dim2, op, init, + cu_stream); + } else { + std::stringstream ss; + ss << "Invalid reduction requested: in_rank, out_rank, axes " << in_rank + << " " << out_rank; + if (out_rank == 1) ss << " " << reduction_axes[0]; + if (out_rank == 2) ss << " " << reduction_axes[1]; + LOG(FATAL) << ss.str(); + } +} + +} // namespace functor +} // namespace tensorflow + +#endif diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc index 9cdebdd4f23..9bbe993a2f9 100644 --- a/tensorflow/core/kernels/reduction_ops_test.cc +++ b/tensorflow/core/kernels/reduction_ops_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -22,14 +23,59 @@ namespace tensorflow { // Creates a Graph which "reduce"s a 3D float tensor of "num" elements // into a scalar. 
-static Graph* ToScalar(const string& reduce, int num) { - Graph* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); - data.flat().setRandom(); - Tensor axes(DT_INT32, TensorShape({3})); +template +static Graph* ToScalar(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DataTypeToEnum::value, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({2})); axes.flat()(0) = 0; axes.flat()(1) = 1; - axes.flat()(2) = 2; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ColReduce(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 0; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* RowReduce(const string& reduce, int num_x, int num_y) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 1; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ThreeDYReduce(const string& reduce, int num_y, int num_z) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({1})); + axes.flat()(0) = 1; + test::graph::Reduce(g, reduce, test::graph::Constant(g, data), + test::graph::Constant(g, axes)); + return g; +} + +static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { + auto* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); + data.flat().setRandom(); + Tensor axes(DT_INT32, TensorShape({2})); + axes.flat()(0) = 0; + axes.flat()(1) = 2; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; @@ -37,51 +83,100 @@ static Graph* ToScalar(const string& reduce, int num) { // Creates a bench which reduces a 3D tensor with total "num" floats // into a scalar on a "device". Runs the bench for "iters" times. 
+template static void ReduceToScalar(int iters, const string& device, - const string& reduce, int num) { - testing::ItemsProcessed(static_cast(iters) * num); - testing::BytesProcessed(static_cast(iters) * num * sizeof(float)); - test::Benchmark(device, ToScalar(reduce, num)).Run(iters); + const string& reduce, int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(T)); + test::Benchmark(device, ToScalar(reduce, num_x, num_y)).Run(iters); } -static void BM_Sum3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Sum", num); +static void DoRowReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, RowReduce(reduce, num_x, num_y)).Run(iters); } -BENCHMARK(BM_Sum3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Max3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Max", num); +static void DoColReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ColReduce(reduce, num_x, num_y)).Run(iters); } -BENCHMARK(BM_Max3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Prod3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Prod", num); +static void Do3DYReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ThreeDYReduce(reduce, num_x, num_y)).Run(iters); } -BENCHMARK(BM_Prod3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Mean3DToScalarCPU(int iters, int num) { - ReduceToScalar(iters, "cpu", "Mean", num); +static void Do3DXZReduce(int iters, const string& device, const string& reduce, + int num_x, int num_y) { + testing::ItemsProcessed(static_cast(iters) * num_x * num_y); + testing::BytesProcessed(static_cast(iters) * num_x * num_y * + sizeof(float)); + test::Benchmark(device, ThreeDXZReduce(reduce, num_x, num_y)).Run(iters); } -BENCHMARK(BM_Mean3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Sum3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Sum", num); +static void BM_Sum2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Sum3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DToScalarGPU)->RangePair(1, 8192, 1, 8192); -static void BM_Max3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Max", num); +static void BM_Sum2DToScalarGPUComplex(int iters, int num_x, int num_y) { + ReduceToScalar>(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Max3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DToScalarGPUComplex)->RangePair(1, 8192, 1, 8192); -static void BM_Prod3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Prod", num); +static void BM_Sum2DToScalarGPUHalf(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Prod3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DToScalarGPUHalf)->RangePair(1, 8192, 1, 8192); -static void 
BM_Mean3DToScalarGPU(int iters, int num) { - ReduceToScalar(iters, "gpu", "Mean", num); +static void BM_Sum2DRowReduceGPU(int iters, int num_x, int num_y) { + DoRowReduce(iters, "gpu", "Sum", num_x, num_y); } -BENCHMARK(BM_Mean3DToScalarGPU)->Range(1 << 13, 1 << 20); +BENCHMARK(BM_Sum2DRowReduceGPU)->RangePair(1, 8192, 1, 8192); + +static void BM_Sum2DColumnReduceGPU(int iters, int num_x, int num_y) { + DoColReduce(iters, "gpu", "Sum", num_x, num_y); +} +BENCHMARK(BM_Sum2DColumnReduceGPU)->RangePair(1, 8192, 1, 8192); + +static void BM_Sum3DYReduceGPU(int iters, int num_x, int num_y) { + Do3DYReduce(iters, "gpu", "Sum", num_x, num_y); +} +BENCHMARK(BM_Sum3DYReduceGPU)->RangePair(64, 4096, 64, 4096); + +static void BM_Sum3DXZReduceGPU(int iters, int num_x, int num_y) { + Do3DXZReduce(iters, "gpu", "Sum", num_x, num_y); +} +BENCHMARK(BM_Sum3DXZReduceGPU)->RangePair(64, 4096, 64, 4096); + +static void BM_Mean2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Mean", num_x, num_y); +} +BENCHMARK(BM_Mean2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); + +static void BM_Max2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Max", num_x, num_y); +} +BENCHMARK(BM_Max2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); + +static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "Min", num_x, num_y); +} +BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); + +static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) { + ReduceToScalar(iters, "gpu", "All", num_x, num_y); +} +BENCHMARK(BM_Bool2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); } // end namespace tensorflow diff --git a/tensorflow/core/kernels/save_restore_tensor.cc b/tensorflow/core/kernels/save_restore_tensor.cc index 80d49017406..6b06cf650a8 100644 --- a/tensorflow/core/kernels/save_restore_tensor.cc +++ b/tensorflow/core/kernels/save_restore_tensor.cc @@ -216,9 +216,12 @@ void RestoreTensor(OpKernelContext* context, if (output_shape.num_elements() == 0) return; -#define READER_COPY(T) \ - case DataTypeToEnum::value: \ - reader->CopySliceData(tensor_name, slice_to_load, t->flat().data()); \ +#define READER_COPY(T) \ + case DataTypeToEnum::value: \ + OP_REQUIRES(context, \ + reader->CopySliceData(tensor_name, slice_to_load, \ + t->flat().data()), \ + errors::InvalidArgument("Error copying slice data")); \ break; switch (type) { diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc index a305598fe2b..96eaa4ac75b 100644 --- a/tensorflow/core/kernels/shape_op_test.cc +++ b/tensorflow/core/kernels/shape_op_test.cc @@ -101,7 +101,7 @@ TEST_F(ShapeOpTest, Simple) { Tensor variant_tensor(DT_VARIANT, TensorShape({1})); Status s = session.Run({{input, variant_tensor}}, {shape_output}, &outputs); EXPECT_FALSE(s.ok()); - ExpectHasError(s, "Shape of non-scalar Variant not supported."); + ExpectHasError(s, "Shape of non-unary Variant not supported."); } { diff --git a/tensorflow/core/kernels/shape_ops.h b/tensorflow/core/kernels/shape_ops.h index 0c39d46aeaf..ac607f4e8b8 100644 --- a/tensorflow/core/kernels/shape_ops.h +++ b/tensorflow/core/kernels/shape_ops.h @@ -35,7 +35,7 @@ inline Status GetRegularOrVariantShape(OpKernelContext* ctx, int input_index, if (ctx->input_dtype(0) == DT_VARIANT) { if (inp.dims() != 0) { return errors::InvalidArgument( - "Shape of non-scalar Variant not supported."); + "Shape of non-unary Variant not supported."); } 
TF_RETURN_IF_ERROR(GetUnaryVariantShape(inp, shape)); } else { diff --git a/tensorflow/core/kernels/summary_interface.cc b/tensorflow/core/kernels/summary_interface.cc new file mode 100644 index 00000000000..19e0f702f9f --- /dev/null +++ b/tensorflow/core/kernels/summary_interface.cc @@ -0,0 +1,432 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/png/png_io.h" +#include "tensorflow/core/lib/wav/wav_io.h" +#include "tensorflow/core/util/event.pb.h" +#include "tensorflow/core/util/events_writer.h" + +namespace tensorflow { +namespace { +template +Status TensorValueAt(Tensor t, int index, T* out) { + switch (t.dtype()) { + case DT_FLOAT: + *out = t.flat()(index); + break; + case DT_DOUBLE: + *out = t.flat()(index); + break; + case DT_HALF: + *out = T(t.flat()(index)); + break; + case DT_INT32: + *out = t.flat()(index); + break; + case DT_UINT8: + *out = t.flat()(index); + break; + case DT_INT16: + *out = t.flat()(index); + break; + case DT_INT8: + *out = t.flat()(index); + break; + case DT_BOOL: + *out = t.flat()(index); + break; + case DT_INT64: + *out = t.flat()(index); + break; + default: + return errors::Unimplemented("Scalar summary for dtype ", + DataTypeString(t.dtype()), + " is not supported."); + } + return Status::OK(); +} + +typedef Eigen::Tensor Uint8Image; + +// Add the sequence of images specified by ith_image to the summary. +// +// Factoring this loop out into a helper function lets ith_image behave +// differently in the float and uint8 cases: the float case needs a temporary +// buffer which can be shared across calls to ith_image, but the uint8 case +// does not. +Status AddImages(const string& tag, int max_images, int batch_size, int w, + int h, int depth, + const std::function& ith_image, Summary* s) { + const int N = std::min(max_images, batch_size); + for (int i = 0; i < N; ++i) { + Summary::Value* v = s->add_value(); + // The tag depends on the number of requested images (not the number + // produced.) + // + // Note that later on avisu uses "/" to figure out a consistent naming + // convention for display, so we append "/image" to guarantee that the + // image(s) won't be displayed in the global scope with no name. 
+ if (max_images > 1) { + v->set_tag(strings::StrCat(tag, "/image/", i)); + } else { + v->set_tag(strings::StrCat(tag, "/image")); + } + + auto image = ith_image(i); + Summary::Image* si = v->mutable_image(); + si->set_height(h); + si->set_width(w); + si->set_colorspace(depth); + const int channel_bits = 8; + const int compression = -1; // Use zlib default + if (!png::WriteImageToBuffer(image.data(), w, h, w * depth, depth, + channel_bits, compression, + si->mutable_encoded_image_string(), nullptr)) { + return errors::Internal("PNG encoding failed"); + } + } + return Status::OK(); +} + +template +void NormalizeFloatImage(int hw, int depth, + typename TTypes::ConstMatrix values, + typename TTypes::ConstVec bad_color, + Uint8Image* image) { + if (!image->size()) return; // Nothing to do for empty images + + // Rescale the image to uint8 range. + // + // We are trying to generate an RGB image from a float/half tensor. We do + // not have any info about the expected range of values in the tensor + // but the generated image needs to have all RGB values within [0, 255]. + // + // We use two different algorithms to generate these values. If the + // tensor has only positive values we scale them all by 255/max(values). + // If the tensor has both negative and positive values we scale them by + // the max of their absolute values and center them around 127. + // + // This works for most cases, but does not respect the relative dynamic + // range across different instances of the tensor. + + // Compute min and max ignoring nonfinite pixels + float image_min = std::numeric_limits::infinity(); + float image_max = -image_min; + for (int i = 0; i < hw; i++) { + bool finite = true; + for (int j = 0; j < depth; j++) { + if (!Eigen::numext::isfinite(values(i, j))) { + finite = false; + break; + } + } + if (finite) { + for (int j = 0; j < depth; j++) { + float value(values(i, j)); + image_min = std::min(image_min, value); + image_max = std::max(image_max, value); + } + } + } + + // Pick an affine transform into uint8 + const float kZeroThreshold = 1e-6; + T scale, offset; + if (image_min < 0) { + float max_val = std::max(std::abs(image_min), std::abs(image_max)); + scale = T(max_val < kZeroThreshold ? 0.0f : 127.0f / max_val); + offset = T(128.0f); + } else { + scale = T(image_max < kZeroThreshold ? 0.0f : 255.0f / image_max); + offset = T(0.0f); + } + + // Transform image, turning nonfinite values to bad_color + for (int i = 0; i < hw; i++) { + bool finite = true; + for (int j = 0; j < depth; j++) { + if (!Eigen::numext::isfinite(values(i, j))) { + finite = false; + break; + } + } + if (finite) { + image->chip<0>(i) = + (values.template chip<0>(i) * scale + offset).template cast(); + } else { + image->chip<0>(i) = bad_color; + } + } +} + +template +Status NormalizeAndAddImages(const Tensor& tensor, int max_images, int h, int w, + int hw, int depth, int batch_size, + const string& base_tag, Tensor bad_color_tensor, + Summary* s) { + // For float and half images, nans and infs are replaced with bad_color. + if (bad_color_tensor.dim_size(0) < depth) { + return errors::InvalidArgument( + "expected depth <= bad_color.size, got depth = ", depth, + ", bad_color.size = ", bad_color_tensor.dim_size(0)); + } + auto bad_color_full = bad_color_tensor.vec(); + typename TTypes::ConstVec bad_color(bad_color_full.data(), depth); + + // Float images must be scaled and translated. 
+ Uint8Image image(hw, depth); + auto ith_image = [&tensor, &image, bad_color, batch_size, hw, depth](int i) { + auto tensor_eigen = tensor.template shaped({batch_size, hw, depth}); + typename TTypes::ConstMatrix values( + &tensor_eigen(i, 0, 0), Eigen::DSizes(hw, depth)); + NormalizeFloatImage(hw, depth, values, bad_color, &image); + return image; + }; + return AddImages(base_tag, max_images, batch_size, w, h, depth, ith_image, s); +} + +} // namespace + +class SummaryWriterImpl : public SummaryWriterInterface { + public: + SummaryWriterImpl(int max_queue, int flush_millis) + : SummaryWriterInterface(), + max_queue_(max_queue), + flush_millis_(flush_millis) {} + + Status Initialize(const string& logdir, const string& filename_suffix, + Env* env) { + Status is_dir = env->IsDirectory(logdir); + if (!is_dir.ok()) { + if (is_dir.code() != tensorflow::error::NOT_FOUND) { + return is_dir; + } + TF_RETURN_IF_ERROR(env->CreateDir(logdir)); + } + mutex_lock ml(mu_); + events_writer_ = + xla::MakeUnique(io::JoinPath(logdir, "events")); + if (!events_writer_->InitWithSuffix(filename_suffix)) { + return errors::Unknown("Could not initialize events writer."); + } + last_flush_ = Env::Default()->NowMicros(); + return Status::OK(); + } + + Status Flush() override { + mutex_lock ml(mu_); + return InternalFlush(); + } + + ~SummaryWriterImpl() override { + (void)Flush(); // Ignore errors. + } + + Status WriteTensor(int64 global_step, Tensor t, const string& tag, + const string& serialized_metadata) override { + Summary s; + Summary::Value* v = s.add_value(); + t.AsProtoTensorContent(v->mutable_tensor()); + v->set_tag(tag); + v->mutable_metadata()->ParseFromString(serialized_metadata); + return Enqueue(global_step, s); + } + + Status WriteScalar(int64 global_step, Tensor t, const string& tag) override { + Summary s; + Summary::Value* v = s.add_value(); + v->set_tag(tag); + float value; + TF_RETURN_IF_ERROR(TensorValueAt(t, 0, &value)); + v->set_simple_value(value); + return Enqueue(global_step, s); + } + + Status WriteHistogram(int64 global_step, Tensor t, + const string& tag) override { + Summary s; + Summary::Value* v = s.add_value(); + v->set_tag(tag); + histogram::Histogram histo; + for (int64 i = 0; i < t.NumElements(); i++) { + double double_val; + TF_RETURN_IF_ERROR(TensorValueAt(t, i, &double_val)); + if (Eigen::numext::isnan(double_val)) { + return errors::InvalidArgument("Nan in summary histogram for: ", tag); + } else if (Eigen::numext::isinf(double_val)) { + return errors::InvalidArgument("Infinity in summary histogram for: ", + tag); + } + histo.Add(double_val); + } + + histo.EncodeToProto(v->mutable_histo(), false /* Drop zero buckets */); + return Enqueue(global_step, s); + } + + Status WriteImage(int64 global_step, Tensor tensor, const string& tag, + int max_images, Tensor bad_color) override { + if (!(tensor.dims() == 4 && + (tensor.dim_size(3) == 1 || tensor.dim_size(3) == 3 || + tensor.dim_size(3) == 4))) { + return errors::InvalidArgument( + "Tensor must be 4-D with last dim 1, 3, or 4, not ", + tensor.shape().DebugString()); + } + if (!(tensor.dim_size(0) < (1LL << 31) && + tensor.dim_size(1) < (1LL << 31) && + tensor.dim_size(2) < (1LL << 31) && + (tensor.dim_size(1) * tensor.dim_size(2)) < (1LL << 29))) { + return errors::InvalidArgument("Tensor too large for summary ", + tensor.shape().DebugString()); + } + Summary s; + // The casts and h * w cannot overflow because of the limits above. 
+ const int batch_size = static_cast(tensor.dim_size(0)); + const int h = static_cast(tensor.dim_size(1)); + const int w = static_cast(tensor.dim_size(2)); + const int hw = h * w; // Compact these two dims for simplicity + const int depth = static_cast(tensor.dim_size(3)); + if (tensor.dtype() == DT_UINT8) { + // For uint8 input, no normalization is necessary + auto ith_image = [&tensor, batch_size, hw, depth](int i) { + auto values = tensor.shaped({batch_size, hw, depth}); + return typename TTypes::ConstMatrix( + &values(i, 0, 0), Eigen::DSizes(hw, depth)); + }; + TF_RETURN_IF_ERROR( + AddImages(tag, max_images, batch_size, w, h, depth, ith_image, &s)); + } else if (tensor.dtype() == DT_HALF) { + TF_RETURN_IF_ERROR(NormalizeAndAddImages( + tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, &s)); + } else if (tensor.dtype() == DT_FLOAT) { + TF_RETURN_IF_ERROR(NormalizeAndAddImages( + tensor, max_images, h, w, hw, depth, batch_size, tag, bad_color, &s)); + } else { + return errors::InvalidArgument( + "Only DT_INT8, DT_HALF, and DT_FLOAT images are supported. Got ", + DataTypeString(tensor.dtype())); + } + + return Enqueue(global_step, s); + } + + Status WriteAudio(int64 global_step, Tensor tensor, const string& tag, + int max_outputs, float sample_rate) override { + if (sample_rate <= 0.0f) { + return errors::InvalidArgument("sample_rate must be > 0"); + } + const int batch_size = tensor.dim_size(0); + const int64 length_frames = tensor.dim_size(1); + const int64 num_channels = + tensor.dims() == 2 ? 1 : tensor.dim_size(tensor.dims() - 1); + Summary s; + const int N = std::min(max_outputs, batch_size); + for (int i = 0; i < N; ++i) { + Summary::Value* v = s.add_value(); + if (max_outputs > 1) { + v->set_tag(strings::StrCat(tag, "/audio/", i)); + } else { + v->set_tag(strings::StrCat(tag, "/audio")); + } + + Summary::Audio* sa = v->mutable_audio(); + sa->set_sample_rate(sample_rate); + sa->set_num_channels(num_channels); + sa->set_length_frames(length_frames); + sa->set_content_type("audio/wav"); + + auto values = + tensor.shaped({batch_size, length_frames, num_channels}); + auto channels_by_frames = typename TTypes::ConstMatrix( + &values(i, 0, 0), + Eigen::DSizes(length_frames, num_channels)); + size_t sample_rate_truncated = lrintf(sample_rate); + if (sample_rate_truncated == 0) { + sample_rate_truncated = 1; + } + TF_RETURN_IF_ERROR(wav::EncodeAudioAsS16LEWav( + channels_by_frames.data(), sample_rate_truncated, num_channels, + length_frames, sa->mutable_encoded_audio_string())); + } + + return Enqueue(global_step, s); + } + + string DebugString() override { return "SummaryWriterImpl"; } + + private: + Status Enqueue(int64 global_step, const Summary& summary) { + mutex_lock ml(mu_); + queue_.emplace_back(global_step, summary, Env::Default()->NowMicros()); + if (queue_.size() >= max_queue_ || + Env::Default()->NowMicros() - last_flush_ > 1000 * flush_millis_) { + return InternalFlush(); + } + return Status::OK(); + } + + Status InternalFlush() EXCLUSIVE_LOCKS_REQUIRED(mu_) { + for (const EventInfo& e : queue_) { + Event event; + event.set_step(std::get<0>(e)); + *event.mutable_summary() = std::get<1>(e); + event.set_wall_time(std::get<2>(e)); + events_writer_->WriteEvent(event); + } + queue_.clear(); + if (!events_writer_->Flush()) { + return errors::InvalidArgument("Could not flush events file."); + } + last_flush_ = Env::Default()->NowMicros(); + return Status::OK(); + } + + const int max_queue_; + const int flush_millis_; + uint64 last_flush_; + using EventInfo = 
std::tuple; + mutex mu_; + std::vector queue_ GUARDED_BY(mu_); + // A pointer to allow deferred construction. + std::unique_ptr events_writer_ GUARDED_BY(mu_); + std::vector> registered_summaries_ + GUARDED_BY(mu_); +}; + +Status CreateSummaryWriter(int max_queue, int flush_millis, + const string& logdir, const string& filename_suffix, + Env* env, SummaryWriterInterface** result) { + SummaryWriterImpl* w = new SummaryWriterImpl(max_queue, flush_millis); + Status s = w->Initialize(logdir, filename_suffix, env); + if (!s.ok()) { + w->Unref(); + *result = nullptr; + return s; + } + *result = w; + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_interface.h b/tensorflow/core/kernels/summary_interface.h new file mode 100644 index 00000000000..ae2fbb70fe3 --- /dev/null +++ b/tensorflow/core/kernels/summary_interface.h @@ -0,0 +1,59 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ +#define TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ + + +#include "tensorflow/core/framework/resource_mgr.h" + +namespace tensorflow { + +// Main interface for the summary writer resource. +class SummaryWriterInterface : public ResourceBase { + public: + virtual ~SummaryWriterInterface() override {} + + // Flushes all unwritten messages in the queue. + virtual Status Flush() = 0; + + // These are called in the OpKernel::Compute methods for the summary ops. + virtual Status WriteTensor(int64 global_step, Tensor t, const string& tag, + const string& serialized_metadata) = 0; + + virtual Status WriteScalar(int64 global_step, Tensor t, + const string& tag) = 0; + + virtual Status WriteHistogram(int64 global_step, Tensor t, + const string& tag) = 0; + + virtual Status WriteImage(int64 global_step, Tensor t, const string& tag, + int max_images, Tensor bad_color) = 0; + + virtual Status WriteAudio(int64 global_step, Tensor t, const string& tag, + int max_outputs_, float sample_rate) = 0; +}; + +// Creates a SummaryWriterInterface instance which writes to a file. It will +// enqueue up to max_queue summaries, and flush at least every flush_millis +// milliseconds. The summaries will be written to the directory specified by +// logdir and with the filename suffixed by filename_suffix. The caller owns a +// reference to result if the returned status is ok. +Status CreateSummaryWriter(int max_queue, int flush_millis, + const string& logdir, const string& filename_suffix, + Env* env, SummaryWriterInterface** result); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_KERNELS_SUMMARY_INTERFACE_H_ diff --git a/tensorflow/core/kernels/summary_interface_test.cc b/tensorflow/core/kernels/summary_interface_test.cc new file mode 100644 index 00000000000..66bde2cb063 --- /dev/null +++ b/tensorflow/core/kernels/summary_interface_test.cc @@ -0,0 +1,167 @@ +/* Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/core/framework/summary.pb.h" +#include "tensorflow/core/kernels/summary_interface.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/refcount.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/io/record_reader.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/util/event.pb.h" + +namespace tensorflow { +namespace { + +Status SummaryTestHelper( + const string& test_name, + std::function writer_fn, + std::function test_fn) { + SummaryWriterInterface* writer; + Env* env = Env::Default(); + TF_CHECK_OK( + CreateSummaryWriter(1, 1, testing::TmpDir(), test_name, env, &writer)); + core::ScopedUnref deleter(writer); + + TF_CHECK_OK(writer_fn(writer)); + TF_CHECK_OK(writer->Flush()); + + std::vector files; + TF_CHECK_OK(env->GetChildren(testing::TmpDir(), &files)); + bool found = false; + for (const string& f : files) { + if (StringPiece(f).contains(test_name)) { + if (found) { + return errors::Unknown("Found more than one file for ", test_name); + } + found = true; + std::unique_ptr read_file; + TF_CHECK_OK(env->NewRandomAccessFile(io::JoinPath(testing::TmpDir(), f), + &read_file)); + io::RecordReader reader(read_file.get(), io::RecordReaderOptions()); + string record; + uint64 offset = 0; + TF_CHECK_OK(reader.ReadRecord(&offset, + &record)); // The first event is irrelevant + TF_CHECK_OK(reader.ReadRecord(&offset, &record)); + Event e; + e.ParseFromString(record); + test_fn(e); + } + } + if (!found) { + return errors::Unknown("Found no file for ", test_name); + } + return Status::OK(); +} + +TEST(SummaryInterfaceTest, WriteTensor) { + TF_CHECK_OK(SummaryTestHelper("tensor_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteTensor( + 2, one, "name", + SummaryMetadata().SerializeAsString())); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + })); +} + +TEST(SummaryInterfaceTest, WriteScalar) { + TF_CHECK_OK(SummaryTestHelper( + "scalar_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteScalar(2, one, "name")); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + EXPECT_EQ(e.summary().value(0).simple_value(), 1.0); + })); +} + +TEST(SummaryInterfaceTest, WriteHistogram) { + TF_CHECK_OK(SummaryTestHelper("hist_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({})); + one.scalar()() = 1.0; + 
TF_RETURN_IF_ERROR( + writer->WriteHistogram(2, one, "name")); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name"); + EXPECT_TRUE(e.summary().value(0).has_histo()); + })); +} + +TEST(SummaryInterfaceTest, WriteImage) { + TF_CHECK_OK(SummaryTestHelper( + "image_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_UINT8, TensorShape({1, 1, 1, 1})); + one.scalar()() = 1; + TF_RETURN_IF_ERROR(writer->WriteImage(2, one, "name", 1, Tensor())); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name/image"); + CHECK(e.summary().value(0).has_image()); + EXPECT_EQ(e.summary().value(0).image().height(), 1); + EXPECT_EQ(e.summary().value(0).image().width(), 1); + EXPECT_EQ(e.summary().value(0).image().colorspace(), 1); + })); +} + +TEST(SummaryInterfaceTest, WriteAudio) { + TF_CHECK_OK(SummaryTestHelper( + "scalar_test", + [](SummaryWriterInterface* writer) { + Tensor one(DT_FLOAT, TensorShape({1, 1})); + one.scalar()() = 1.0; + TF_RETURN_IF_ERROR(writer->WriteAudio(2, one, "name", 1, 1)); + TF_RETURN_IF_ERROR(writer->Flush()); + return Status::OK(); + }, + [](const Event& e) { + EXPECT_EQ(e.step(), 2); + CHECK_EQ(e.summary().value_size(), 1); + EXPECT_EQ(e.summary().value(0).tag(), "name/audio"); + CHECK(e.summary().value(0).has_audio()); + })); +} + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/kernels/summary_kernels.cc b/tensorflow/core/kernels/summary_kernels.cc new file mode 100644 index 00000000000..d0eca0f1e7f --- /dev/null +++ b/tensorflow/core/kernels/summary_kernels.cc @@ -0,0 +1,226 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/kernels/summary_interface.h" + +namespace tensorflow { + +REGISTER_KERNEL_BUILDER(Name("SummaryWriter").Device(DEVICE_CPU), + ResourceHandleOp); + +class CreateSummaryFileWriterOp : public OpKernel { + public: + explicit CreateSummaryFileWriterOp(OpKernelConstruction* ctx) + : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("logdir", &tmp)); + const string logdir = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("max_queue", &tmp)); + const int32 max_queue = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("flush_millis", &tmp)); + const int32 flush_millis = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("filename_suffix", &tmp)); + const string filename_suffix = tmp->scalar()(); + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, CreateSummaryWriter(max_queue, flush_millis, logdir, + filename_suffix, ctx->env(), &s)); + Status status = CreateResource(ctx, HandleFromInput(ctx, 0), s); + if (!status.ok()) { + s->Unref(); + ctx->SetStatus(status); + return; + } + } +}; +REGISTER_KERNEL_BUILDER(Name("CreateSummaryFileWriter").Device(DEVICE_CPU), + CreateSummaryFileWriterOp); + +class FlushSummaryWriterOp : public OpKernel { + public: + explicit FlushSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + OP_REQUIRES_OK(ctx, s->Flush()); + } +}; +REGISTER_KERNEL_BUILDER(Name("FlushSummaryWriter").Device(DEVICE_CPU), + FlushSummaryWriterOp); + +class CloseSummaryWriterOp : public OpKernel { + public: + explicit CloseSummaryWriterOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + OP_REQUIRES_OK(ctx, DeleteResource( + ctx, HandleFromInput(ctx, 0))); + } +}; +REGISTER_KERNEL_BUILDER(Name("CloseSummaryWriter").Device(DEVICE_CPU), + CloseSummaryWriterOp); + +class WriteSummaryOp : public OpKernel { + public: + explicit WriteSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("summary_metadata", &tmp)); + const string& serialized_metadata = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK(ctx, + s->WriteTensor(global_step, *t, tag, serialized_metadata)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteSummary").Device(DEVICE_CPU), + WriteSummaryOp); + +class WriteScalarSummaryOp : public OpKernel { + public: + explicit WriteScalarSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, 
ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("value", &t)); + + OP_REQUIRES_OK(ctx, s->WriteScalar(global_step, *t, tag)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteScalarSummary").Device(DEVICE_CPU), + WriteScalarSummaryOp); + +class WriteHistogramSummaryOp : public OpKernel { + public: + explicit WriteHistogramSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("values", &t)); + + OP_REQUIRES_OK(ctx, s->WriteHistogram(global_step, *t, tag)); + } +}; +REGISTER_KERNEL_BUILDER(Name("WriteHistogramSummary").Device(DEVICE_CPU), + WriteHistogramSummaryOp); + +class WriteImageSummaryOp : public OpKernel { + public: + explicit WriteImageSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + int64 max_images_tmp; + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_images", &max_images_tmp)); + OP_REQUIRES(ctx, max_images_tmp < (1LL << 31), + errors::InvalidArgument("max_images must be < 2^31")); + max_images_ = static_cast(max_images_tmp); + } + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + const Tensor* bad_color; + OP_REQUIRES_OK(ctx, ctx->input("bad_color", &bad_color)); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(bad_color->shape()), + errors::InvalidArgument("bad_color must be a vector, got shape ", + bad_color->shape().DebugString())); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK( + ctx, s->WriteImage(global_step, *t, tag, max_images_, *bad_color)); + } + + private: + int32 max_images_; +}; +REGISTER_KERNEL_BUILDER(Name("WriteImageSummary").Device(DEVICE_CPU), + WriteImageSummaryOp); + +class WriteAudioSummaryOp : public OpKernel { + public: + explicit WriteAudioSummaryOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("max_outputs", &max_outputs_)); + OP_REQUIRES(ctx, max_outputs_ > 0, + errors::InvalidArgument("max_outputs must be > 0")); + } + + void Compute(OpKernelContext* ctx) override { + SummaryWriterInterface* s; + OP_REQUIRES_OK(ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &s)); + core::ScopedUnref unref(s); + const Tensor* tmp; + OP_REQUIRES_OK(ctx, ctx->input("global_step", &tmp)); + const int64 global_step = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("tag", &tmp)); + const string& tag = tmp->scalar()(); + OP_REQUIRES_OK(ctx, ctx->input("sample_rate", &tmp)); + const float sample_rate = tmp->scalar()(); + + const Tensor* t; + OP_REQUIRES_OK(ctx, ctx->input("tensor", &t)); + + OP_REQUIRES_OK( + ctx, s->WriteAudio(global_step, *t, tag, max_outputs_, sample_rate)); + } + + private: + int max_outputs_; + bool has_sample_rate_attr_; + float sample_rate_attr_; +}; +REGISTER_KERNEL_BUILDER(Name("WriteAudioSummary").Device(DEVICE_CPU), + 
WriteAudioSummaryOp); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index b14e5551039..36b8ff09d73 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -83,7 +83,6 @@ TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SYCL_KERNEL); IsVariableInitializedOp); TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS); -TF_CALL_bool(REGISTER_GPU_KERNELS) #undef REGISTER_GPU_KERNELS #endif // GOOGLE_CUDA diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.cc b/tensorflow/core/lib/jpeg/jpeg_mem.cc index 258793aa1e6..3c7e5ca696d 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem.cc @@ -70,13 +70,24 @@ class FewerArgsForCompiler { int stride_; }; +// Check whether the crop window is valid, assuming crop is true. +bool IsCropWindowValid(const UncompressFlags& flags, int input_image_width, + int input_image_height) { + // Crop window is valid only if it is non zero and all the window region is + // within the original image. + return flags.crop_width > 0 && flags.crop_height > 0 && flags.crop_x >= 0 && + flags.crop_y >= 0 && + flags.crop_y + flags.crop_height <= input_image_height && + flags.crop_x + flags.crop_width <= input_image_width; +} + uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // unpack the argball const int datasize = argball->datasize_; const auto& flags = argball->flags_; const int ratio = flags.ratio; int components = flags.components; - int stride = flags.stride; // may be 0 + int stride = flags.stride; // may be 0 int64* const nwarn = argball->pnwarn_; // may be NULL // Can't decode if the ratio is not recognized by libjpeg @@ -159,8 +170,43 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { return nullptr; } + JDIMENSION target_output_width = cinfo.output_width; + JDIMENSION target_output_height = cinfo.output_height; + JDIMENSION skipped_scanlines = 0; +#if !defined(WIN32) + if (flags.crop) { + // Update target output height and width based on crop window. + target_output_height = flags.crop_height; + target_output_width = flags.crop_width; + + // So far, cinfo holds the original input image information. + if (!IsCropWindowValid(flags, cinfo.output_width, cinfo.output_height)) { + LOG(ERROR) << "Invalid crop window: x=" << flags.crop_x + << ", y=" << flags.crop_y << ", w=" << target_output_width + << ", h=" << target_output_height + << " for image_width: " << cinfo.output_width + << " and image_height: " << cinfo.output_height; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + // Update cinfo.output_width. It is tricky that cinfo.output_width must + // fall on an Minimum Coded Unit (MCU) boundary; if it doesn't, then it will + // be moved left to the nearest MCU boundary, and width will be increased + // accordingly. Therefore, the final cinfo.crop_width might differ from the + // given flags.crop_width. Please see libjpeg library for details. + JDIMENSION crop_width = flags.crop_width; + JDIMENSION crop_x = flags.crop_x; + jpeg_crop_scanline(&cinfo, &crop_x, &crop_width); + + // Update cinfo.output_scanline. 
+ skipped_scanlines = jpeg_skip_scanlines(&cinfo, flags.crop_y); + CHECK_EQ(skipped_scanlines, flags.crop_y); + } +#endif + // check for compatible stride - const int min_stride = cinfo.output_width * components * sizeof(JSAMPLE); + const int min_stride = target_output_width * components * sizeof(JSAMPLE); if (stride == 0) { stride = min_stride; } else if (stride < min_stride) { @@ -170,47 +216,88 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { } // Remember stride and height for use in Uncompress - argball->height_ = cinfo.output_height; + argball->height_ = target_output_height; argball->stride_ = stride; - uint8* const dstdata = argball->allocate_output_( - cinfo.output_width, cinfo.output_height, components); +#if defined(WIN32) + uint8* dstdata = nullptr; + if (flags.crop) { + dstdata = new JSAMPLE[stride * target_output_height]; + } else { + dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); + } +#else + uint8* dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); +#endif if (dstdata == nullptr) { jpeg_destroy_decompress(&cinfo); return nullptr; } JSAMPLE* output_line = static_cast(dstdata); - // Temporary buffer used for CMYK -> RGB conversion. + // jpeg_read_scanlines requires the buffers to be allocated based on + // cinfo.output_width, but the target image width might be different if crop + // is enabled and crop_width is not MCU aligned. In this case, we need to + // realign the scanline output to achieve the exact cropping. Notably, only + // cinfo.output_width needs to fall on MCU boundary, while cinfo.output_height + // has no such constraint. + const bool need_realign_cropped_scanline = + (target_output_width != cinfo.output_width); const bool use_cmyk = (cinfo.out_color_space == JCS_CMYK); - tempdata = use_cmyk ? new JSAMPLE[cinfo.output_width * 4] : nullptr; + + if (use_cmyk) { + // Temporary buffer used for CMYK -> RGB conversion. + tempdata = new JSAMPLE[cinfo.output_width * 4]; + } else if (need_realign_cropped_scanline) { + // Temporary buffer used for MCU-aligned scanline data. + tempdata = new JSAMPLE[cinfo.output_width * components]; + } // If there is an error reading a line, this aborts the reading. // Save the fraction of the image that has been read. - argball->height_read_ = cinfo.output_height; - while (cinfo.output_scanline < cinfo.output_height) { + argball->height_read_ = target_output_height; + + // These variables are just to avoid repeated computation in the loop. + const int max_scanlines_to_read = skipped_scanlines + target_output_height; + const int mcu_align_offset = + (cinfo.output_width - target_output_width) * (use_cmyk ? 4 : components); + while (cinfo.output_scanline < max_scanlines_to_read) { int num_lines_read = 0; - if (cinfo.out_color_space == JCS_CMYK) { + if (use_cmyk) { num_lines_read = jpeg_read_scanlines(&cinfo, &tempdata, 1); - // Convert CMYK to RGB - for (size_t i = 0; i < cinfo.output_width; ++i) { - int c = tempdata[4 * i + 0]; - int m = tempdata[4 * i + 1]; - int y = tempdata[4 * i + 2]; - int k = tempdata[4 * i + 3]; - int r, g, b; - if (cinfo.saw_Adobe_marker) { - r = (k * c) / 255; - g = (k * m) / 255; - b = (k * y) / 255; - } else { - r = (255 - k) * (255 - c) / 255; - g = (255 - k) * (255 - m) / 255; - b = (255 - k) * (255 - y) / 255; + if (num_lines_read > 0) { + // Convert CMYK to RGB if scanline read succeeded. 
+ for (size_t i = 0; i < target_output_width; ++i) { + int offset = 4 * i; + if (need_realign_cropped_scanline) { + // Align the offset for MCU boundary. + offset += mcu_align_offset; + } + const int c = tempdata[offset + 0]; + const int m = tempdata[offset + 1]; + const int y = tempdata[offset + 2]; + const int k = tempdata[offset + 3]; + int r, g, b; + if (cinfo.saw_Adobe_marker) { + r = (k * c) / 255; + g = (k * m) / 255; + b = (k * y) / 255; + } else { + r = (255 - k) * (255 - c) / 255; + g = (255 - k) * (255 - m) / 255; + b = (255 - k) * (255 - y) / 255; + } + output_line[3 * i + 0] = r; + output_line[3 * i + 1] = g; + output_line[3 * i + 2] = b; } - output_line[3 * i + 0] = r; - output_line[3 * i + 1] = g; - output_line[3 * i + 2] = b; + } + } else if (need_realign_cropped_scanline) { + num_lines_read = jpeg_read_scanlines(&cinfo, &tempdata, 1); + if (num_lines_read > 0) { + memcpy(output_line, tempdata + mcu_align_offset, min_stride); } } else { num_lines_read = jpeg_read_scanlines(&cinfo, &output_line, 1); @@ -218,12 +305,13 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { // Handle error cases if (num_lines_read == 0) { LOG(ERROR) << "Premature end of JPEG data. Stopped at line " - << cinfo.output_scanline << "/" << cinfo.output_height; + << cinfo.output_scanline - skipped_scanlines << "/" + << target_output_height; if (!flags.try_recover_truncated_jpeg) { - argball->height_read_ = cinfo.output_scanline; + argball->height_read_ = cinfo.output_scanline - skipped_scanlines; error = JPEGERRORS_UNEXPECTED_END_OF_DATA; } else { - for (size_t line = cinfo.output_scanline; line < cinfo.output_height; + for (size_t line = cinfo.output_scanline; line < max_scanlines_to_read; ++line) { if (line == 0) { // If even the first line is missing, fill with black color @@ -235,9 +323,9 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { output_line += stride; } argball->height_read_ = - cinfo.output_height; // consider all lines as read + target_output_height; // consider all lines as read // prevent error-on-exit in libjpeg: - cinfo.output_scanline = cinfo.output_height; + cinfo.output_scanline = max_scanlines_to_read; } break; } @@ -248,23 +336,33 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { delete[] tempdata; tempdata = nullptr; +#if !defined(WIN32) + if (flags.crop && cinfo.output_scanline < cinfo.output_height) { + // Skip the rest of scanlines, required by jpeg_destroy_decompress. + jpeg_skip_scanlines(&cinfo, + cinfo.output_height - flags.crop_y - flags.crop_height); + // After this, cinfo.output_height must be equal to cinfo.output_height; + // otherwise, jpeg_destroy_decompress would fail. + } +#endif + // Convert the RGB data to RGBA, with alpha set to 0xFF to indicate // opacity. // RGBRGBRGB... --> RGBARGBARGBA... if (components == 4) { // Start on the last line. JSAMPLE* scanlineptr = static_cast( - dstdata + static_cast(cinfo.output_height - 1) * stride); + dstdata + static_cast(target_output_height - 1) * stride); const JSAMPLE kOpaque = -1; // All ones appropriate for JSAMPLE. - const int right_rgb = (cinfo.output_width - 1) * 3; - const int right_rgba = (cinfo.output_width - 1) * 4; + const int right_rgb = (target_output_width - 1) * 3; + const int right_rgba = (target_output_width - 1) * 4; - for (int y = cinfo.output_height; y-- > 0;) { + for (int y = target_output_height; y-- > 0;) { // We do all the transformations in place, going backwards for each row. 
const JSAMPLE* rgb_pixel = scanlineptr + right_rgb; JSAMPLE* rgba_pixel = scanlineptr + right_rgba; scanlineptr -= stride; - for (int x = cinfo.output_width; x-- > 0; + for (int x = target_output_width; x-- > 0; rgba_pixel -= 4, rgb_pixel -= 3) { // We copy the 3 bytes at rgb_pixel into the 4 bytes at rgba_pixel // The "a" channel is set to be opaque. @@ -319,8 +417,61 @@ uint8* UncompressLow(const void* srcdata, FewerArgsForCompiler* argball) { LOG(ERROR) << "Unhandled case " << error; break; } - jpeg_destroy_decompress(&cinfo); +#if defined(WIN32) + // TODO(tanmingxing): delete all these code after migrating to libjpeg_turbo + // for Windows. + if (flags.crop) { + // Update target output height and width based on crop window. + target_output_height = flags.crop_height; + target_output_width = flags.crop_width; + + // cinfo holds the original input image information. + if (!IsCropWindowValid(flags, cinfo.output_width, cinfo.output_height)) { + LOG(ERROR) << "Invalid crop window: x=" << flags.crop_x + << ", y=" << flags.crop_y << ", w=" << target_output_width + << ", h=" << target_output_height + << " for image_width: " << cinfo.output_width + << " and image_height: " << cinfo.output_height; + delete[] dstdata; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + const uint8* full_image = dstdata; + dstdata = argball->allocate_output_(target_output_width, + target_output_height, components); + if (dstdata == nullptr) { + delete[] full_image; + jpeg_destroy_decompress(&cinfo); + return nullptr; + } + + const int full_image_stride = stride; + // Update stride and hight for crop window. + const int min_stride = target_output_width * components * sizeof(JSAMPLE); + if (flags.stride == 0) { + stride = min_stride; + } + argball->height_ = target_output_height; + argball->stride_ = stride; + + if (argball->height_read_ > target_output_height) { + argball->height_read_ = target_output_height; + } + const int crop_offset = flags.crop_x * components * sizeof(JSAMPLE); + const uint8* full_image_ptr = full_image + flags.crop_y * full_image_stride; + uint8* crop_image_ptr = dstdata; + for (int i = 0; i < argball->height_read_; i++) { + memcpy(crop_image_ptr, full_image_ptr + crop_offset, min_stride); + crop_image_ptr += stride; + full_image_ptr += full_image_stride; + } + delete[] full_image; + } +#endif + + jpeg_destroy_decompress(&cinfo); return dstdata; } diff --git a/tensorflow/core/lib/jpeg/jpeg_mem.h b/tensorflow/core/lib/jpeg/jpeg_mem.h index ac34f29f221..59342d28c0f 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem.h +++ b/tensorflow/core/lib/jpeg/jpeg_mem.h @@ -61,6 +61,17 @@ struct UncompressFlags { // // Setting this has a quality/speed trade-off implication. J_DCT_METHOD dct_method = JDCT_DEFAULT; + + // Settings of crop window before decompression. + bool crop = false; + // Vertical coordinate of the top-left corner of the result in the input. + int crop_x = 0; + // Horizontal coordinate of the top-left corner of the result in the input. + int crop_y = 0; + // Width of the output image. + int crop_width = 0; + // Height of the output image. 
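IsCropWindowValid is called above but defined elsewhere in this patch; judging from the call site and the invalid-crop-window tests added to jpeg_mem_unittest.cc further down, a plausible sketch of the check (an assumption, not the actual implementation) is:

```cpp
// Hypothetical sketch: positive extents, non-negative offsets, and a window
// fully contained in the decoded image.
static bool IsCropWindowValid(const UncompressFlags& flags,
                              int input_image_width, int input_image_height) {
  return flags.crop_width > 0 && flags.crop_height > 0 && flags.crop_x >= 0 &&
         flags.crop_y >= 0 &&
         flags.crop_x + flags.crop_width <= input_image_width &&
         flags.crop_y + flags.crop_height <= input_image_height;
}
```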
+ int crop_height = 0; }; // Uncompress some raw JPEG data given by the pointer srcdata and the length diff --git a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc index cc8646750e1..15266af1dbd 100644 --- a/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc +++ b/tensorflow/core/lib/jpeg/jpeg_mem_unittest.cc @@ -57,7 +57,7 @@ void ReadFileToStringOrDie(Env* env, const string& filename, string* output) { void TestJPEG(Env* env, const string& jpegfile) { // Read the data from the jpeg file into memory string jpeg; - ReadFileToStringOrDie(Env::Default(), jpegfile, &jpeg); + ReadFileToStringOrDie(env, jpegfile, &jpeg); const int fsize = jpeg.size(); const uint8* const temp = bit_cast(jpeg.data()); @@ -95,6 +95,194 @@ TEST(JpegMemTest, Jpeg) { TestJPEG(env, data_path + "jpeg_merge_test1_cmyk.jpg"); } +void TestCropAndDecodeJpeg(Env* env, const string& jpegfile, + const UncompressFlags& default_flags) { + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, jpegfile, &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + // Decode the whole image. + std::unique_ptr imgdata1; + int w1, h1, c1; + { + UncompressFlags flags = default_flags; + if (flags.stride == 0) { + imgdata1.reset(Uncompress(temp, fsize, flags, &w1, &h1, &c1, nullptr)); + } else { + // If stride is not zero, the default allocator would fail because it + // allocate w*h*c bytes, but the actual required bytes should be stride*h. + // Therefore, we provide a specialized allocator here. + uint8* buffer = nullptr; + imgdata1.reset(Uncompress(temp, fsize, flags, nullptr, + [&](int width, int height, int components) { + w1 = width; + h1 = height; + c1 = components; + buffer = new uint8[flags.stride * height]; + return buffer; + })); + } + ASSERT_NE(imgdata1, nullptr); + } + + auto check_crop_and_decode_func = [&](int crop_x, int crop_y, int crop_width, + int crop_height) { + std::unique_ptr imgdata2; + int w, h, c; + UncompressFlags flags = default_flags; + flags.crop = true; + flags.crop_x = crop_x; + flags.crop_y = crop_y; + flags.crop_width = crop_width; + flags.crop_height = crop_height; + if (flags.stride == 0) { + imgdata2.reset(Uncompress(temp, fsize, flags, &w, &h, &c, nullptr)); + } else { + uint8* buffer = nullptr; + imgdata2.reset(Uncompress(temp, fsize, flags, nullptr, + [&](int width, int height, int components) { + w = width; + h = height; + c = components; + buffer = new uint8[flags.stride * height]; + return buffer; + })); + } + ASSERT_NE(imgdata2, nullptr); + + ASSERT_EQ(w, crop_width); + ASSERT_EQ(h, crop_height); + ASSERT_EQ(c, c1); + + const int stride1 = (flags.stride != 0) ? flags.stride : w1 * c; + const int stride2 = (flags.stride != 0) ? flags.stride : w * c; + for (int i = 0; i < crop_height; i++) { + const uint8* p1 = &imgdata1[(i + crop_y) * stride1 + crop_x * c]; + const uint8* p2 = &imgdata2[i * stride2]; + + for (int j = 0; j < c * w; j++) { + ASSERT_EQ(p1[j], p2[j]) + << "p1 != p2 in [" << i << "][" << j / 3 << "][" << j % 3 << "]"; + } + } + }; + + // Check different crop windows. 
+ check_crop_and_decode_func(0, 0, 5, 5); + check_crop_and_decode_func(0, 0, w1, 5); + check_crop_and_decode_func(0, 0, 5, h1); + check_crop_and_decode_func(0, 0, w1, h1); + check_crop_and_decode_func(w1 - 5, h1 - 6, 5, 6); + check_crop_and_decode_func(5, 6, 10, 15); +} + +TEST(JpegMemTest, CropAndDecodeJpeg) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + + // Test basic flags for jpeg and cmyk jpeg. + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1_cmyk.jpg", flags); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithRatio) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + for (int ratio : {1, 2, 4, 8}) { + flags.ratio = ratio; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + } +} + +TEST(JpegMemTest, CropAndDecodeJpegWithComponents) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + for (const int components : {0, 1, 3}) { + flags.components = components; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + } +} + +TEST(JpegMemTest, CropAndDecodeJpegWithUpScaling) { + Env* env = Env::Default(); + const string data_path = kTestData; + UncompressFlags flags; + flags.fancy_upscaling = true; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithStride) { + Env* env = Env::Default(); + const string data_path = kTestData; + + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + int w, h, c; + ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); + + // stride must be either 0 or > w*c; otherwise, uncompress would fail. + UncompressFlags flags; + flags.stride = w * c; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + flags.stride = w * c * 3; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); + flags.stride = w * c + 100; + TestCropAndDecodeJpeg(env, data_path + "jpeg_merge_test1.jpg", flags); +} + +void CheckInvalidCropWindowFailed(const uint8* const temp, int fsize, int x, + int y, int w, int h) { + std::unique_ptr imgdata; + int ww, hh, cc; + UncompressFlags flags; + flags.components = 3; + flags.crop = true; + flags.crop_x = x; + flags.crop_y = y; + flags.crop_width = w; + flags.crop_height = h; + imgdata.reset(Uncompress(temp, fsize, flags, &ww, &hh, &cc, nullptr)); + CHECK(imgdata == nullptr); +} + +TEST(JpegMemTest, CropAndDecodeJpegWithInvalidCropWindow) { + Env* env = Env::Default(); + const string data_path = kTestData; + + // Read the data from the jpeg file into memory + string jpeg; + ReadFileToStringOrDie(env, data_path + "jpeg_merge_test1.jpg", &jpeg); + const int fsize = jpeg.size(); + auto temp = bit_cast(jpeg.data()); + + int w, h, c; + ASSERT_TRUE(GetImageInfo(temp, fsize, &w, &h, &c)); + + // Width and height for the crop window must be non zero. + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, /*w=*/0, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, 11, /*h=*/0); + + // Crop window must be non negative. 
+ CheckInvalidCropWindowFailed(temp, fsize, /*x=*/-1, 11, 11, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, /*y=*/-1, 11, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, /*w=*/-1, 11); + CheckInvalidCropWindowFailed(temp, fsize, 11, 11, 11, /*h=*/-1); + + // Invalid crop window width: x + crop_width = w + 1 > w + CheckInvalidCropWindowFailed(temp, fsize, /*x=*/w - 10, 11, 11, 11); + // Invalid crop window height: y + crop_height= h + 1 > h + CheckInvalidCropWindowFailed(temp, fsize, 11, /*y=*/h - 10, 11, 11); +} + TEST(JpegMemTest, Jpeg2) { // create known data, for size in_w x in_h const int in_w = 256; diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index ad290d123e5..22d4a0056f8 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -10467,8 +10467,8 @@ op { type_list_attr: "Treduce_func_other_arguments" } input_arg { - name: "window_size" - type: DT_INT64 + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" } output_arg { name: "handle" @@ -10482,6 +10482,10 @@ op { name: "reduce_func" type: "func" } + attr { + name: "window_size_func" + type: "func" + } attr { name: "Tkey_func_other_arguments" type: "list(type)" @@ -10492,6 +10496,11 @@ op { type: "list(type)" has_minimum: true } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } attr { name: "output_types" type: "list(type)" diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index f6bd5768d7c..37d9a737e29 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -237,12 +237,15 @@ REGISTER_OP("GroupByWindowDataset") .Input("input_dataset: resource") .Input("key_func_other_arguments: Tkey_func_other_arguments") .Input("reduce_func_other_arguments: Treduce_func_other_arguments") - .Input("window_size: int64") + .Input( + "window_size_func_other_arguments: Twindow_size_func_other_arguments") .Output("handle: resource") .Attr("key_func: func") .Attr("reduce_func: func") + .Attr("window_size_func: func") .Attr("Tkey_func_other_arguments: list(type) >= 0") .Attr("Treduce_func_other_arguments: list(type) >= 0") + .Attr("Twindow_size_func_other_arguments: list(type) >= 0") .Attr("output_types: list(type) >= 1") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape) diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 0a96258dd1f..1ab1f1a7366 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1945,7 +1945,7 @@ Computes softsign gradients for a softsign operation. gradients: The backpropagated gradients to the corresponding softsign operation. features: The features passed as input to the corresponding softsign operation. -backprops: The gradients: `gradients / (1 + abs(-features)) ** 2`. +backprops: The gradients: `gradients / (1 + abs(features)) ** 2`. 
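On the softsign documentation fix above: softsign(x) = x / (1 + |x|), whose derivative is 1 / (1 + |x|)^2. Because |-x| = |x|, the old and new formulas denote the same quantity; the edit only drops the redundant negation. A minimal numeric check:

```cpp
#include <cmath>

// Softsign and the gradient described by the corrected doc string.
float Softsign(float x) { return x / (1.0f + std::fabs(x)); }

float SoftsignGradValue(float gradient, float feature) {
  const float d = 1.0f + std::fabs(feature);
  return gradient / (d * d);  // e.g. feature = 3, gradient = 1  ->  0.0625
}
```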
)doc"); // -------------------------------------------------------------------------- diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 13356e1d8a6..3a28ce3767d 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -9611,8 +9611,8 @@ op { type_list_attr: "Treduce_func_other_arguments" } input_arg { - name: "window_size" - type: DT_INT64 + name: "window_size_func_other_arguments" + type_list_attr: "Twindow_size_func_other_arguments" } output_arg { name: "handle" @@ -9627,6 +9627,10 @@ op { name: "reduce_func" type: "func" } + attr { + name: "window_size_func" + type: "func" + } attr { name: "Tkey_func_other_arguments" type: "list(type)" @@ -9637,6 +9641,11 @@ op { type: "list(type)" has_minimum: true } + attr { + name: "Twindow_size_func_other_arguments" + type: "list(type)" + has_minimum: true + } attr { name: "output_types" type: "list(type)" @@ -24677,7 +24686,7 @@ op { } output_arg { name: "backprops" - description: "The gradients: `gradients / (1 + abs(-features)) ** 2`." + description: "The gradients: `gradients / (1 + abs(features)) ** 2`." type_attr: "T" } attr { diff --git a/tensorflow/core/ops/summary_ops.cc b/tensorflow/core/ops/summary_ops.cc new file mode 100644 index 00000000000..f778b487972 --- /dev/null +++ b/tensorflow/core/ops/summary_ops.cc @@ -0,0 +1,218 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); + +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("SummaryWriter") + .Output("writer: resource") + .Attr("shared_name: string = ''") + .Attr("container: string = ''") + .SetShapeFn(shape_inference::ScalarShape) + .Doc(R"doc( +Returns a handle to be used to access a summary writer. + +The summary writer is an in-graph resource which can be used by ops to write +summaries to event files. + +writer: the summary writer resource. Scalar handle. +)doc"); + +REGISTER_OP("CreateSummaryFileWriter") + .Input("writer: resource") + .Input("logdir: string") + .Input("max_queue: int32") + .Input("flush_millis: int32") + .Input("filename_suffix: string") + .Doc(R"doc( +Creates a summary file writer accessible by the given resource handle. + +writer: A handle to the summary writer resource +logdir: Directory where the event file will be written. +max_queue: Size of the queue of pending events and summaries. +flush_millis: How often, in milliseconds, to flush the pending events and + summaries to disk. +filename_suffix: Every event file's name is suffixed with this suffix. +)doc"); + +REGISTER_OP("FlushSummaryWriter") + .Input("writer: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"( +Flushes the writer's unwritten events. + +writer: A handle to the summary writer resource. 
+)"); + +REGISTER_OP("CloseSummaryWriter") + .Input("writer: resource") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"( +Flushes and closes the summary writer. + +Also removes it from the resource manager. To reopen, use another +CreateSummaryFileWriter op. + +writer: A handle to the summary writer resource. +)"); + +REGISTER_OP("WriteSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tensor: T") + .Input("tag: string") + .Input("summary_metadata: string") + .Attr("T: type") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Outputs a `Summary` protocol buffer with a tensor. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tensor: A tensor to serialize. +tag: The summary's tag. +summary_metadata: Serialized SummaryMetadata protocol buffer containing + plugin-related metadata for this summary. +)doc"); + +REGISTER_OP("WriteScalarSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("value: T") + .Attr("T: realnumbertype") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with scalar values. + +The input `tag` and `value` must have the scalars. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Tag for the summary. +value: Value for the summary. +)doc"); + +REGISTER_OP("WriteHistogramSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("values: T") + .Attr("T: realnumbertype = DT_FLOAT") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with a histogram. + +The generated +[`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +has one summary value containing a histogram for `values`. + +This op reports an `InvalidArgument` error if any value is not finite. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Tag to use for the `Summary.Value`. +values: Any shape. Values to use to build the histogram. +)doc"); + +REGISTER_OP("WriteImageSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("tensor: T") + .Input("bad_color: uint8") + .Attr("max_images: int >= 1 = 3") + .Attr("T: {uint8, float, half} = DT_FLOAT") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with images. + +The summary has up to `max_images` summary values containing images. The +images are built from `tensor` which must be 4-D with shape `[batch_size, +height, width, channels]` and where `channels` can be: + +* 1: `tensor` is interpreted as Grayscale. +* 3: `tensor` is interpreted as RGB. +* 4: `tensor` is interpreted as RGBA. + +The images have the same number of channels as the input tensor. For float +input, the values are normalized one image at a time to fit in the range +`[0, 255]`. `uint8` values are unchanged. The op uses two different +normalization algorithms: + +* If the input values are all positive, they are rescaled so the largest one + is 255. + +* If any input value is negative, the values are shifted so input value 0.0 + is at 127. They are then rescaled so that either the smallest value is 0, + or the largest one is 255. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_images` is 1, the summary value tag is '*tag*/image'. 
+* If `max_images` is greater than 1, the summary value tags are + generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. + +The `bad_color` argument is the color to use in the generated images for +non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +Each element must be in the range `[0, 255]` (It represents the value of a +pixel in the output image). Non-finite values in the input tensor are +replaced by this tensor in the output image. The default value is the color +red. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 4-D of shape `[batch_size, height, width, channels]` where + `channels` is 1, 3, or 4. +max_images: Max number of batch elements to generate images for. +bad_color: Color to use for pixels with non-finite values. +)doc"); + +REGISTER_OP("WriteAudioSummary") + .Input("writer: resource") + .Input("global_step: int64") + .Input("tag: string") + .Input("tensor: float") + .Input("sample_rate: float") + .Attr("max_outputs: int >= 1 = 3") + .SetShapeFn(shape_inference::NoOutputs) + .Doc(R"doc( +Writes a `Summary` protocol buffer with audio. + +The summary has up to `max_outputs` summary values containing audio. The +audio is built from `tensor` which must be 3-D with shape `[batch_size, +frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. + +The `tag` argument is a scalar `Tensor` of type `string`. It is used to +build the `tag` of the summary values: + +* If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +* If `max_outputs` is greater than 1, the summary value tags are + generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. + +writer: A handle to a summary writer. +global_step: The step to write the summary for. +tag: Scalar. Used to build the `tag` attribute of the summary values. +tensor: 2-D of shape `[batch_size, frames]`. +sample_rate: The sample rate of the signal in hertz. +max_outputs: Max number of batch elements to generate audio for. +)doc"); + +} // namespace tensorflow diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 126558cac38..e1ad66c387a 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -75,6 +75,9 @@ def tf_proto_library_py(name, srcs=[], protodeps=[], deps=[], visibility=[], def tf_jspb_proto_library(**kwargs): pass +def tf_nano_proto_library(**kwargs): + pass + def tf_proto_library(name, srcs = [], has_services = None, protodeps = [], visibility = [], testonly = 0, cc_libs = [], diff --git a/tensorflow/core/profiler/README.md b/tensorflow/core/profiler/README.md index 5c50a86c88f..f0d4dafd3ea 100644 --- a/tensorflow/core/profiler/README.md +++ b/tensorflow/core/profiler/README.md @@ -56,7 +56,7 @@ with tf.contrib.tfprof.ProfileContext() as pctx: ```shell # Profiling from Python API is not interactive. -# Dump the profiles to files and profile with interactive command line. +# Dump the profiles to files and profile with interactive command line or web UI. 
with tf.contrib.tfprof.ProfileContext() as pctx: pctx.add_auto_profile_dump('/tmp/profiles', [100]) train_loop() @@ -66,7 +66,15 @@ bazel-bin/tensorflow/core/profiler/profiler \ --run_meta_path=/tmp/profiles/run_meta \ --op_log_path=/tmp/profiles/tfprof_log \ tfprof> op -select micros,bytes,occurrence -order_by micros + + +# To be open sourced... +bazel-bin/third_party/tensorflow/python/profiler/profiler_ui \ + --graph_path=/tmp/profiles/graph.pbtxt \ + --run_meta_path=/tmp/profiles/run_meta \ + --op_log_path=/tmp/profiles/tfprof_log \ ``` +![ProfilerUI](g3doc/profiler_ui.jpg) Detail Tutorials @@ -239,5 +247,6 @@ bug fix. `OpLogProto` is a good plus if it is used. #### Teams * Xin Pan (xpan@google.com, github: panyx0718) +* Chris Antaki * Yao Zhang * Jon Shlens diff --git a/tensorflow/core/profiler/g3doc/profiler_ui.jpg b/tensorflow/core/profiler/g3doc/profiler_ui.jpg new file mode 100644 index 00000000000..36aa94502a8 Binary files /dev/null and b/tensorflow/core/profiler/g3doc/profiler_ui.jpg differ diff --git a/tensorflow/core/util/permutation_input_iterator.h b/tensorflow/core/util/permutation_input_iterator.h new file mode 100644 index 00000000000..f6375b25157 --- /dev/null +++ b/tensorflow/core/util/permutation_input_iterator.h @@ -0,0 +1,134 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ +#define TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ + +#include +#include + +namespace tensorflow { + +template +class PermutationInputIterator { + public: + // Required iterator traits + typedef PermutationInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of + ///< subtracting one iterator from another + typedef ValueType + value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the + ///< iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the + ///< iterator can point to + + typedef std::random_access_iterator_tag + iterator_category; ///< The iterator category + + private: + InputIteratorT input_itr; + IndexIteratorT index_itr; + + public: + /// Constructor + __host__ __device__ __forceinline__ PermutationInputIterator( + InputIteratorT input_itr, ///< Input iterator to wrap + IndexIteratorT index_itr) ///< Conversion functor to wrap + : input_itr(input_itr), index_itr(index_itr) {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) { + self_type retval = *this; + index_itr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() { + index_itr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const { + return input_itr[*index_itr]; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const { + self_type retval(input_itr, index_itr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { + index_itr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const { + self_type retval(input_itr, index_itr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { + index_itr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type + operator-(self_type other) const { + return index_itr - other.index_itr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const { + return input_itr[index_itr[n]]; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() { + return input_itr + *index_itr; + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { + return (index_itr == rhs.index_itr && input_itr == rhs.input_itr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { + return !(*this == rhs); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { + return os; + } +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_UTIL_PERMUTATION_INPUT_ITERATOR_H_ diff --git a/tensorflow/core/util/tensor_slice_reader.h b/tensorflow/core/util/tensor_slice_reader.h index eeb31295737..5932d59a159 100644 --- a/tensorflow/core/util/tensor_slice_reader.h +++ b/tensorflow/core/util/tensor_slice_reader.h @@ -165,13 +165,18 @@ bool TensorSliceReader::CopySliceData(const string& name, 
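A usage sketch for the new PermutationInputIterator. The template parameter list is garbled in this copy of the patch; the order assumed here (ValueType, InputIteratorT, IndexIteratorT) follows the CUB iterator this header mirrors, and since the members carry __host__ __device__ qualifiers the example is written as CUDA code.

```cpp
#include "tensorflow/core/util/permutation_input_iterator.h"

// With values = {10, 20, 30, 40} and indices = {3, 0, 2}, this writes
// out = {40, 10, 30}: the iterator reads `values` in the order given by
// `indices` without materializing the permuted array.
__global__ void GatherExample(const float* values, const int* indices,
                              float* out, int n) {
  tensorflow::PermutationInputIterator<float, const float*, const int*> it(
      values, indices);
  for (int i = 0; i < n; ++i) out[i] = it[i];
}
```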
CHECK_GE(idx, 0) << "Failed to find the index for filename " << fname; // We read a record in the corresponding sstable const string key = EncodeTensorNameSlice(name, slice_s); - CHECK(sss_[idx]->Get(key, &value)) - << "Failed to seek to the record for tensor " << name << ", slice " - << slice_s.DebugString() << ": computed key = " << key; + if (!sss_[idx]->Get(key, &value)) { + VLOG(1) << "Failed to seek to the record for tensor " << name + << ", slice " << slice_s.DebugString() + << ": computed key = " << key; + return false; + } SavedTensorSlices sts; - CHECK(ParseProtoUnlimited(&sts, value)) - << "Failed to parse the record for tensor " << name << ", slice " - << slice_s.DebugString() << ": computed key = " << key; + if (!ParseProtoUnlimited(&sts, value)) { + VLOG(1) << "Failed to parse the record for tensor " << name << ", slice " + << slice_s.DebugString() << ": computed key = " << key; + return false; + } CopyDataFromTensorSliceToTensorSlice( tss->shape(), slice_s, slice, checkpoint::TensorProtoData(sts.data().data()), data); diff --git a/tensorflow/core/util/transform_output_iterator.h b/tensorflow/core/util/transform_output_iterator.h new file mode 100644 index 00000000000..1640791ad17 --- /dev/null +++ b/tensorflow/core/util/transform_output_iterator.h @@ -0,0 +1,149 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ +#define TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ + +#include +#include + +namespace tensorflow { + +template +class TransformOutputIterator { + private: + // Proxy object + struct Reference { + StoreType* ptr; + ConversionOp conversion_op; + + /// Constructor + __host__ __device__ __forceinline__ Reference(StoreType* ptr, + ConversionOp conversion_op) + : ptr(ptr), conversion_op(conversion_op) {} + + /// Assignment + __host__ __device__ __forceinline__ InputType operator=(InputType val) { + *ptr = conversion_op(val); + return val; + } + }; + + public: + // Required iterator traits + typedef TransformOutputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of + ///< subtracting one iterator from another + typedef void + value_type; ///< The type of the element the iterator can point to + typedef void pointer; ///< The type of a pointer to an element the iterator + ///< can point to + typedef Reference reference; ///< The type of a reference to an element the + ///< iterator can point to + + typedef std::random_access_iterator_tag + iterator_category; ///< The iterator category + + /*private:*/ + + StoreType* ptr; + ConversionOp conversion_op; + + public: + /// Constructor + template + __host__ __device__ __forceinline__ TransformOutputIterator( + QualifiedStoreType* ptr, + ConversionOp conversionOp) ///< Native pointer to wrap + : ptr(ptr), conversion_op(conversionOp) {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) { + self_type retval = *this; + ptr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() { + ptr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const { + return Reference(ptr, conversion_op); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const { + self_type retval(ptr + n, conversion_op); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const { + self_type retval(ptr - n, conversion_op); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type + operator-(self_type other) const { + return ptr - other.ptr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const { + return Reference(ptr + n, conversion_op); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) { + return os; + } +}; + +} // end namespace tensorflow + +#endif // TENSORFLOW_UTIL_TRANSFORM_OUTPUT_ITERATOR_H_ diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index bf3cb5bf196..ba26bd5e941 100644 --- 
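Similarly, a usage sketch for TransformOutputIterator; the template argument order assumed here (StoreType, InputType, ConversionOp) again mirrors the CUB original, since the parameter list is garbled in this copy. Every value written through the iterator passes through the conversion functor before it is stored, which lets a type conversion be fused into the output of a device primitive.

```cpp
#include <cstdint>
#include "tensorflow/core/util/transform_output_iterator.h"

struct FloatToByte {
  __host__ __device__ uint8_t operator()(float v) const {
    return static_cast<uint8_t>(v * 255.0f);
  }
};

// Writing 0.0, 0.25, 0.5, 0.75 through `out` stores 0, 63, 127, 191 in
// `quantized`: the conversion happens inside Reference::operator=.
__global__ void QuantizeExample(uint8_t* quantized) {
  tensorflow::TransformOutputIterator<uint8_t, float, FloatToByte> out(
      quantized, FloatToByte());
  for (int i = 0; i < 4; ++i) out[i] = 0.25f * i;
}
```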
a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -1,4 +1,4 @@ -# Using the `Dataset` API for TensorFlow Input Pipelines +# Importing Data The `Dataset` API enables you to build complex input pipelines from simple, reusable pieces. For example, the pipeline for an image model might @@ -735,7 +735,7 @@ def dataset_input_fn(): return {"image_data": image, "date_time": parsed["date_time"]}, label - # Use `Dataset.map()` to build a pair of a feature dictionary and a label + # Use `Dataset.map()` to build a pair of a feature dictionary and a label # tensor for each example. dataset = dataset.map(parser) dataset = dataset.shuffle(buffer_size=10000) diff --git a/tensorflow/docs_src/programmers_guide/estimators.md b/tensorflow/docs_src/programmers_guide/estimators.md index a5724ea294e..755bb049c99 100644 --- a/tensorflow/docs_src/programmers_guide/estimators.md +++ b/tensorflow/docs_src/programmers_guide/estimators.md @@ -134,7 +134,7 @@ The heart of every Estimator--whether pre-made or custom--is its evaluation, and prediction. When you are using a pre-made Estimator, someone else has already implemented the model function. When relying on a custom Estimator, you must write the model function yourself. A -${$extend/estimators$companion document) +@{$extend/estimators$companion document} explains how to write the model function. diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md index 22fe229422a..eef35d6dcc7 100644 --- a/tensorflow/docs_src/programmers_guide/index.md +++ b/tensorflow/docs_src/programmers_guide/index.md @@ -4,6 +4,8 @@ The documents in this unit dive into the details of writing TensorFlow code. For TensorFlow 1.3, we revised this document extensively. The units are now as follows: + * @{$programmers_guide/estimators$Estimators}, which introduces a high-level + TensorFlow API that greatly simplifies ML programming. * @{$programmers_guide/tensors$Tensors}, which explains how to create, manipulate, and access Tensors--the fundamental object in TensorFlow. * @{$programmers_guide/variables$Variables}, which details how @@ -18,8 +20,6 @@ The units are now as follows: such as Estimators or Keras, the high-level API creates and manages graphs and sessions for you, but understanding graphs and sessions can still be helpful. - * @{$programmers_guide/estimators$Estimators}, which introduces a high-level - TensorFlow API that greatly simplifies ML programming. * @{$programmers_guide/saved_model$Saving and Restoring}, which explains how to save and restore variables and models. * @{$programmers_guide/datasets$Input Pipelines}, which explains how to diff --git a/tensorflow/docs_src/programmers_guide/leftnav_files b/tensorflow/docs_src/programmers_guide/leftnav_files index 5082e7f36c8..0c42f119c95 100644 --- a/tensorflow/docs_src/programmers_guide/leftnav_files +++ b/tensorflow/docs_src/programmers_guide/leftnav_files @@ -1,8 +1,8 @@ index.md +estimators.md tensors.md variables.md graphs.md -estimators.md saved_model.md datasets.md threading_and_queues.md diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index 0781347fd6e..dda707aea26 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -209,6 +209,95 @@ func VarHandleOp(scope *Scope, dtype tf.DataType, shape tf.Shape, optional ...Va return op.Output(0) } +// Writes a `Summary` protocol buffer with scalar values. +// +// The input `tag` and `value` must have the scalars. 
+// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Tag for the summary. +// value: Value for the summary. +// +// Returns the created operation. +func WriteScalarSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, value tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteScalarSummary", + Input: []tf.Input{ + writer, global_step, tag, value, + }, + } + return scope.AddOperation(opspec) +} + +// Outputs a `Summary` protocol buffer with a tensor. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tensor: A tensor to serialize. +// tag: The summary's tag. +// summary_metadata: Serialized SummaryMetadata protocol buffer containing +// plugin-related metadata for this summary. +// +// Returns the created operation. +func WriteSummary(scope *Scope, writer tf.Output, global_step tf.Output, tensor tf.Output, tag tf.Output, summary_metadata tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteSummary", + Input: []tf.Input{ + writer, global_step, tensor, tag, summary_metadata, + }, + } + return scope.AddOperation(opspec) +} + +// Flushes and closes the summary writer. +// +// Also removes it from the resource manager. To reopen, use another +// CreateSummaryFileWriter op. +// +// Arguments: +// writer: A handle to the summary writer resource. +// +// Returns the created operation. +func CloseSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CloseSummaryWriter", + Input: []tf.Input{ + writer, + }, + } + return scope.AddOperation(opspec) +} + +// Flushes the writer's unwritten events. +// +// Arguments: +// writer: A handle to the summary writer resource. +// +// Returns the created operation. +func FlushSummaryWriter(scope *Scope, writer tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "FlushSummaryWriter", + Input: []tf.Input{ + writer, + }, + } + return scope.AddOperation(opspec) +} + // FakeQuantWithMinMaxVarsPerChannelGradientAttr is an optional argument to FakeQuantWithMinMaxVarsPerChannelGradient. type FakeQuantWithMinMaxVarsPerChannelGradientAttr func(optionalAttr) @@ -2149,6 +2238,34 @@ func ConcatOffset(scope *Scope, concat_dim tf.Output, shape []tf.Output) (offset return offset } +// Writes a `Summary` protocol buffer with a histogram. +// +// The generated +// [`Summary`](https://www.tensorflow.org/code/tensorflow/core/framework/summary.proto) +// has one summary value containing a histogram for `values`. +// +// This op reports an `InvalidArgument` error if any value is not finite. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Tag to use for the `Summary.Value`. +// values: Any shape. Values to use to build the histogram. +// +// Returns the created operation. +func WriteHistogramSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, values tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "WriteHistogramSummary", + Input: []tf.Input{ + writer, global_step, tag, values, + }, + } + return scope.AddOperation(opspec) +} + // Concatenates tensors along one dimension. 
// // Arguments: @@ -7087,6 +7204,48 @@ func ResizeNearestNeighbor(scope *Scope, images tf.Output, size tf.Output, optio return op.Output(0) } +// SummaryWriterAttr is an optional argument to SummaryWriter. +type SummaryWriterAttr func(optionalAttr) + +// SummaryWriterSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func SummaryWriterSharedName(value string) SummaryWriterAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// SummaryWriterContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func SummaryWriterContainer(value string) SummaryWriterAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// Returns a handle to be used to access a summary writer. +// +// The summary writer is an in-graph resource which can be used by ops to write +// summaries to event files. +// +// Returns the summary writer resource. Scalar handle. +func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "SummaryWriter", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Returns the set of files matching one or more glob patterns. // // Note that this routine only supports wildcard characters in the @@ -10570,6 +10729,61 @@ func Restore(scope *Scope, file_pattern tf.Output, tensor_name tf.Output, dt tf. return op.Output(0) } +// WriteAudioSummaryAttr is an optional argument to WriteAudioSummary. +type WriteAudioSummaryAttr func(optionalAttr) + +// WriteAudioSummaryMaxOutputs sets the optional max_outputs attribute to value. +// +// value: Max number of batch elements to generate audio for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func WriteAudioSummaryMaxOutputs(value int64) WriteAudioSummaryAttr { + return func(m optionalAttr) { + m["max_outputs"] = value + } +} + +// Writes a `Summary` protocol buffer with audio. +// +// The summary has up to `max_outputs` summary values containing audio. The +// audio is built from `tensor` which must be 3-D with shape `[batch_size, +// frames, channels]` or 2-D with shape `[batch_size, frames]`. The values are +// assumed to be in the range of `[-1.0, 1.0]` with a sample rate of `sample_rate`. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_outputs` is 1, the summary value tag is '*tag*/audio'. +// * If `max_outputs` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/audio/0', '*tag*/audio/1', etc. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 2-D of shape `[batch_size, frames]`. +// sample_rate: The sample rate of the signal in hertz. +// +// Returns the created operation. 
+func WriteAudioSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, sample_rate tf.Output, optional ...WriteAudioSummaryAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "WriteAudioSummary", + Input: []tf.Input{ + writer, global_step, tag, tensor, sample_rate, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // FusedResizeAndPadConv2DAttr is an optional argument to FusedResizeAndPadConv2D. type FusedResizeAndPadConv2DAttr func(optionalAttr) @@ -15797,6 +16011,30 @@ func Dilation2D(scope *Scope, input tf.Output, filter tf.Output, strides []int64 return op.Output(0) } +// Creates a summary file writer accessible by the given resource handle. +// +// Arguments: +// writer: A handle to the summary writer resource +// logdir: Directory where the event file will be written. +// max_queue: Size of the queue of pending events and summaries. +// flush_millis: How often, in milliseconds, to flush the pending events and +// summaries to disk. +// filename_suffix: Every event file's name is suffixed with this suffix. +// +// Returns the created operation. +func CreateSummaryFileWriter(scope *Scope, writer tf.Output, logdir tf.Output, max_queue tf.Output, flush_millis tf.Output, filename_suffix tf.Output) (o *tf.Operation) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "CreateSummaryFileWriter", + Input: []tf.Input{ + writer, logdir, max_queue, flush_millis, filename_suffix, + }, + } + return scope.AddOperation(opspec) +} + // EncodeBase64Attr is an optional argument to EncodeBase64. type EncodeBase64Attr func(optionalAttr) @@ -17172,6 +17410,84 @@ func Cumsum(scope *Scope, x tf.Output, axis tf.Output, optional ...CumsumAttr) ( return op.Output(0) } +// WriteImageSummaryAttr is an optional argument to WriteImageSummary. +type WriteImageSummaryAttr func(optionalAttr) + +// WriteImageSummaryMaxImages sets the optional max_images attribute to value. +// +// value: Max number of batch elements to generate images for. +// If not specified, defaults to 3 +// +// REQUIRES: value >= 1 +func WriteImageSummaryMaxImages(value int64) WriteImageSummaryAttr { + return func(m optionalAttr) { + m["max_images"] = value + } +} + +// Writes a `Summary` protocol buffer with images. +// +// The summary has up to `max_images` summary values containing images. The +// images are built from `tensor` which must be 4-D with shape `[batch_size, +// height, width, channels]` and where `channels` can be: +// +// * 1: `tensor` is interpreted as Grayscale. +// * 3: `tensor` is interpreted as RGB. +// * 4: `tensor` is interpreted as RGBA. +// +// The images have the same number of channels as the input tensor. For float +// input, the values are normalized one image at a time to fit in the range +// `[0, 255]`. `uint8` values are unchanged. The op uses two different +// normalization algorithms: +// +// * If the input values are all positive, they are rescaled so the largest one +// is 255. +// +// * If any input value is negative, the values are shifted so input value 0.0 +// is at 127. They are then rescaled so that either the smallest value is 0, +// or the largest one is 255. +// +// The `tag` argument is a scalar `Tensor` of type `string`. It is used to +// build the `tag` of the summary values: +// +// * If `max_images` is 1, the summary value tag is '*tag*/image'. 
+// * If `max_images` is greater than 1, the summary value tags are +// generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. +// +// The `bad_color` argument is the color to use in the generated images for +// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// Each element must be in the range `[0, 255]` (It represents the value of a +// pixel in the output image). Non-finite values in the input tensor are +// replaced by this tensor in the output image. The default value is the color +// red. +// +// Arguments: +// writer: A handle to a summary writer. +// global_step: The step to write the summary for. +// tag: Scalar. Used to build the `tag` attribute of the summary values. +// tensor: 4-D of shape `[batch_size, height, width, channels]` where +// `channels` is 1, 3, or 4. +// bad_color: Color to use for pixels with non-finite values. +// +// Returns the created operation. +func WriteImageSummary(scope *Scope, writer tf.Output, global_step tf.Output, tag tf.Output, tensor tf.Output, bad_color tf.Output, optional ...WriteImageSummaryAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "WriteImageSummary", + Input: []tf.Input{ + writer, global_step, tag, tensor, bad_color, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + // Pads a tensor with zeros. // // This operation pads a `input` with zeros according to the `paddings` you @@ -20306,7 +20622,7 @@ func Prod(scope *Scope, input tf.Output, reduction_indices tf.Output, optional . // gradients: The backpropagated gradients to the corresponding softsign operation. // features: The features passed as input to the corresponding softsign operation. // -// Returns The gradients: `gradients / (1 + abs(-features)) ** 2`. +// Returns The gradients: `gradients / (1 + abs(features)) ** 2`. 
func SoftsignGrad(scope *Scope, gradients tf.Output, features tf.Output) (backprops tf.Output) { if scope.Err() != nil { return diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 6597889fbcb..98dce82ee31 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1766,6 +1766,8 @@ py_library( srcs_version = "PY2AND3", deps = [ ":array_ops", + ":array_ops_gen", + ":dtypes", ":framework_ops", ":resource_variable_ops_gen", ":tensor_shape", @@ -1775,7 +1777,7 @@ py_library( "//tensorflow/python/eager:context", "//tensorflow/python/eager:custom_gradient", "//tensorflow/python/eager:tape", - "//tensorflow/python/eager:tensor", + "//tensorflow/python/eager:tensor_node", ], ) @@ -2550,6 +2552,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":array_ops", + ":checkpoint_ops_gen", ":client", ":control_flow_ops", ":data_flow_ops", @@ -3573,6 +3576,28 @@ py_test( ], ) +py_test( + name = "checkpoint_ops_test", + size = "small", + srcs = ["training/checkpoint_ops_test.py"], + srcs_version = "PY2AND3", + tags = ["no_windows"], + deps = [ + ":checkpoint_ops_gen", + ":client", + ":client_testlib", + ":framework_for_generated_wrappers", + ":io_ops", + ":partitioned_variables", + ":platform", + ":pywrap_tensorflow", + ":state_ops", + ":training", + ":variable_scope", + ":variables", + ], +) + py_test( name = "monitored_session_test", size = "small", diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 08dd3922dbe..fa49e66e87b 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -373,6 +373,33 @@ def TF_Reset(target, containers=None, config=None): TF_DeleteSessionOptions(opts) %} +// We use TF_GraphToFunction_wrapper instead of TF_GraphToFunction +%ignore TF_GraphToFunction; +// TF_GraphToFunction_wrapper does not use any Python methods and +// does not require GIL to be held. +%unignore TF_GraphToFunction_wrapper; + +// $input is a Python list of wrapped TF_Operations +%typemap(in) (const std::vector* opers) + (std::vector opers) { + if ($input != Py_None) { + if (!PyList_Check($input)) { + SWIG_exception_fail(SWIG_TypeError, "$symname: expected list"); + } + size_t size = PyList_Size($input); + for (int i = 0; i < size; ++i) { + PyObject* item = PyList_GetItem($input, i); + TF_Operation* oper_ptr; + SWIG_ConvertPtr(item, reinterpret_cast(&oper_ptr), + $descriptor(TF_Operation*), 0); + opers.push_back(oper_ptr); + } + $1 = &opers; + } else { + $1 = nullptr; + } +} + %include "tensorflow/python/client/tf_session_helper.h" %unignoreall diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index 60a589fa8bb..72f560fa878 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -337,4 +337,38 @@ std::vector TF_OperationGetControlInputs_wrapper( return control_inputs; } +TF_Function* TF_GraphToFunction_wrapper(const TF_Graph* fn_body, + const char* fn_name, + const std::vector* opers, + const std::vector& inputs, + const std::vector& outputs, + const NameVector& output_names, + const TF_FunctionOptions* opts, + TF_Status* out_status) { + if (!output_names.empty() && output_names.size() != outputs.size()) { + Set_TF_Status_from_Status( + out_status, + errors::InvalidArgument( + "output names must be either empty or equal in size to outputs. 
", + "output names size = ", output_names.size(), + " outputs size = ", outputs.size())); + return nullptr; + } + + int nopers = -1; + const TF_Operation* const* opers_array = nullptr; + if (opers != nullptr) { + nopers = opers->size(); + opers_array = opers->data(); + } + + const char** output_names_ptr = + output_names.empty() ? nullptr + : const_cast(output_names.data()); + + return TF_GraphToFunction(fn_body, fn_name, nopers, opers_array, + inputs.size(), inputs.data(), outputs.size(), + outputs.data(), output_names_ptr, opts, out_status); +} + } // namespace tensorflow diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 3bc63f822fe..8fae6206c07 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -148,6 +148,16 @@ void TF_SessionPRun_wrapper(TF_Session* session, const char* handle, std::vector TF_OperationGetControlInputs_wrapper( TF_Operation* oper); +// `opers` equaling NULL are converted to `nopers = -1`. +// `output_names` must be empty or have the same length as `outputs`. +TF_Function* TF_GraphToFunction_wrapper(const TF_Graph* fn_body, + const char* fn_name, + const std::vector* opers, + const std::vector& inputs, + const std::vector& outputs, + const NameVector& output_names, + const TF_FunctionOptions* opts, + TF_Status* out_status); } // namespace tensorflow #endif // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_ diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index ca3ad1a2c33..326f56ebf9b 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -169,10 +169,6 @@ def _record_gradient(op_name, inputs, attrs, results, name): execute.record_gradient = _record_gradient -def _ones(shape, dtype): - return array_ops.fill(shape, tensor.Tensor(1, dtype=dtype)) - - def _aggregate_grads(gradients): """Aggregate gradients of the same tensor.""" grad_lists = dict() @@ -225,7 +221,7 @@ def implicit_val_and_grad(f): (end_node.progenitors, repr(start_node))) output_gradients = kwds.get("output_gradients", None) if output_gradients is None: - output_gradients = _ones(end_node.shape, end_node.dtype) + output_gradients = array_ops.ones_like(end_node.value) grad = ag_core.backward_pass(output_gradients, end_node, start_node) return end_node.value, _aggregate_grads(grad.gradients) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 010124ed56a..b4379055096 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -85,7 +85,7 @@ class BackpropTest(test.TestCase): initial_value=tensor.Tensor(1.0), name='x') def fn(): - tape.watch(x.handle) + tape.watch_variable(x) b = tensor.Tensor(2.0) c = math_ops.add(x.value(), b) return math_ops.add(c, tensor.Tensor(3.0)) @@ -307,6 +307,20 @@ class BackpropTest(test.TestCase): [tensor_shape.TensorShape(s).as_proto() for s in shape_list], backprop.make_attr([pywrap_tensorflow.TF_ATTR_SHAPE], shape_list)) + def testMultiValueConvertToTensor(self): + x = resource_variable_ops.ResourceVariable( + initial_value=array_ops.constant([1.0]), name='x') + + def fn(): + tape.watch_variable(x) + a = math_ops.add(x.value(), 1.0) + # Make sure convert_to_tensor works correctly with list of TensorNodes. 
+ b = array_ops.stack([a, a], axis=0) + return math_ops.reduce_mean(b) + + grad = backprop.implicit_grad(fn)()[0][1] + self.assertAllEqual([1.0], grad.numpy()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 27ffdd98105..a5a93b7bbe0 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -171,16 +171,6 @@ class Context(object): """Sets summary writer resource.""" self._summary_writer_resource = resource - @property - def recording_summaries(self): - """Returns True if recording summaries is enabled in current thread..""" - return self._eager_context.recording_summaries - - @recording_summaries.setter - def recording_summaries(self, val): - """Enables recording summaries is enabled in current thread..""" - self._eager_context.recording_summaries = val - @property def device_name(self): """Returns the device name for the current thread.""" @@ -360,24 +350,6 @@ def device(name): return context().device(name) -@contextlib.contextmanager -def record_summaries(): - """Context-manager to enable recording of summaries.""" - ctx = context() - old = ctx.recording_summaries - ctx.recording_summaries = True - try: - yield - finally: - ctx.recording_summaries = old - - -def should_record_summary(): - """True if a summary should be recorded now.""" - c = context() - return c.recording_summaries and c.summary_writer_resource is not None - - def run(main=None, argv=None): """Runs the program with an optional 'main' function and 'argv' list. diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 7ae80aa156a..5de396f62c3 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -55,10 +55,6 @@ class TFETest(test_util.TensorFlowTestCase): ctx.summary_writer_resource = 'mock' self.assertEqual('mock', ctx.summary_writer_resource) - self.assertFalse(ctx.recording_summaries) - ctx.recording_summaries = True - self.assertTrue(ctx.recording_summaries) - self.assertEqual('', ctx.device_name) self.assertEqual(ctx.device_name, ctx.device_spec.to_string()) with ctx.device('GPU:0'): @@ -95,8 +91,7 @@ class TFETest(test_util.TensorFlowTestCase): return [ ctx.in_graph_mode(), ctx.in_eager_mode(), ctx.scope_name, ctx.summary_writer_resource, - ctx.recording_summaries, ctx.device_name, - ctx.num_gpus() + ctx.device_name, ctx.num_gpus() ] def get_values(ctx, values): diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 18b722e7923..c15dde9e487 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import function as tf_function from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops class FunctionTest(test.TestCase): @@ -52,6 +53,19 @@ class FunctionTest(test.TestCase): out = sq(t) self.assertAllEqual(out.numpy(), math_ops.matmul(t, t).numpy()) + def testGraphModeWithGradients(self): + v = resource_variable_ops.ResourceVariable(1.0) + + @function.defun + def step(): + def inner(): + tape.watch(v.handle) + return v * v + + return backprop.implicit_grad(inner)()[0][1] + + self.assertAllEqual(step().numpy(), 2.0) + def testTensorConversionWithDefun(self): @function.defun diff --git a/tensorflow/python/eager/tape.py b/tensorflow/python/eager/tape.py index 
4d09db73c97..9cd29f630df 100644 --- a/tensorflow/python/eager/tape.py +++ b/tensorflow/python/eager/tape.py @@ -151,6 +151,15 @@ def watch(tensor): return tensor +def watch_variable(resource_variable): + """Marks this ResourceVariable to be watched by all tapes in the stack. + + Args: + resource_variable: A ResourceVariable to be watched. + """ + watch(resource_variable.handle) # py-lint: disable=protected-access + + def pop_tape(): """Pops the top tape in the stack, if any.""" if _tape_stack.stack: diff --git a/tensorflow/python/eager/tensor_test.py b/tensorflow/python/eager/tensor_test.py index 8d0f639ddcb..bd8e653b976 100644 --- a/tensorflow/python/eager/tensor_test.py +++ b/tensorflow/python/eager/tensor_test.py @@ -77,8 +77,8 @@ class TFETensorTest(test_util.TensorFlowTestCase): def testMultiLineTensorStr(self): t = tensor.Tensor(np.eye(3)) tensor_str = str(t) - self.assertIn("shape=%s, dtype=%s, " % (t.shape, t.dtype.name), tensor_str) - self.assertIn("numpy=\n%s" % t.numpy(), tensor_str) + self.assertIn("shape=%s, dtype=%s" % (t.shape, t.dtype.name), tensor_str) + self.assertIn(str(t.numpy()), tensor_str) def testMultiLineTensorRepr(self): t = tensor.Tensor(np.eye(3)) @@ -95,7 +95,7 @@ class TFETensorTest(test_util.TensorFlowTestCase): np.set_printoptions(threshold=2, edgeitems=1) t = tensor.Tensor(np.arange(10, dtype=np.int32)) - self.assertIn("numpy=[0 ..., 9]", str(t)) + self.assertIn("[0 ..., 9]", str(t)) self.assertIn("[0, ..., 9]", repr(t)) # Clean up: reset to previous printoptions. @@ -103,7 +103,7 @@ class TFETensorTest(test_util.TensorFlowTestCase): def testZeroDimTensorStr(self): t = tensor.Tensor(42) - self.assertIn("shape=(), dtype=int32, numpy=42", str(t)) + self.assertIn("42, shape=(), dtype=int32", str(t)) def testZeroDimTensorRepr(self): t = tensor.Tensor(42) @@ -113,7 +113,7 @@ class TFETensorTest(test_util.TensorFlowTestCase): def testZeroSizeTensorStr(self): t = tensor.Tensor(np.zeros(0, dtype=np.float32)) - self.assertIn("shape=(0,), dtype=float32, numpy=[]", str(t)) + self.assertIn("[], shape=(0,), dtype=float32", str(t)) def testZeroSizeTensorRepr(self): t = tensor.Tensor(np.zeros(0, dtype=np.float32)) @@ -127,8 +127,8 @@ class TFETensorTest(test_util.TensorFlowTestCase): t = tensor.Tensor(42) # Force change dtype to a numpy-unprintable type. 
t._dtype = dtypes.resource - self.assertIn("numpy=&lt;unprintable&gt;", str(t)) - self.assertIn("numpy=&lt;unprintable&gt;", repr(t)) + self.assertIn("&lt;unprintable&gt;", str(t)) + self.assertIn("&lt;unprintable&gt;", repr(t)) def testStringTensor(self): t_np_orig = np.array([[b"a", b"ab"], [b"abc", b"abcd"]]) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index 83eeeb35b67..167f9b10543 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -148,6 +148,7 @@ py_test( name = "dnn_test", size = "medium", srcs = ["canned/dnn_test.py"], + shard_count = 4, srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ @@ -201,7 +202,7 @@ py_test( name = "dnn_linear_combined_test", size = "medium", srcs = ["canned/dnn_linear_combined_test.py"], - shard_count = 4, + shard_count = 8, srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ @@ -552,11 +553,9 @@ py_test( name = "linear_test", size = "medium", srcs = ["canned/linear_test.py"], + shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "no_pip", - "noasan", # times out b/63680444 - ], + tags = ["no_pip"], deps = [ ":linear", ":linear_testing_utils", diff --git a/tensorflow/python/estimator/canned/head.py b/tensorflow/python/estimator/canned/head.py index d2c5772483b..80d109d927a 100644 --- a/tensorflow/python/estimator/canned/head.py +++ b/tensorflow/python/estimator/canned/head.py @@ -200,8 +200,11 @@ def _check_labels(labels, expected_labels_dimension): dim1 = static_shape[1] if (dim1 is not None) and (dim1 != expected_labels_dimension): raise ValueError( - 'labels shape must be [batch_size, labels_dimension], got %s.' % - (static_shape,)) + 'Mismatched label shape. ' + 'Classifier configured with n_classes=%s. Received %s. ' + 'Suggested Fix: check your n_classes argument to the estimator ' + 'and/or the shape of your label.' % + (expected_labels_dimension, dim1)) assert_dimension = check_ops.assert_equal( expected_labels_dimension, labels_shape[1], message=err_msg) with ops.control_dependencies([assert_dimension]): diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 23678013c66..fa3d5b44eb6 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -139,7 +139,7 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): features = {'x': np.array(((42.,),))} # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features=features, mode=model_fn.ModeKeys.EVAL, @@ -889,7 +889,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): logits_2x1 = np.array(((45.,), (41.,),)) # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': np.array(((42.,),))}, mode=model_fn.ModeKeys.EVAL, @@ -1692,7 +1692,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): values_1d = np.array(((43.,), (44.,),)) # Static shape. - with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': values_1d}, mode=model_fn.ModeKeys.EVAL, @@ -1737,7 +1737,7 @@ class RegressionHeadWithMeanSquaredErrorLossTest(test.TestCase): values_1d = np.array(((43.,), (44.,),)) # Static shape.
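# --- Illustrative sketch, not part of this diff ---
# The shape contract that _check_labels (head.py change above) now reports
# more explicitly: labels must be statically shaped
# [batch_size, expected_labels_dimension]. The values below are assumptions
# chosen only to show which case triggers the new "Mismatched label shape"
# ValueError.
import numpy as np

expected_labels_dimension = 1
ok_labels = np.array([[1.0], [2.0]])   # shape (2, 1): second dim matches.
bad_labels = np.array([[1.0, 0.0]])    # shape (1, 2): second dim is 2, so
                                       # create_loss now raises the
                                       # "Mismatched label shape. ..." error.
# --- End sketch ---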
- with self.assertRaisesRegexp(ValueError, 'labels shape'): + with self.assertRaisesRegexp(ValueError, 'Mismatched label shape'): head.create_loss( features={'x': values_1d}, mode=model_fn.ModeKeys.TRAIN, diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index 2f35f0e04b6..7a866ee6e8a 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -26,7 +26,9 @@ import hashlib from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import op_def_pb2 +from tensorflow.python import pywrap_tensorflow as c_api from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -290,6 +292,7 @@ class _DefinedFunction(object): self._shape_func = shape_func self._extra_kwargs = kwargs self._definition = None # Constructed lazily. + self._c_func = None # Constructed with definition. self._sub_functions = dict() # Constructed with definition. self._args = [] @@ -396,6 +399,22 @@ class _DefinedFunction(object): if self._func.__doc__: self._definition.signature.description = self._func.__doc__ + # pylint: disable=protected-access + if temp_graph._c_graph: + with errors.raise_exception_on_not_ok_status() as status: + output_names = ([compat.as_bytes(x) for x in self._out_names] + if self._out_names else []) + self._c_func = c_api.TF_GraphToFunction_wrapper( + temp_graph._c_graph, + self._func_name, + None, # opers + [t._as_tf_output() for t in inputs], + [t._as_tf_output() for t in outputs], + output_names, + None, # opts + status) + # pylint: enable=protected-access + def _create_hash_str(self, input_arg, output_arg, node_def): """Creates an 8-character string unique to this input. diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 589db9ef4dc..40205ddf053 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import function from tensorflow.python.framework import graph_to_function_def from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops @@ -63,7 +64,51 @@ def _OptimizerOptions(): do_constant_folding=cfold))) -class FunctionTest(test.TestCase): +class FunctionTestMethods(object): + """Test methods for verifying Function support. + + These test methods are used as mix-ins in two test cases: with + and without C API support. 
+ """ + + def testIdentity(self): + + @function.Defun(dtypes.float32, func_name="MyIdentity") + def MyIdentityFunc(a): + return a + + with ops.Graph().as_default(): + call = MyIdentityFunc([18.0]) + self.assertEqual("MyIdentity", call.op.name) + with session.Session() as sess: + self.assertAllEqual([18.0], sess.run(call)) + + def testIdentityOutputName(self): + + @function.Defun( + dtypes.float32, func_name="MyIdentity", out_names=["my_result_name"]) + def MyIdentityFunc(a): + return a + + with ops.Graph().as_default(): + call = MyIdentityFunc([18.0]) + self.assertEqual("MyIdentity", call.op.name) + with session.Session() as sess: + self.assertAllEqual([18.0], sess.run(call)) + + def testTooManyOutputNames(self): + + @function.Defun( + dtypes.float32, func_name="MyIdentity", + out_names=["my_result1", "my_result2"]) + def MyIdentityFunc(a): + return a + + with ops.Graph().as_default(): + with self.assertRaisesRegexp( + ValueError, (r"Length of out_names \(2\) does not match number of " + r"outputs \(1\): my_result1, my_result2")): + MyIdentityFunc([18.0]) def testDefineFunction2Args(self): @@ -77,6 +122,35 @@ class FunctionTest(test.TestCase): with session.Session() as sess: self.assertAllEqual([5.0], sess.run(call)) + def testValueErrorOnFunctionWithNoOutput(self): + # TODO(iga): Remove this restriction and this test + + @function.Defun(dtypes.float32, dtypes.float32) + def APlus2B(a, b): + print(a + b * 2) # Create some ops to have nodes in the body + # Using 'print' to make lint happy + + with ops.Graph().as_default(): + with self.assertRaisesRegexp(ValueError, + "Function can not return None"): + APlus2B([1.0], [2.0]) + + def testDefineFunction2ArgsOutputName(self): + + @function.Defun( + dtypes.float32, + dtypes.float32, + func_name="APlus2B", + out_names=["my_result_name"]) + def APlus2B(a, b): + return a + b * 2 + + with ops.Graph().as_default(): + call = APlus2B([1.0], [2.0]) + self.assertEqual("APlus2B", call.op.name) + with session.Session() as sess: + self.assertAllEqual([5.0], sess.run(call)) + def testDefineFunctionDuplicateOutputs(self): @function.Defun(dtypes.float32, func_name="Duplicate") @@ -137,6 +211,7 @@ class FunctionTest(test.TestCase): out, = sess.run(dx, feed) self.assertAllClose(1 - np.square(np.tanh(inp)), out) + @test_util.disable_c_api # Function gradients don't work with C API def testCustomGradient(self): dtype = dtypes.float32 @@ -169,6 +244,7 @@ class FunctionTest(test.TestCase): out, = sess.run(dlogits, {logits: x, labels: y}) self.assertAllClose(out, np.exp(prob - y)) + @test_util.disable_c_api # Function gradients don't work with C API def testCustomGradientError(self): dtype = dtypes.float32 @@ -194,6 +270,7 @@ class FunctionTest(test.TestCase): "SymGrad expects to return 1.*but get 2.*instead"): _ = sess.run(dinp, {inp: x}) + @test_util.disable_c_api # Function gradients don't work with C API def testSymGradShape(self): g = ops.Graph() with g.as_default(): @@ -209,6 +286,7 @@ class FunctionTest(test.TestCase): self.assertEqual(x.get_shape(), dx.get_shape()) self.assertEqual(y.get_shape(), dy.get_shape()) + @test_util.disable_c_api # Function gradients don't work with C API def testSymGradAttr(self): @function.Defun(noinline=True) @@ -312,6 +390,7 @@ class FunctionTest(test.TestCase): "assertion failed.*-3"): self.assertAllEqual(Foo(constant_op.constant(-3.0)).eval(), 6.0) + @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testAssertWrapper(self): @function.Defun(dtypes.float32) @@ -326,6 +405,7 @@ class 
FunctionTest(test.TestCase): "assertion"): _ = MyFn(100.0).eval() + @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testWhileLoopCallsFunc(self): with self.test_session(use_gpu=True) as sess: @@ -345,6 +425,7 @@ class FunctionTest(test.TestCase): ans = sess.run(loop) self.assertAllClose(ans, 131072.) + @test_util.disable_c_api # Op._add_control_inputs doesn't work with C API def testControlFlowStrictness(self): """Inlined functions must not execute in a untaken control flow branch.""" @@ -607,6 +688,7 @@ class FunctionTest(test.TestCase): self.assertAllClose(vals[0], vals[1]) self.assertAllClose(vals[2], vals[3]) + @test_util.disable_c_api # Function Declaration doesn't work with C API def testDeclare(self): foo = function.Declare("Foo", [("x", dtypes.float32)], [("y", dtypes.float32)]) @@ -626,6 +708,7 @@ class FunctionTest(test.TestCase): expected = rand * rand + 1.0 self.assertAllClose(expected, y.eval(feed_dict={x: rand})) + @test_util.disable_c_api # Function Declaration doesn't work with C API def testDeclareUsedInDefun(self): foo = function.Declare("Foo", [("x", dtypes.float32)], [("y", dtypes.float32)]) @@ -649,6 +732,7 @@ class FunctionTest(test.TestCase): expected = rand * rand + 1.0 self.assertAllClose(expected, y.eval(feed_dict={x: rand})) + @test_util.disable_c_api # Function Declaration doesn't work with C API def testDeclareTypeMistake(self): foo = function.Declare("Foo", [("x", dtypes.float32)], [("y", dtypes.float32)]) @@ -861,6 +945,32 @@ class FunctionTest(test.TestCase): self.assertEqual(len(f.signature.input_arg), 3) +class FunctionTest(FunctionTestMethods, test.TestCase): + """Test case that invokes test methods with _USE_C_API=False.""" + + def setUp(self): + self.prev_use_c_api = ops._USE_C_API + ops._USE_C_API = False + super(FunctionTest, self).setUp() + + def tearDown(self): + ops._USE_C_API = self.prev_use_c_api + super(FunctionTest, self).tearDown() + + +class FunctionWithCApiTest(FunctionTestMethods, test.TestCase): + """Test case that invokes test methods with _USE_C_API=True.""" + + def setUp(self): + self.prev_use_c_api = ops._USE_C_API + ops._USE_C_API = True + super(FunctionWithCApiTest, self).setUp() + + def tearDown(self): + ops._USE_C_API = self.prev_use_c_api + super(FunctionWithCApiTest, self).tearDown() + + class FunctionsFromProtos(test.TestCase): def expectFunctionsEqual(self, func, grad_func=None, new_func=None): diff --git a/tensorflow/python/framework/op_def_library.py b/tensorflow/python/framework/op_def_library.py index aa373600669..76424ef579b 100644 --- a/tensorflow/python/framework/op_def_library.py +++ b/tensorflow/python/framework/op_def_library.py @@ -784,6 +784,7 @@ class OpDefLibrary(object): if arg.is_ref] with _MaybeColocateWith(must_colocate_inputs): # Add Op to graph + inputs = [ag_core.getval(x) for x in inputs] op = g.create_op(op_type_name, inputs, output_types, name=scope, input_types=input_types, attrs=attr_protos, op_def=op_def) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5a0c323ce47..b197e96886e 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -49,6 +49,7 @@ from tensorflow.python.framework import versions from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import compat from tensorflow.python.util import decorator_utils +from tensorflow.python.util import nest from tensorflow.python.util import tf_contextlib # Temporary global switch determining if we should enable the 
work-in-progress @@ -604,6 +605,13 @@ def _maybe_modify_numpy_dtype_determination(np_array): return np_array +def _has_string(value): + if isinstance(value, compat.bytes_or_text_types): return True + if isinstance(value, collections.Sequence) and value: + return _has_string(value[0]) + return False + + # TODO(agarwal): rename to TensorHandle. class EagerTensor(Tensor): """A TensorFlow Eager Tensor.""" @@ -625,6 +633,8 @@ class EagerTensor(Tensor): # https://www.tensorflow.org/code/tensorflow/python/framework/constant_op.py self._id = uid() if not isinstance(value, np.ndarray): + if dtype is None and _has_string(value): + dtype = dtypes.string npt = None if dtype is None else dtype.as_numpy_dtype try: value = np.array(value, dtype=npt) @@ -712,12 +722,12 @@ class EagerTensor(Tensor): return numpy_text def __str__(self): - return "tfe.Tensor(shape=%s, dtype=%s, numpy=%s)" % (self.shape, - self.dtype.name, - self._numpy_text()) + return "tf.Tensor(%s, shape=%s, dtype=%s)" % (self._numpy_text(), + self.shape, + self.dtype.name) def __repr__(self): - return "&lt;tfe.Tensor: id=%d, shape=%s, dtype=%s, numpy=%s&gt;" % ( + return "&lt;tf.Tensor: id=%d, shape=%s, dtype=%s, numpy=%s&gt;" % ( self._id, self.shape, self.dtype.name, self._numpy_text(is_repr=True)) @staticmethod @@ -1027,12 +1037,19 @@ def internal_convert_to_tensor(value, # tracing gradients, to ensure the same behavior happens with and without # tracing. unwrapped = ag_core.getval(value) - # Fast path for EagerTensors that don't need any conversion. - if isinstance(unwrapped, EagerTensor) and context.in_eager_mode(): - # Note that we don't check that value's dtype matches the dtype - # argument. We exepct that the C runtime will do that checking - # when we execute the kernel. - return value + + if context.in_eager_mode(): + # Fast path for EagerTensors that don't need any conversion. + if isinstance(unwrapped, EagerTensor): + # Note that we don't check that value's dtype matches the dtype + # argument. We expect that the C runtime will do that checking + # when we execute the kernel.
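# --- Illustrative sketch, not part of this diff ---
# What the _has_string() helper added above buys us: when no dtype is passed,
# EagerTensor now defaults nested Python sequences whose first leaf is a
# bytes/str value to dtypes.string instead of whatever numpy would infer.
# Expected behaviour (inferred from the helper's definition; illustrative):
#   _has_string(b"abc")              -> True   (bytes/text scalar)
#   _has_string([[b"a"], [b"ab"]])   -> True   (recurses into first element)
#   _has_string([1.0, 2.0])          -> False  (no string leaves)
# so EagerTensor([[b"a"], [b"ab"]]) picks up dtype=string automatically.
# --- End sketch ---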
+ return value + values = nest.flatten(value) + if (len(values) > 1 and + any(isinstance(ag_core.getval(v), EagerTensor) for v in values)): + raise TypeError("Cannot convert to a eager tensor.") + if dtype is not None: dtype = dtypes.as_dtype(dtype) unwrapped_type = type(unwrapped) @@ -2939,6 +2956,14 @@ class Graph(object): if self._graph_def_versions.min_consumer < 12: self._graph_def_versions.min_consumer = 12 self._functions[name] = function + if self._c_graph: + # pylint: disable=protected-access + assert function._c_func, ( + "Cannot add function created without C API support to graph " + "created with C API support") + with errors.raise_exception_on_not_ok_status() as status: + c_api.TF_GraphAddFunction(self._c_graph, function._c_func, status) + # pylint: enable=protected-access @property def building_function(self): diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index c65816a5436..73b7f821c82 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -65,7 +65,7 @@ def gpu_device_name(): """Returns the name of a GPU device if available or the empty string.""" for x in device_lib.list_local_devices(): if x.device_type == "GPU" or x.device_type == "SYCL": - return x.name + return compat.as_str(x.name) return "" diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 4fa1e1fee80..e432998c21d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -518,7 +518,7 @@ tf_py_test( tf_py_test( name = "matrix_solve_ls_op_test", - size = "small", + size = "medium", srcs = ["matrix_solve_ls_op_test.py"], additional_deps = [ "//third_party/py/numpy", @@ -1708,6 +1708,26 @@ cuda_py_test( tags = ["no_windows_gpu"], ) +cuda_py_test( + name = "reduction_ops_test_big", + size = "medium", + srcs = ["reduction_ops_test_big.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:math_ops", + ], + tags = [ + "manual", + "no_gpu", + "nogpu", + "noguitar", + "notap", + ], +) + cuda_py_test( name = "relu_op_test", size = "small", @@ -2154,7 +2174,7 @@ cuda_py_test( "//tensorflow/python:nn_grad", "//tensorflow/python:nn_ops", ], - tags = ["noasan"], # times out b/63680444 + shard_count = 2, ) cuda_py_test( diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 392639fa179..77c5bb6d400 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -981,15 +981,15 @@ class SequenceMaskTest(test_util.TensorFlowTestCase): class ConcatSliceResourceTest(test_util.TensorFlowTestCase): + @test_util.run_in_graph_and_eager_modes() def testConcatSlice(self): - with self.test_session(): - r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b") - r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c") - c = array_ops.stack([r1, r2]) - s = array_ops.strided_slice(c, [1], [2]) - test_ops.resource_create_op(s).run() - with self.assertRaises(errors.AlreadyExistsError): - test_ops.resource_create_op(r2).run() + r1 = test_ops.stub_resource_handle_op(container="a", shared_name="b") + r2 = test_ops.stub_resource_handle_op(container="a", shared_name="c") + c = array_ops.stack([r1, r2]) + s = array_ops.strided_slice(c, [1], [2]) + 
self.evaluate(test_ops.resource_create_op(s)) + with self.assertRaises(errors.AlreadyExistsError): + self.evaluate(test_ops.resource_create_op(r2)) class IdentityTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/kernel_tests/cholesky_op_test.py b/tensorflow/python/kernel_tests/cholesky_op_test.py index eb06e067a7f..de80fb30554 100644 --- a/tensorflow/python/kernel_tests/cholesky_op_test.py +++ b/tensorflow/python/kernel_tests/cholesky_op_test.py @@ -183,14 +183,11 @@ class CholeskyGradTest(test.TestCase): self.runFiniteDifferences( shapes, dtypes=(dtypes_lib.float32, dtypes_lib.float64)) - # TODO(eriche): investigate why this test fails only in opensource - # ubuntu gpu python3 - - # def testSmallMatricesComplex(self): - # np.random.seed(0) - # shapes = self.getShapes([1, 2, 10]) - # self.runFiniteDifferences( - # shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128)) + def testSmallMatricesComplex(self): + np.random.seed(0) + shapes = self.getShapes([1, 2, 10]) + self.runFiniteDifferences( + shapes, dtypes=(dtypes_lib.complex64, dtypes_lib.complex128)) def testOneBlockMatrices(self): np.random.seed(0) diff --git a/tensorflow/python/kernel_tests/constant_op_eager_test.py b/tensorflow/python/kernel_tests/constant_op_eager_test.py index 0e98afbe6e4..0b4fa60d81b 100644 --- a/tensorflow/python/kernel_tests/constant_op_eager_test.py +++ b/tensorflow/python/kernel_tests/constant_op_eager_test.py @@ -26,27 +26,33 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.util import compat -# TODO(josh11b): add tests with string types, lists/tuples, Shape. +# TODO(josh11b): add tests with lists/tuples, Shape. 
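# --- Illustrative sketch, not part of this diff ---
# The test pattern that ConcatSliceResourceTest above was migrated to: the
# decorator runs the body under both graph and eager execution, and
# self.evaluate() replaces explicit Session.run()/.eval() calls. The test name
# and values here are assumptions; the decorator and helpers are the ones used
# in the diff above.
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import test_util
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test


class AddInBothModesTest(test_util.TensorFlowTestCase):

  @test_util.run_in_graph_and_eager_modes()
  def testAdd(self):
    total = math_ops.add(constant_op.constant(1.0), constant_op.constant(2.0))
    self.assertAllClose(3.0, self.evaluate(total))


if __name__ == "__main__":
  test.main()
# --- End sketch ---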
class ConstantTest(test.TestCase): def _testCpu(self, x): np_ans = np.array(x) - tf_ans = ops.convert_to_tensor(x).numpy() + with context.device("/device:CPU:0"): + tf_ans = ops.convert_to_tensor(x).numpy() if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]: self.assertAllClose(np_ans, tf_ans) else: self.assertAllEqual(np_ans, tf_ans) def _testGpu(self, x): - np_ans = np.array(x) - tf_ans = ops.convert_to_tensor(x).numpy() - if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]: - self.assertAllClose(np_ans, tf_ans) - else: - self.assertAllEqual(np_ans, tf_ans) + device = test_util.gpu_device_name() + if device: + np_ans = np.array(x) + with context.device(device): + tf_ans = ops.convert_to_tensor(x).numpy() + if np_ans.dtype in [np.float32, np.float64, np.complex64, np.complex128]: + self.assertAllClose(np_ans, tf_ans) + else: + self.assertAllEqual(np_ans, tf_ans) def _testAll(self, x): self._testCpu(x) @@ -78,11 +84,11 @@ class ConstantTest(test.TestCase): def testComplex64(self): self._testAll( - np.complex(1, 2) * np.arange(-15, 15).reshape([2, 3, 5 - ]).astype(np.complex64)) + np.complex(1, 2) * + np.arange(-15, 15).reshape([2, 3, 5]).astype(np.complex64)) self._testAll( - np.complex(1, 2) * np.random.normal(size=30).reshape( - [2, 3, 5]).astype(np.complex64)) + np.complex(1, 2) * + np.random.normal(size=30).reshape([2, 3, 5]).astype(np.complex64)) self._testAll(np.empty((2, 0, 5)).astype(np.complex64)) def testComplex128(self): @@ -94,6 +100,26 @@ class ConstantTest(test.TestCase): [2, 3, 5]).astype(np.complex128)) self._testAll(np.empty((2, 0, 5)).astype(np.complex128)) + def testString(self): + val = [compat.as_bytes(str(x)) for x in np.arange(-15, 15)] + self._testCpu(np.array(val).reshape([2, 3, 5])) + self._testCpu(np.empty((2, 0, 5)).astype(np.str_)) + + def testStringWithNulls(self): + val = ops.convert_to_tensor(b"\0\0\0\0").numpy() + self.assertEqual(len(val), 4) + self.assertEqual(val, b"\0\0\0\0") + + val = ops.convert_to_tensor(b"xx\0xx").numpy() + self.assertEqual(len(val), 5) + self.assertAllEqual(val, b"xx\0xx") + + nested = [[b"\0\0\0\0", b"xx\0xx"], [b"\0_\0_\0_\0", b"\0"]] + val = ops.convert_to_tensor(nested).numpy() + # NOTE(mrry): Do not use assertAllEqual, because it converts nested to a + # numpy array, which loses the null terminators. 
+ self.assertEqual(val.tolist(), nested) + def testExplicitShapeNumPy(self): c = constant_op.constant( np.arange(-15, 15).reshape([2, 3, 5]).astype(np.float32), diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index df413939c76..6167cb9999b 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker +from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import test from tensorflow.python.util import compat @@ -119,11 +120,11 @@ class ConstantTest(test.TestCase): variant_val=[ tensor_pb2.VariantTensorDataProto( # Match registration in variant_op_registry.cc - type_name=b"int32", + type_name=b"int", metadata=np.array(1, dtype=np.int32).tobytes()) ]) - const_op = constant_op.constant(variant_tensor).op - const_value = const_op.get_attr("value") + const = constant_op.constant(variant_tensor) + const_value = const.op.get_attr("value") # Ensure we stored the tensor proto properly. self.assertProtoEquals(variant_tensor, const_value) @@ -134,7 +135,10 @@ class ConstantTest(test.TestCase): # native numpy types cannot be passed to ops.convert_to_tensor. # TODO(ebrevdo): Add registration mechanism for # ops.convert_to_tensor and for session.run output. - const_op.run() + logging_const_op = logging_ops.Print( + const, [const], + message="Variant storing an int, decoded const value:").op + logging_const_op.run() def testStringWithNulls(self): with self.test_session(): @@ -469,6 +473,35 @@ class ZerosLikeTest(test.TestCase): self.assertEqual(y.shape, shape) self.assertAllEqual(y, np.zeros(shape, dtype=out_type)) + def testZerosLikeVariant(self): + # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant + # copying between CPU and GPU is supported AND we register a + # ZerosLike callback for GPU for Variant storing primitive types + # in variant_op_registry.cc. + with self.test_session(use_gpu=False): + variant_tensor = tensor_pb2.TensorProto( + dtype=dtypes_lib.variant.as_datatype_enum, + tensor_shape=tensor_shape.TensorShape([]).as_proto(), + variant_val=[ + tensor_pb2.VariantTensorDataProto( + # Match registration in variant_op_registry.cc + type_name=b"int", + metadata=np.array(1, dtype=np.int32).tobytes()) + ]) + const_variant = constant_op.constant(variant_tensor) + zeros_like = array_ops.zeros_like(const_variant) + zeros_like_op = logging_ops.Print( + zeros_like, [const_variant, zeros_like], + message="Variant storing an int, input and output of zeros_like:").op + + # Smoke test -- ensure this executes without trouble. + # Right now, non-numpy-compatible objects cannot be returned from a + # session.run call; similarly, objects that can't be converted to + # native numpy types cannot be passed to ops.convert_to_tensor. + # TODO(ebrevdo): Add registration mechanism for + # ops.convert_to_tensor and for session.run output. 
+ zeros_like_op.run() + class OnesTest(test.TestCase): diff --git a/tensorflow/python/kernel_tests/fifo_queue_test.py b/tensorflow/python/kernel_tests/fifo_queue_test.py index 85e7b635d80..748135440ec 100644 --- a/tensorflow/python/kernel_tests/fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/fifo_queue_test.py @@ -1078,6 +1078,9 @@ class FIFOQueueTest(test.TestCase): self.assertEqual([50.0], dequeued_t.eval()) self.assertEqual([60.0], dequeued_t.eval()) + # Make sure the thread finishes before exiting. + thread.join() + def testBlockingEnqueueBeforeClose(self): with self.test_session() as sess: q = data_flow_ops.FIFOQueue(4, dtypes_lib.float32) diff --git a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py index 53b1897f488..d8c3f9823c3 100644 --- a/tensorflow/python/kernel_tests/padding_fifo_queue_test.py +++ b/tensorflow/python/kernel_tests/padding_fifo_queue_test.py @@ -1191,6 +1191,9 @@ class PaddingFIFOQueueTest(test.TestCase): self.assertEqual([50.0], dequeued_t.eval()) self.assertEqual([60.0], dequeued_t.eval()) + # Make sure the thread finishes before exiting. + thread.join() + def testBlockingEnqueueBeforeClose(self): with self.test_session() as sess: q = data_flow_ops.PaddingFIFOQueue(4, dtypes_lib.float32, ((),)) diff --git a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py index fa1553a3f6b..b01fc129538 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_3d_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_3d_test.py @@ -321,6 +321,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="VALID") + def testMaxPoolGradValidPadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.max_pool3d, + input_sizes=[1, 3, 3, 3, 1], + output_sizes=[1, 2, 2, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="VALID") + def testMaxPoolGradValidPadding2_2_3d(self): self._ConstructAndTestGradient( nn_ops.max_pool3d, @@ -339,6 +348,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="SAME") + def testMaxPoolGradSamePadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.max_pool3d, + input_sizes=[1, 3, 2, 4, 1], + output_sizes=[1, 2, 1, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="SAME") + def testMaxPoolGradSamePadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.max_pool3d, @@ -375,6 +393,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="VALID") + def testAvgPoolGradValidPadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.avg_pool3d, + input_sizes=[1, 3, 3, 3, 1], + output_sizes=[1, 2, 2, 2, 1], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="VALID") + def testAvgPoolGradValidPadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.avg_pool3d, @@ -402,6 +429,15 @@ class PoolingTest(test.TestCase): strides=(1, 1, 1), padding="SAME") + def testAvgPoolGradSamePadding1_2_3d(self): + self._ConstructAndTestGradient( + nn_ops.avg_pool3d, + input_sizes=[1, 3, 2, 4, 2], + output_sizes=[1, 2, 1, 2, 2], + window=(1, 1, 1), + strides=(2, 2, 2), + padding="SAME") + def testAvgPoolGradSamePadding2_1_3d(self): self._ConstructAndTestGradient( nn_ops.avg_pool3d, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index da14871c872..9eb1fea8037 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -998,6 +998,20 @@ class 
PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testMaxPoolGradValidPadding1_2(self, data_format, use_gpu): + for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[1, 3, 3, 1], + output_sizes=[1, 2, 2, 1], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="VALID", + data_format=data_format, + use_gpu=use_gpu) + def _testMaxPoolGradValidPadding2_2(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( @@ -1026,6 +1040,20 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testMaxPoolGradSamePadding1_2(self, data_format, use_gpu): + for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + self._ConstructAndTestGradient( + pool_func, + input_sizes=[2, 2, 4, 3], + output_sizes=[2, 1, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="SAME", + data_format=data_format, + use_gpu=use_gpu) + def _testMaxPoolGradSamePadding2_1(self, data_format, use_gpu): for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( @@ -1071,10 +1099,12 @@ class PoolingTest(test.TestCase): def testMaxPoolGrad(self): for (data_format, use_gpu) in GetTestConfigs(): self._testMaxPoolGradValidPadding1_1(data_format, use_gpu) + self._testMaxPoolGradValidPadding1_2(data_format, use_gpu) self._testMaxPoolGradValidPadding2_1_6(data_format, use_gpu) self._testMaxPoolGradValidPadding2_1_7(data_format, use_gpu) self._testMaxPoolGradValidPadding2_2(data_format, use_gpu) self._testMaxPoolGradSamePadding1_1(data_format, use_gpu) + self._testMaxPoolGradSamePadding1_2(data_format, use_gpu) self._testMaxPoolGradSamePadding2_1(data_format, use_gpu) self._testMaxPoolGradSamePadding2_2(data_format, use_gpu) self._testMaxPoolGradSamePadding3_1(data_format, use_gpu) @@ -1497,9 +1527,11 @@ class PoolingTest(test.TestCase): def testAvgPoolGrad(self): for (data_format, use_gpu) in GetTestConfigs(): self._testAvgPoolGradValidPadding1_1(data_format, use_gpu) + self._testAvgPoolGradValidPadding1_2(data_format, use_gpu) self._testAvgPoolGradValidPadding2_1(data_format, use_gpu) self._testAvgPoolGradValidPadding2_2(data_format, use_gpu) self._testAvgPoolGradSamePadding1_1(data_format, use_gpu) + self._testAvgPoolGradSamePadding1_2(data_format, use_gpu) self._testAvgPoolGradSamePadding2_1(data_format, use_gpu) self._testAvgPoolGradSamePadding2_2(data_format, use_gpu) self._testAvgPoolGradSamePadding3_1(data_format, use_gpu) @@ -1517,6 +1549,19 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testAvgPoolGradValidPadding1_2(self, data_format, use_gpu): + self._ConstructAndTestGradient( + nn_ops.avg_pool, + input_sizes=[2, 3, 3, 3], + output_sizes=[2, 2, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="VALID", + data_format=data_format, + use_gpu=use_gpu) + def _testAvgPoolGradValidPadding2_1(self, data_format, use_gpu): self._ConstructAndTestGradient( nn_ops.avg_pool, @@ -1556,6 +1601,19 @@ class PoolingTest(test.TestCase): data_format=data_format, use_gpu=use_gpu) + def _testAvgPoolGradSamePadding1_2(self, data_format, use_gpu): + self._ConstructAndTestGradient( + nn_ops.avg_pool, + input_sizes=[2, 2, 4, 3], + output_sizes=[2, 1, 2, 3], + window_rows=1, + window_cols=1, + row_stride=2, + col_stride=2, + padding="SAME", + data_format=data_format, + use_gpu=use_gpu) + def 
_testAvgPoolGradSamePadding2_1(self, data_format, use_gpu): self._ConstructAndTestGradient( nn_ops.avg_pool, diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index 04ce99a4a63..8d6b7925e45 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -175,6 +175,24 @@ class SumReductionTest(BaseReductionTest): np_arr = self._makeIncremental((2,) * rank, dtypes.int32) self._compareAllAxes(np_arr) + def testFloat16(self): + for rank in range(1, _MAX_RANK + 1): + np_arr = self._makeIncremental((2,) * rank, dtypes.float16) + self._compareAllAxes(np_arr) + + # test that mean doesn't overflow + # only on GPU, since it has the more accurate implementation + if not test.is_gpu_available(): + return + + arr = np.ones([68000], dtype=np.float16) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_arr = array_ops.constant(arr) + tf_mean = math_ops.reduce_mean(tf_arr, 0, False) + tf_out_mean = sess.run(tf_mean) + self.assertAllClose(tf_out_mean, 1.) + def testFloat32(self): for rank in range(1, _MAX_RANK + 1): np_arr = self._makeIncremental((2,) * rank, dtypes.float32) @@ -523,7 +541,7 @@ class MinReductionTest(test.TestCase): def testFloatReduce3D(self): # Create a 3D array of floats and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + np_arr = np.arange(1, 31).reshape([2, 3, 5]).astype(np.float32) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -537,7 +555,7 @@ class MinReductionTest(test.TestCase): def testDoubleReduce3D(self): # Create a 3D array of doubles and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + np_arr = np.arange(1, 31).reshape([2, 3, 5]).astype(np.float64) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -629,7 +647,7 @@ class MaxReductionTest(test.TestCase): def testFloatReduce3D(self): # Create a 3D array of floats and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32) + np_arr = np.arange(-31, -1).reshape([2, 3, 5]).astype(np.float32) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -643,7 +661,7 @@ class MaxReductionTest(test.TestCase): def testDoubleReduce3D(self): # Create a 3D array of doubles and reduce across all possible # dimensions - np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64) + np_arr = np.arange(-31, -1).reshape([2, 3, 5]).astype(np.float64) self._compareAll(np_arr, None) self._compareAll(np_arr, []) self._compareAll(np_arr, [0]) @@ -656,7 +674,7 @@ class MaxReductionTest(test.TestCase): def testGradient(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1, 2]) @@ -666,7 +684,7 @@ class MaxReductionTest(test.TestCase): def testGradient2(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [1]) @@ -676,7 +694,7 @@ class MaxReductionTest(test.TestCase): def testGradient3(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = 
np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t, [2]) @@ -686,7 +704,7 @@ class MaxReductionTest(test.TestCase): def testGradient4(self): s = [2, 3, 4, 2] - x = np.arange(1.0, 49.0).reshape(s).astype(np.float64) + x = np.arange(-49.0, -1.0).reshape(s).astype(np.float64) with self.test_session(): t = ops.convert_to_tensor(x) su = math_ops.reduce_max(t) diff --git a/tensorflow/python/kernel_tests/reduction_ops_test_big.py b/tensorflow/python/kernel_tests/reduction_ops_test_big.py new file mode 100644 index 00000000000..0959adb026e --- /dev/null +++ b/tensorflow/python/kernel_tests/reduction_ops_test_big.py @@ -0,0 +1,179 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Functional tests for reduction ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import test + + +class BaseReductionTest(test.TestCase): + + def _tf_reduce(self, x, reduction_axes, keep_dims): + raise NotImplementedError() + + +class BigReductionTest(BaseReductionTest): + """Test reductions for sum and boolean all over a wide range of shapes.""" + + def _tf_reduce_max(self, x, reduction_axes, keep_dims): + return math_ops.reduce_max(x, reduction_axes, keep_dims) + + def _tf_reduce_all(self, x, reduction_axes, keep_dims): + return math_ops.reduce_all(x, reduction_axes, keep_dims) + + def _tf_reduce_mean(self, x, reduction_axes, keep_dims): + return math_ops.reduce_mean(x, reduction_axes, keep_dims) + + def _tf_reduce_sum(self, x, reduction_axes, keep_dims): + return math_ops.reduce_sum(x, reduction_axes, keep_dims) + + def testFloat32Sum(self): + # make sure we test all possible kernel invocations + # logic is the same for all ops, test just float32 for brevity + arr_ = np.ones([4097, 4097], dtype=np.float32) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_sum = np.ones([size_y], dtype=np.float32) * size_x + row_sum = np.ones([size_x], dtype=np.float32) * size_y + full_sum = np.ones([], dtype=np.float32) * size_x * size_y + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_sum = self._tf_reduce_sum(arr, 1, False) + tf_col_sum = self._tf_reduce_sum(arr, 0, False) + tf_full_sum = self._tf_reduce_sum(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_sum, tf_col_sum, tf_full_sum]) + self.assertAllClose(col_sum, tf_out_col) + self.assertAllClose(row_sum, tf_out_row) + self.assertAllClose(full_sum, tf_out_full) + + arr_ = np.ones([130, 
130, 130], dtype=np.float32) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.ones([size_x, size_z], dtype=np.float32) + sum_xz = np.ones([size_y], dtype=np.float32) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_mean(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_mean(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + def testFloat32Max(self): + # make sure we test all possible kernel invocations + # logic is the same for all ops, test just float32 for brevity + arr_ = np.random.uniform( + low=-3, high=-1, size=[4105, 4105]).astype(np.float32) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_max = np.max(arr, axis=0) + row_max = np.max(arr, axis=1) + full_max = np.max(col_max) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_max = self._tf_reduce_max(arr, 1, False) + tf_col_max = self._tf_reduce_max(arr, 0, False) + tf_full_max = self._tf_reduce_max(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_max, tf_col_max, tf_full_max]) + self.assertAllClose(col_max, tf_out_col) + self.assertAllClose(row_max, tf_out_row) + self.assertAllClose(full_max, tf_out_full) + + arr_ = np.random.uniform( + low=-3, high=-1, size=[130, 130, 130]).astype(np.float32) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.max(arr, axis=1) + sum_xz = np.max(arr, axis=(0, 2)) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_max(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_max(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + def testBooleanAll(self): + # make sure we test all possible kernel invocations + # test operation where T(0) is not the identity + arr_ = np.ones([4097, 4097], dtype=np.bool) + for size_x in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + for size_y in [ + 1, 2, 3, 4, 16, 17, 32, 33, 64, 65, 128, 131, 256, 263, 1024, 1025, + 4096, 4097 + ]: + arr = arr_[0:size_x, 0:size_y] + col_sum = np.ones([size_y], dtype=np.bool) + row_sum = np.ones([size_x], dtype=np.bool) + full_sum = np.ones([1], dtype=np.bool).reshape([]) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_row_sum = self._tf_reduce_all(arr, 1, False) + tf_col_sum = self._tf_reduce_all(arr, 0, False) + tf_full_sum = self._tf_reduce_all(arr, [0, 1], False) + tf_out_row, tf_out_col, tf_out_full = sess.run( + [tf_row_sum, tf_col_sum, tf_full_sum]) + self.assertAllClose(col_sum, tf_out_col) + self.assertAllClose(row_sum, tf_out_row) + self.assertAllClose(full_sum, tf_out_full) + + arr_ = np.ones([130, 130, 130], dtype=np.bool) + for size_x in range(1, 130, 13): + for size_y in range(1, 130, 13): + for size_z in range(1, 130, 13): + arr = arr_[0:size_x, 0:size_y, 0:size_z] + sum_y = np.ones([size_x, size_z], dtype=np.bool) + sum_xz = np.ones([size_y], 
dtype=np.bool) + + with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: + tf_sum_xz = self._tf_reduce_all(arr, [0, 2], False) + tf_sum_y = self._tf_reduce_all(arr, 1, False) + tf_out_sum_xz, tf_out_sum_y = sess.run([tf_sum_xz, tf_sum_y]) + self.assertAllClose(sum_y, tf_out_sum_y) + self.assertAllClose(sum_xz, tf_out_sum_xz) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index 68293aa5fe5..41c67743b6d 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -24,6 +24,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops @@ -171,7 +172,7 @@ class _Conv(base.Layer): padding=self.padding.upper(), data_format=utils.convert_data_format(self.data_format, self.rank + 2)) - if self.bias is not None: + if self.use_bias: if self.data_format == 'channels_first': if self.rank == 1: # nn.bias_add does not accept a 1D input tensor. @@ -988,7 +989,7 @@ class SeparableConv2D(Conv2D): rate=self.dilation_rate, data_format=utils.convert_data_format(self.data_format, ndim=4)) - if self.bias is not None: + if self.use_bias: outputs = nn.bias_add( outputs, self.bias, @@ -1293,20 +1294,21 @@ class Conv2DTranspose(Conv2D): padding=self.padding.upper(), data_format=utils.convert_data_format(self.data_format, ndim=4)) - # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) - outputs.set_shape(out_shape) + if context.in_graph_mode(): + # Infer the static output shape: + out_shape = inputs.get_shape().as_list() + out_shape[c_axis] = self.filters + out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis], + kernel_h, + self.padding, + stride_h) + out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], + kernel_w, + self.padding, + stride_w) + outputs.set_shape(out_shape) - if self.bias: + if self.use_bias: outputs = nn.bias_add( outputs, self.bias, @@ -1591,24 +1593,25 @@ class Conv3DTranspose(Conv3D): data_format=utils.convert_data_format(self.data_format, ndim=5), padding=self.padding.upper()) - # Infer the static output shape: - out_shape = inputs.get_shape().as_list() - out_shape[c_axis] = self.filters - out_shape[d_axis] = utils.deconv_output_length(out_shape[d_axis], - kernel_d, - self.padding, - stride_d) - out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis], - kernel_h, - self.padding, - stride_h) - out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], - kernel_w, - self.padding, - stride_w) - outputs.set_shape(out_shape) + if context.in_graph_mode(): + # Infer the static output shape: + out_shape = inputs.get_shape().as_list() + out_shape[c_axis] = self.filters + out_shape[d_axis] = utils.deconv_output_length(out_shape[d_axis], + kernel_d, + self.padding, + stride_d) + out_shape[h_axis] = utils.deconv_output_length(out_shape[h_axis], + kernel_h, + self.padding, + stride_h) + out_shape[w_axis] = utils.deconv_output_length(out_shape[w_axis], + kernel_w, + self.padding, + stride_w) + outputs.set_shape(out_shape) - if 
self.bias: + if self.use_bias: outputs_shape = outputs.shape.as_list() if self.data_format == 'channels_first': outputs_4d = array_ops.reshape(outputs, [ diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index 62f5881f164..1fc2d70f9ca 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -25,6 +25,7 @@ import six from six.moves import xrange # pylint: disable=redefined-builtin import numpy as np +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import ops @@ -242,15 +243,20 @@ class BatchNormalization(base.Layer): initializer=init_ops.zeros_initializer(), trainable=False) return var + with ops.device(None): - with ops.device(lambda _: self.moving_mean.device): + device = ((lambda _: self.moving_mean.device) + if context.in_graph_mode() else self.moving_mean.device) + with ops.device(device): self.renorm_mean = _renorm_variable('renorm_mean', (param_dim,)) self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) # We initialize renorm_stddev to 0, and maintain the (0-initialized) # renorm_stddev_weight. This allows us to (1) mix the average # stddev with the minibatch stddev early in training, and (2) compute # the unbiased average stddev by dividing renorm_stddev by the weight. - with ops.device(lambda _: self.moving_variance.device): + device = ((lambda _: self.moving_variance.device) + if context.in_graph_mode() else self.moving_variance.device) + with ops.device(device): self.renorm_stddev = _renorm_variable('renorm_stddev', (param_dim,)) self.renorm_stddev_weight = _renorm_variable( 'renorm_stddev_weight', ()) @@ -301,8 +307,12 @@ class BatchNormalization(base.Layer): self.moving_mean, mean, decay, zero_debias=False) variance_update = moving_averages.assign_moving_average( self.moving_variance, variance, decay, zero_debias=False) - self.add_update(mean_update, inputs=inputs) - self.add_update(variance_update, inputs=inputs) + if context.in_graph_mode(): + # Note that in Eager mode, the updates are already executed when running + # assign_moving_averages. So we do not need to put them into + # collections. + self.add_update(mean_update, inputs=inputs) + self.add_update(variance_update, inputs=inputs) return output @@ -335,6 +345,7 @@ class BatchNormalization(base.Layer): r = _smart_select(training, lambda: r, lambda: array_ops.ones_like(r)) d = _smart_select(training, lambda: d, lambda: array_ops.zeros_like(d)) decay = _smart_select(training, lambda: self.renorm_momentum, lambda: 1.) + def _update_renorm_variable(var, weight, value): """Updates a moving average and weight, returns the unbiased value.""" # Update the variables without zero debiasing. 
The debiasing will be @@ -418,9 +429,9 @@ class BatchNormalization(base.Layer): self.moving_mean, new_mean, decay, zero_debias=False) variance_update = moving_averages.assign_moving_average( self.moving_variance, new_variance, decay, zero_debias=False) - - self.add_update(mean_update, inputs=inputs) - self.add_update(variance_update, inputs=inputs) + if context.in_graph_mode(): + self.add_update(mean_update, inputs=inputs) + self.add_update(variance_update, inputs=inputs) else: mean, variance = self.moving_mean, self.moving_variance @@ -566,7 +577,6 @@ def batch_normalization(inputs, BatchNorm = BatchNormalization batch_norm = batch_normalization - # Helper function diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 2b9306e8748..33ba5df7a6e 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1466,12 +1466,15 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): with ops.name_scope(name, "zeros_like", [tensor]) as name: tensor = ops.convert_to_tensor(tensor, name="tensor") - if tensor.shape.is_fully_defined(): + # For now, variant types must be created via zeros_like; as we need to + # pass the input variant object to the proper zeros callback. + + if tensor.shape.is_fully_defined() and tensor.dtype != dtypes.variant: # We can produce a zeros tensor independent of the value of 'tensor', # since the shape is known statically. return zeros(tensor.shape, dtype=dtype or tensor.dtype, name=name) - if dtype is not None and dtype != tensor.dtype: + if dtype is not None and dtype != tensor.dtype and dtype != dtypes.variant: return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) else: diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 1d747f84008..1471b5909eb 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -19,11 +19,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from autograd import core as ag_core + from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import custom_gradient from tensorflow.python.eager import tape +from tensorflow.python.eager import tensor_node from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -574,7 +577,14 @@ class ResourceVariable(variables.Variable): def _run_op(a, *args): # pylint: disable=protected-access - return getattr(ops.Tensor, operator)(a._AsTensor(), *args) + value = a._AsTensor() + if ag_core.isnode(value): + # This avoids autograd trying to wrap a ResourceVariable. + value = ops.convert_to_tensor(value) + args = [ops.convert_to_tensor(x) for x in args] + return getattr(tensor_node.TensorNode, operator)(value, *args) + else: + return getattr(ops.Tensor, operator)(value, *args) # Propagate __doc__ to wrapper try: diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 9093c12968f..645775239fd 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -1698,7 +1698,7 @@ def variable_scope(name_or_scope, use when doing asynchronous distributed training. Returns: - A scope that can be to captured and reused. + A scope that can be captured and reused. 
Raises: ValueError: when trying to reuse within a create scope, or create within diff --git a/tensorflow/python/training/checkpoint_ops.py b/tensorflow/python/training/checkpoint_ops.py new file mode 100644 index 00000000000..70460ceb480 --- /dev/null +++ b/tensorflow/python/training/checkpoint_ops.py @@ -0,0 +1,453 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Operations for generating and loading vocab remappings.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_checkpoint_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import math_ops + +ops.NotDifferentiable("GenerateVocabRemapping") +ops.NotDifferentiable("LoadAndRemapMatrix") + + +def _load_and_remap_matrix(ckpt_path, + old_tensor_name, + new_row_vocab_offset, + num_rows_to_load, + new_col_vocab_size, + initializer, + old_row_vocab_file=None, + new_row_vocab_file=None, + old_col_vocab_file=None, + new_col_vocab_file=None, + num_row_oov_buckets=0, + num_col_oov_buckets=0, + max_rows_in_memory=-1): + """Loads a 2-D (matrix) `Tensor` from checkpoint. + + Generates 1D-remappings for rows and columns using the + `GenerateVocabRemapping` op, and initializes any anticipated values with the + provided initializer. Then, uses the `LoadAndRemapMatrix` op to create a + matrix that loads existing values from the checkpoint, while filling out + "missing" values with the newly initialized values. See + contrib/framework/ops/checkpoint_ops.cc for more information on the wrapped + functionality (LoadAndRemapMatrix). This wrapper can be used to perform only + row remapping or only col remapping. If only row remapping is desired, + {new,old}_col_vocab_file should be `None`, and vice versa for column + remapping. + + NOTE: This only supports div-partitioning the vocabulary on the 1st dimension + (row axis) via `new_row_vocab_offset`. + + Args: + ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) + from which the old matrix `Tensor` will be loaded. + old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. + new_row_vocab_offset: A 0-indexed integer representing what line to + start reading at in the new row vocabulary. Used for partitioned + variables. + num_rows_to_load: Number of rows to load for the new vocabulary (note: to + support variable partitioning and partial loading, this does not need to + be the same as the number of entries in `new_row_vocab_file`). + new_col_vocab_size: Number of columns to load - should be the same as the + number of entries in `new_col_vocab_file`, since we don't support + partitioning along the column axis. 
+ initializer: Callable initializer function that accepts a 1-D tensor as the + arg to specify the shape of the returned tensor. Used to initialize + missing values. + old_row_vocab_file: A scalar `Tensor` of type `string` containing the + path to the old row vocabulary file. Can be None, which represents no + remapping on the row axis. + new_row_vocab_file: A scalar `Tensor` of type `string` containing the path + to the new row vocabulary file. Can be None, which represents no remapping + on the row axis - in which case, `new_row_vocab_offset` and + `num_rows_to_load` work under the assumption that the new row vocab is the + same as the old row vocab. + old_col_vocab_file: A scalar `Tensor` of type `string` containing the + path to the old column vocabulary file. Can be None, which represents no + remapping on the column axis. + new_col_vocab_file: A scalar `Tensor` of type `string` containing the path + to the new column vocabulary file. Can be None, which represents no + remapping on the column axis - in which case, `new_col_vocab_size` works + under the assumption that the new col vocab is the same as the old col + vocab. + num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows + to append. Must be >= 0. + num_col_oov_buckets: `int` specifying the number of out-of-vocabulary + columns to append. Must be >= 0. + max_rows_in_memory: `int` specifying the maximum number of rows to load from + the checkpoint at once. If less than or equal to 0, the entire matrix will + be loaded into memory. Setting this arg trades increased disk reads for + lower memory usage. + + Returns: + A Tensor of shape `[num_rows_to_load + num_row_oov_buckets, + new_col_vocab_size + num_col_oov_buckets]`, with values loaded from the + specified tensor in the checkpoint, and any missing or OOV values + initialized with the given `initializer`. + + Raises: + ValueError: If `num_row_oov_buckets` or `num_col_oov_buckets` < 0. + ValueError: If either `old_row_vocab_file` or `new_row_vocab_file` is + provided, while the other is not. Same for `old_col_vocab_file` and + `new_col_vocab_file`. + ValueError: If neither row vocabs or col vocabs are provided. + """ + if num_row_oov_buckets < 0: + raise ValueError("num_row_oov_buckets must be >= 0, but received %d" % + num_row_oov_buckets) + if num_col_oov_buckets < 0: + raise ValueError("num_col_oov_buckets must be >= 0, but received %d" % + num_col_oov_buckets) + + if bool(old_row_vocab_file) != bool(new_row_vocab_file): + raise ValueError( + "old_row_vocab_file and new_row_vocab_file must both be specified or " + "left unspecified. old_row_vocab_file='{}', new_row_vocab_file='{}'". + format(old_row_vocab_file, new_row_vocab_file)) + if bool(old_col_vocab_file) != bool(new_col_vocab_file): + raise ValueError( + "old_col_vocab_file and new_col_vocab_file must both be specified or " + "left unspecified. old_col_vocab_file='{}', new_col_vocab_file='{}'". + format(old_col_vocab_file, new_col_vocab_file)) + + remap_rows = new_row_vocab_file and old_row_vocab_file + remap_cols = new_col_vocab_file and old_col_vocab_file + if not (remap_rows or remap_cols): + raise ValueError( + "Must provide either row or column vocab files. 
If no remapping is " + "necessary, consider using `tf.contrib.framework.init_from_checkpoint` " + "instead.") + + num_rows_present = num_rows_to_load + if remap_rows: + row_remapping, num_rows_present = ( + gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access + new_vocab_file=new_row_vocab_file, + old_vocab_file=old_row_vocab_file, + new_vocab_offset=new_row_vocab_offset, + num_new_vocab=num_rows_to_load)) + else: + # Even when the rows are not being reordered, we still need to generate a + # remapping to account for initializing partitioned Variables (when + # new_row_vocab_offset is non-zero). + row_remapping = math_ops.range( + new_row_vocab_offset, + new_row_vocab_offset + num_rows_to_load, + dtype=dtypes.int64) + + col_remapping = [] + num_cols_present = new_col_vocab_size + if remap_cols: + col_remapping, num_cols_present = ( + gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access + new_vocab_file=new_col_vocab_file, + old_vocab_file=old_col_vocab_file, + new_vocab_offset=0, # Offset is unused for cols (no partitioning). + num_new_vocab=new_col_vocab_size)) + + init_vals = initializer([ + num_rows_to_load * new_col_vocab_size - + num_rows_present * num_cols_present, 1 + ]) + return_tensor = gen_checkpoint_ops._load_and_remap_matrix( # pylint: disable=protected-access + ckpt_path=ckpt_path, + old_tensor_name=old_tensor_name, + row_remapping=row_remapping, + col_remapping=col_remapping, + initializing_values=init_vals, + num_rows=num_rows_to_load, + num_cols=new_col_vocab_size, + max_rows_in_memory=max_rows_in_memory) + + # Add OOV row(s) and column(s). + if num_row_oov_buckets > 0: + init_row_oov_val = initializer([num_row_oov_buckets, new_col_vocab_size]) + init_row_oov_val = ops.convert_to_tensor(init_row_oov_val) + return_tensor = array_ops.concat([return_tensor, init_row_oov_val], 0) + if num_col_oov_buckets > 0: + # We need to add any row OOV to the new column shape. + init_col_oov_val = initializer( + [num_rows_to_load + num_row_oov_buckets, num_col_oov_buckets]) + init_col_oov_val = ops.convert_to_tensor(init_col_oov_val) + return_tensor = array_ops.concat([return_tensor, init_col_oov_val], 1) + + return return_tensor + + +def _load_and_remap_matrix_initializer(ckpt_path, + old_tensor_name, + new_row_vocab_size, + new_col_vocab_size, + old_row_vocab_file=None, + new_row_vocab_file=None, + old_col_vocab_file=None, + new_col_vocab_file=None, + num_row_oov_buckets=0, + num_col_oov_buckets=0, + initializer=None, + max_rows_in_memory=-1): + r"""Returns a var initializer for loading and remapping a 2-D (matrix) tensor. + + The returned initializer loads a 2-D (matrix) `Tensor` with name + `old_tensor_name` from the checkpoint at `ckpt_path`. It will reorder the + rows/columns according to the specified vocab files and append additional + out-of-vocabulary rows/columns according to the number of OOV buckets. + + The format of the file at the `{old,new}_{row,col}_vocab_file` path should be + a text file, with each line containing a single entity within the vocabulary. + Let the function `line_of(f, "x")` return the 0-indexed line number of the + entity "x" in file f, and the function `entity_at(f, i)` return the entity at + line i of file f. Then, row i of the new output matrix will be taken from row + `line_of(old_row_vocab_file, entity_at(new_row_vocab_file, i))` of the old + matrix. 
If any entity in `new_row_vocab_file` is not found in + `old_row_vocab_file`, that row is considered a "missing" row, and its values + will be initialized using the `initializer` arg. The same logic also applies + for the columns. + + For example, assuming that: + + * `old_row_vocab_file` contains "mercury\nvenus\nmars" + * `new_row_vocab_file` contains "venus\njupiter\nmercury" + * `old_col_vocab_file` contains "good\nbetter\nbest" + * `new_col_vocab_file` contains "good\nbest\nfantastic" + * `initializer` returns the natural numbers `[1, 2, 3, 4, ...]` + * `w(i, j)` represents the value from row i, column j of the old matrix + + Then the new output matrix will look like: + + `[[w(1, 0), w(1, 2), 1], + [2, 3, 4], + [w(0, 0), w(0, 2), 5]]` + + If we further specify that: + + * `num_row_oov_buckets` == 2 + * `num_col_oov_buckets` == 1 + + Then the new output matrix will look like: + + `[[w(1, 0), w(1, 2), 1, 12], + [2, 3, 4, 13], + [w(0, 0), w(0, 2), 5, 14], + [6, 7, 8, 15], + [9, 10, 11, 16]]` + + If `{old,new}_row_vocab_file` are None, we assume that the old and new row + vocab files are the same, and no row remapping is done. If + `{old,new}_col_vocab_file` are None, we assume that the old and new column + vocab files are the same, and no column remapping is done. + + The returned initializer only supports div-partitioning along the row axis. It + does not support partitioning along the column axis or mod-partitioning. + + NOTE: When this is used to warm-start variables, client code should use + `tf.lookup.index_table_from_tensor()` like + contrib/layers/python/layers/feature_column.py does, as opposed to + `tf.feature_to_id()` - in order to ensure the underlying lookup tables are the + same. + + Args: + ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) + from which the old matrix `Tensor` will be loaded. + old_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. + new_row_vocab_size: `int` specifying the number of entries in + `new_row_vocab_file`. If no row remapping is needed (no row vocab + provided), this should be equal to the number of rows to load from the old + matrix (which can theoretically be smaller than the number of rows in the + old matrix). + new_col_vocab_size: `int` specifying the number of entries in + `new_col_vocab_file`. If no column remapping is needed (no column vocab + provided), this should be equal to the number of columns in the old + matrix. + old_row_vocab_file: A scalar `Tensor` of type `string` containing the + path to the old row vocabulary file. Can be None, which represents no + remapping on the row axis. + new_row_vocab_file: A scalar `Tensor` of type `string` containing the path + to the new row vocabulary file. Can be None, which represents no remapping + on the row axis. + old_col_vocab_file: A scalar `Tensor` of type `string` containing the + path to the old column vocabulary file. Can be None, which represents no + remapping on the column axis. + new_col_vocab_file: A scalar `Tensor` of type `string` containing the path + to the new column vocabulary file. Can be None, which represents no + remapping on the column axis. + num_row_oov_buckets: `int` specifying the number of out-of-vocabulary rows + to append. Must be >= 0. + num_col_oov_buckets: `int` specifying the number of out-of-vocabulary + columns to append. Must be >= 0. + initializer: Initializer function to initialize missing values. Accepts a + 1-D tensor as the arg to specify the shape of the returned tensor. 
If + `None`, defaults to using `zeros_initializer()`. + max_rows_in_memory: `int` specifying the maximum number of rows to load from + the checkpoint at once. If less than or equal to 0, the entire matrix will + be loaded into memory. Setting this arg trades increased disk reads for + lower memory usage. + + Returns: + A variable initializer function that should be used to initialize a + (potentially partitioned) `Variable` whose complete shape is + `[new_row_vocab_size + num_row_oov_buckets, new_col_vocab_size + + num_col_oov_buckets]`. + + Raises: + TypeError: If `initializer` is specified but not callable. + """ + if initializer is None: + # TODO(b/25671353): Consider using sqrt(6/(fan_in + fan_out)) instead, from + # Glorot and Bengio, 2010. + initializer = init_ops.zeros_initializer() + + if not callable(initializer): + raise TypeError( + "initializer must be callable, instead of being {} of type {}.".format( + initializer, type(initializer))) + + def _initializer(shape, dtype=dtypes.float32, partition_info=None): + """Variable initializer. + + Args: + shape: Shape of `Tensor` to return. Should include OOV on both axes. + dtype: Must be float32. + partition_info: variable_scope._PartitionInfo. + + Returns: + `Tensor` of shape `shape`. + + Raises: + TypeError: If `dtype` is anything other than float32. + ValueError: For shape mismatch upon invocation. + """ + # Sanity checks. + if dtype != dtypes.float32: + raise TypeError( + "Currently, only float32 is supported. Received dtype: {}".format( + dtype)) + if len(shape) != 2: + raise ValueError("Expected 2-dim shape, but received: {}".format(shape)) + if shape[0] <= 0: + raise ValueError( + "Expected 1st dim of shape to be > 0, but received shape: {}".format( + shape)) + if shape[1] != (new_col_vocab_size + num_col_oov_buckets): + raise ValueError( + "Expected 2nd dim of shape to be new_col_vocab_size ({}) + " + "num_col_oov_buckets ({}) = {}, but received shape: {}".format( + new_col_vocab_size, num_col_oov_buckets, + new_col_vocab_size + num_col_oov_buckets, shape)) + + offset = 0 + if partition_info is not None: + offset = partition_info.single_offset(shape) + + if offset + shape[0] > new_row_vocab_size + num_row_oov_buckets: + raise ValueError( + "Trying to initialize {} additional rows after {} rows have already " + "been initialized, which would exceed expected total row count of " + "new_row_vocab_size ({}) + num_row_oov_buckets ({}) = {}.".format( + shape[0], offset, new_row_vocab_size, num_row_oov_buckets, + new_row_vocab_size + num_row_oov_buckets)) + + row_oov_buckets_to_use = min(shape[0], + max(0, offset + shape[0] - new_row_vocab_size)) + num_rows_to_load = shape[0] - row_oov_buckets_to_use + + return _load_and_remap_matrix( + ckpt_path=ckpt_path, + old_tensor_name=old_tensor_name, + new_row_vocab_offset=offset, + num_rows_to_load=num_rows_to_load, + new_col_vocab_size=new_col_vocab_size, + initializer=initializer, + old_row_vocab_file=old_row_vocab_file, + new_row_vocab_file=new_row_vocab_file, + old_col_vocab_file=old_col_vocab_file, + new_col_vocab_file=new_col_vocab_file, + num_row_oov_buckets=row_oov_buckets_to_use, + num_col_oov_buckets=num_col_oov_buckets, + max_rows_in_memory=max_rows_in_memory) + + return _initializer + + +def _load_embedding_initializer(ckpt_path, + embedding_tensor_name, + new_vocab_size, + embedding_dim, + old_vocab_file, + new_vocab_file, + num_oov_buckets=0, + initializer=None, + max_rows_in_memory=-1): + """Returns a variable initializer for loading pre-trained embeddings. 
+ + Wrapper around `load_and_remap_matrix_initializer()` specialized for loading + embedding weights and remapping according to the provided vocab files. See + docs for `load_and_remap_matrix_initializer()` for more details. + + NOTE: Only for use with div-partitioned variables / vocabularies. + + Args: + ckpt_path: Path to the TensorFlow checkpoint (version 2, `TensorBundle`) + from which the old matrix `Tensor` will be loaded. + embedding_tensor_name: Name of the 2-D `Tensor` to load from checkpoint. + new_vocab_size: Number of entries in the new vocab. + embedding_dim: `int` specifying the dimension of the embedding vectors from + the checkpoint. Must match the number of columns in the old embedding + matrix. + old_vocab_file: A scalar `Tensor` of type `string` containing the + path to the old vocabulary file. + new_vocab_file: A scalar `Tensor` of type `string` containing the + path to the new vocabulary file. + num_oov_buckets: `int` specifying the number of out-of-vocabulary + buckets to use. Must be >= 0. + initializer: Initializer function that accepts a 1-D tensor as the arg to + specify the shape of the returned tensor. If `None`, defaults to using + `truncated_normal_initializer()`. + max_rows_in_memory: `int` specifying the maximum number of rows to load from + the checkpoint at once. If less than or equal to 0, the entire matrix will + be loaded into memory. Setting this arg trades increased disk reads for + lower memory usage. + + Returns: + A variable initializer function. + """ + if initializer is None: + # TODO(b/25671353): This should be kept in sync with the stddev used by + # feature_column.py's _EmbeddingColumn. + initializer = init_ops.truncated_normal_initializer( + stddev=1.0 / math.sqrt(embedding_dim)) + + return _load_and_remap_matrix_initializer( + ckpt_path=ckpt_path, + old_tensor_name=embedding_tensor_name, + new_row_vocab_size=new_vocab_size, + new_col_vocab_size=embedding_dim, + old_row_vocab_file=old_vocab_file, + new_row_vocab_file=new_vocab_file, + old_col_vocab_file=None, + new_col_vocab_file=None, + num_row_oov_buckets=num_oov_buckets, + num_col_oov_buckets=0, + initializer=initializer, + max_rows_in_memory=max_rows_in_memory) diff --git a/tensorflow/python/training/checkpoint_ops_test.py b/tensorflow/python/training/checkpoint_ops_test.py new file mode 100644 index 00000000000..39c4d2911f2 --- /dev/null +++ b/tensorflow/python/training/checkpoint_ops_test.py @@ -0,0 +1,305 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Functional tests for Python wrappers around warm-starting.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import numpy as np + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import init_ops +from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow.python.training import checkpoint_ops +from tensorflow.python.training import saver as saver_lib + + +class LoadAndRemapWrappersTest(test.TestCase): + """Tests for the functionality of the Python wrappers.""" + + def setUp(self): + ops.reset_default_graph() + # Create the checkpoint file in a temporary directory. + checkpoint_prefix = os.path.join(self.get_temp_dir(), 'model') + # 0., 1., ..., 79. reshaped into [5, 16]. + initializer = init_ops.constant_initializer( + np.reshape(np.linspace(0.0, 79, 5 * 16), (5, 16))) + with self.test_session() as sess: + with variable_scope.variable_scope('some_scope'): + variable_scope.get_variable(name='embeddings', shape=[5, 16], + initializer=initializer) + sess.run(variables.global_variables_initializer()) + saver = saver_lib.Saver() + saver.save(sess, checkpoint_prefix, global_step=5) + self.checkpoint_file = '{}-5'.format(checkpoint_prefix) + + # Create the vocabulary files. + self.new_feature_vocab_file = os.path.join( + self.get_temp_dir(), 'new_feature_vocab.txt') + with open(self.new_feature_vocab_file, 'w') as f: + f.write('\n'.join(['zero', 'one', 'two', 'three', 'four']) + '\n') + + self.old_feature_vocab_file = os.path.join( + self.get_temp_dir(), 'old_feature_vocab.txt') + with open(self.old_feature_vocab_file, 'w') as f: + f.write('\n'.join(['zero', 'one', 'two', 'three']) + '\n') + + self.new_class_vocab_file = os.path.join( + self.get_temp_dir(), 'new_class_vocab.txt') + with open(self.new_class_vocab_file, 'w') as f: + f.write('\n'.join(['MISSING', 'knitting', 'flask', 'eminem']) + '\n') + + self.old_class_vocab_file = os.path.join( + self.get_temp_dir(), 'old_class_vocab.txt') + with open(self.old_class_vocab_file, 'w') as f: + f.write('\n'.join(['knitting', 'eminem', 'MISSING']) + '\n') + + self.init_val = 42 + + def _init_val_initializer(shape, dtype=None, partition_info=None): + del dtype, partition_info # Unused by this unit-testing initializer. + return array_ops.tile( + constant_op.constant([[self.init_val]], dtype=dtypes.float32), shape) + + self.initializer = _init_val_initializer + + def test_load_and_remap_matrix(self): + """Tests the end-to-end loading / remapping of weights.""" + # _load_and_remap_matrix() is the generalized wrapper that takes in row and + # column vocabulary files, calls the relevant remappings, and returns the + # weight matrix. Take this example to be linear multi-class by providing + # both row and column vocabularies. 
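+ # From setUp: the checkpoint variable 'some_scope/embeddings' is a [5, 16] + # matrix holding the values 0..79; the feature vocab files drive the row + # remapping and the class vocab files drive the column remapping, with one + # OOV bucket appended on each axis.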
+ remapped_matrix = checkpoint_ops._load_and_remap_matrix( + new_row_vocab_file=self.new_feature_vocab_file, + old_row_vocab_file=self.old_feature_vocab_file, + num_rows_to_load=4, + new_col_vocab_file=self.new_class_vocab_file, + old_col_vocab_file=self.old_class_vocab_file, + new_col_vocab_size=4, + old_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + new_row_vocab_offset=1, + initializer=self.initializer, + num_row_oov_buckets=1, + num_col_oov_buckets=1) + + # [4 in vocab + 1 oov features, 4 in vocab + 1 oov classes]. The offset + # of 1 means we read the new feature vocab starting at its second entry + # ('one'), so the first loaded row comes from row 1 of the old matrix. + expected_remapped_matrix = np.concatenate( + [ + np.reshape([18, 34, 50, self.init_val, self.init_val], [5, 1]), + np.reshape([16, 32, 48, self.init_val, self.init_val], [5, 1]), + np.reshape([self.init_val] * 5, [5, 1]), + np.reshape([17, 33, 49, self.init_val, self.init_val], [5, 1]), + np.reshape([self.init_val] * 5, [5, 1]) + ], + axis=1) + + with self.test_session(): + self.assertAllClose(expected_remapped_matrix, remapped_matrix.eval()) + + def test_load_and_remap_output_layer_weight_initializer_linear(self): + """Tests for the output layer initializer in the linear multi-class case.""" + loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer( + new_row_vocab_size=5, + new_col_vocab_file=self.new_class_vocab_file, + old_col_vocab_file=self.old_class_vocab_file, + new_col_vocab_size=4, + old_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + new_row_vocab_file=self.new_feature_vocab_file, + old_row_vocab_file=self.old_feature_vocab_file, + num_row_oov_buckets=1, + num_col_oov_buckets=1, + initializer=self.initializer)) + + expected_remapped_matrix = np.concatenate( + [ + np.reshape([2, 18, 34, 50, self.init_val, self.init_val], [6, 1]), + np.reshape([0, 16, 32, 48, self.init_val, self.init_val], [6, 1]), + np.reshape([self.init_val] * 6, [6, 1]), + np.reshape([1, 17, 33, 49, self.init_val, self.init_val], [6, 1]), + np.reshape([self.init_val] * 6, [6, 1]) + ], + axis=1) + + # The new weight matrix is of size + # [5 feature vocab + 1 feature OOV, 4 class vocab + 1 class OOV]. Use a + # partitioned variable to confirm that the offset logic works. + remapped_matrix = variable_scope.get_variable( + name='linear/obtained_weight_matrix', + shape=[6, 5], + initializer=loading_initializer, + partitioner=partitioned_variables.fixed_size_partitioner(2)) + + with self.test_session(): + variables.global_variables_initializer().run() + self.assertAllClose(expected_remapped_matrix, + remapped_matrix.as_tensor().eval()) + + def test_load_and_remap_output_layer_weight_initializer_dnn_output(self): + """Tests for the output layer initializer in the DNN output case.""" + loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer( + new_row_vocab_size=5, + new_col_vocab_file=self.new_class_vocab_file, + old_col_vocab_file=self.old_class_vocab_file, + new_col_vocab_size=4, + old_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + num_col_oov_buckets=1, + initializer=self.initializer)) + + expected_remapped_matrix = np.concatenate( + [ + np.reshape([2, 18, 34, 50, 66], [5, 1]), + np.reshape([0, 16, 32, 48, 64], [5, 1]), + np.reshape([self.init_val] * 5, [5, 1]), + np.reshape([1, 17, 33, 49, 65], [5, 1]), + np.reshape([self.init_val] * 5, [5, 1]) + ], + axis=1) + + # The new weight matrix is of size + # [5-sized input layer, 4 class vocab + 1 class OOV].
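+ # No row vocab files were passed to the initializer above, so all five input + # rows are read from the checkpoint in their original order and only the + # class columns are remapped; hence the values 64, 65 and 66 from the + # checkpoint's fifth row appear in the expected matrix.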
+ remapped_matrix = variable_scope.get_variable( + name='dnn_output/obtained_weight_matrix', + shape=[5, 5], + initializer=loading_initializer, + partitioner=partitioned_variables.fixed_size_partitioner(2)) + + with self.test_session(): + variables.global_variables_initializer().run() + self.assertAllClose(expected_remapped_matrix, + remapped_matrix.as_tensor().eval()) + + def test_initializer_with_oov_only_partition(self): + """Tests for the output layer initializer where one partition is all OOV.""" + loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer( + new_row_vocab_size=5, + new_col_vocab_file=self.new_class_vocab_file, + old_col_vocab_file=self.old_class_vocab_file, + new_col_vocab_size=4, + old_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + new_row_vocab_file=self.new_feature_vocab_file, + old_row_vocab_file=self.old_feature_vocab_file, + num_row_oov_buckets=5, + num_col_oov_buckets=1, + initializer=self.initializer)) + + expected_remapped_matrix = np.concatenate( + [ + np.reshape([2, 18, 34, 50] + [self.init_val] * 6, [10, 1]), + np.reshape([0, 16, 32, 48] + [self.init_val] * 6, [10, 1]), + np.reshape([self.init_val] * 10, [10, 1]), + np.reshape([1, 17, 33, 49] + [self.init_val] * 6, [10, 1]), + np.reshape([self.init_val] * 10, [10, 1]), + ], + axis=1) + + # The new weight matrix is of size + # [5 feature vocab + 5 feature OOV, 4 class vocab + 1 class OOV]. The + # second partition has only OOV. + remapped_matrix = variable_scope.get_variable( + name='linear_all_oov/obtained_weight_matrix', + shape=[10, 5], + initializer=loading_initializer, + partitioner=partitioned_variables.fixed_size_partitioner(2)) + + with self.test_session(): + variables.global_variables_initializer().run() + self.assertAllClose(expected_remapped_matrix, + remapped_matrix.as_tensor().eval()) + + def test_load_and_remap_linear_multiclass_initializer_default_init(self): + """Tests where the zeros_initializer default is used for linear.""" + loading_initializer = (checkpoint_ops._load_and_remap_matrix_initializer( + new_row_vocab_size=5, + new_col_vocab_file=self.new_class_vocab_file, + old_col_vocab_file=self.old_class_vocab_file, + new_col_vocab_size=4, + old_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + new_row_vocab_file=self.new_feature_vocab_file, + old_row_vocab_file=self.old_feature_vocab_file, + num_row_oov_buckets=1, + num_col_oov_buckets=1)) + + expected_remapped_matrix = np.concatenate( + [ + np.reshape([2, 18, 34, 50, 0, 0], [6, 1]), + np.reshape([0, 16, 32, 48, 0, 0], [6, 1]), + np.reshape([0] * 6, [6, 1]), + np.reshape([1, 17, 33, 49, 0, 0], [6, 1]), + np.reshape([0] * 6, [6, 1]) + ], + axis=1) + + remapped_matrix = variable_scope.get_variable( + name='linear_init_fallback/obtained_weight_matrix', + shape=[6, 5], + initializer=loading_initializer, + partitioner=partitioned_variables.fixed_size_partitioner(2)) + + with self.test_session(): + variables.global_variables_initializer().run() + self.assertAllClose(expected_remapped_matrix, + remapped_matrix.as_tensor().eval()) + + def test_load_embedding_initializer(self): + """Tests for the load_embedding_initializer wrapper.""" + embedding_loading_initializer = (checkpoint_ops._load_embedding_initializer( + new_vocab_file=self.new_feature_vocab_file, + old_vocab_file=self.old_feature_vocab_file, + new_vocab_size=5, + embedding_dim=16, + embedding_tensor_name='some_scope/embeddings', + ckpt_path=[self.checkpoint_file], + num_oov_buckets=1, + initializer=self.initializer)) + 
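+ # The four features present in both vocab files ('zero' through 'three') + # keep their checkpoint rows (values 0..63); the new feature 'four' and the + # single OOV bucket fall back to the constant test initializer.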
+ expected_remapped_embeddings = np.concatenate( + [ + np.reshape(range(64), [4, 16]), + np.reshape([self.init_val] * 32, [2, 16]), + ], + axis=0) + + # The new weight matrix is of size + # [5 feature vocab + 1 feature OOV, 16 (embedding dimension)], where the + # last vocab row (2nd last row) is newly initialized (wasn't found in + # previous vocab) and the actual last row is OOV and also newly initialized. + # Use a partitioned variable to confirm that the offset logic works. + remapped_embeddings = variable_scope.get_variable( + name='embedding/obtained_embedding_matrix', + shape=[6, 16], + initializer=embedding_loading_initializer, + partitioner=partitioned_variables.fixed_size_partitioner(2)) + + with self.test_session(): + variables.global_variables_initializer().run() + self.assertAllClose(expected_remapped_embeddings, + remapped_embeddings.as_tensor().eval()) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py index 6f3c3f15103..4405678a6b8 100755 --- a/tensorflow/tools/ci_build/update_version.py +++ b/tensorflow/tools/ci_build/update_version.py @@ -277,8 +277,8 @@ def check_for_lingering_string(lingering_string): formatted_string = lingering_string.replace(".", r"\.") try: linger_str_output = subprocess.check_output( - ['grep', '-rnoH', formatted_string, TF_SRC_DIR]) - linger_strs = linger_str_output.decode('utf8').split("\n") + ["grep", "-rnoH", formatted_string, TF_SRC_DIR]) + linger_strs = linger_str_output.decode("utf8").split("\n") except subprocess.CalledProcessError: linger_strs = [] diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 34cb19f9cb7..d62316964f8 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -158,6 +158,7 @@ sh_binary( "//tensorflow/contrib/ndlstm:ndlstm", "//tensorflow/contrib/nn:nn_py", "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/receptive_field:receptive_field_pip", "//tensorflow/contrib/session_bundle:session_bundle_pip", "//tensorflow/contrib/signal:signal_py", "//tensorflow/contrib/slim:slim", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 868b5c6e42e..176719fabb4 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -683,11 +683,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): native.new_http_archive( name = "cub_archive", urls = [ - "http://mirror.bazel.build/github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz", - "https://github.com/NVlabs/cub/archive/69ceda618313df8e9cac6659d607b08949455d14.tar.gz", + "http://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.3.zip", + "https://github.com/NVlabs/cub/archive/1.7.3.zip", ], - sha256 = "87e856522c283b8ea887c3b61d7d5b252d2dd74abac4f1d756d776e721223e82", - strip_prefix = "cub-69ceda618313df8e9cac6659d607b08949455d14", + sha256 = "b7ead9e291d34ffa8074243541c1380d63be63f88de23de8ee548db573b72ebe", + strip_prefix = "cub-1.7.3", build_file = str(Label("//third_party:cub.BUILD")), ) diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl index 7d8b6005135..98cb326572e 100644 --- a/third_party/gpus/crosstool/BUILD.tpl +++ b/third_party/gpus/crosstool/BUILD.tpl @@ -12,12 +12,12 @@ cc_toolchain_suite( cc_toolchain( name = "cc-compiler-local", - all_files = ":crosstool_wrapper_driver_is_not_gcc", + all_files = "%{linker_files}", compiler_files = ":empty", cpu = "local", dwp_files = ":empty", 
dynamic_runtime_libs = [":empty"], - linker_files = ":crosstool_wrapper_driver_is_not_gcc", + linker_files = "%{linker_files}", objcopy_files = ":empty", static_runtime_libs = [":empty"], strip_files = ":empty", @@ -30,12 +30,12 @@ cc_toolchain( cc_toolchain( name = "cc-compiler-darwin", - all_files = ":crosstool_wrapper_driver_is_not_gcc", + all_files = "%{linker_files}", compiler_files = ":empty", cpu = "darwin", dwp_files = ":empty", dynamic_runtime_libs = [":empty"], - linker_files = ":crosstool_wrapper_driver_is_not_gcc", + linker_files = "%{linker_files}", objcopy_files = ":empty", static_runtime_libs = [":empty"], strip_files = ":empty", diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index b85e565f362..4a0f4710881 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -971,7 +971,6 @@ def _create_local_cuda_repository(repository_ctx): ' ":cudnn-include",') }) # Set up crosstool/ - _file(repository_ctx, "crosstool:BUILD") cc = find_cc(repository_ctx) host_compiler_includes = _host_compiler_includes(repository_ctx, cc) cuda_defines = { @@ -981,11 +980,14 @@ def _create_local_cuda_repository(repository_ctx): } if _use_cuda_clang(repository_ctx): cuda_defines["%{clang_path}"] = cc + _tpl(repository_ctx, "crosstool:BUILD", {"%{linker_files}": ":empty"}) _tpl(repository_ctx, "crosstool:CROSSTOOL_clang", cuda_defines, out="crosstool/CROSSTOOL") else: nvcc_path = str(repository_ctx.path("%s/bin/nvcc%s" % (cuda_config.cuda_toolkit_path, ".exe" if cuda_config.cpu_value == "Windows" else ""))) + _tpl(repository_ctx, "crosstool:BUILD", + {"%{linker_files}": ":crosstool_wrapper_driver_is_not_gcc"}) _tpl(repository_ctx, "crosstool:CROSSTOOL_nvcc", cuda_defines, out="crosstool/CROSSTOOL") _tpl(repository_ctx, "crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc",