diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 37a04a2f3c3..5b9517a0e55 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -129,8 +129,6 @@ filegroup( "//tensorflow/contrib/tensorboard:all_files", "//tensorflow/contrib/testing:all_files", "//tensorflow/contrib/tfprof/python/tools/tfprof:all_files", - "//tensorflow/contrib/tfprof/tools/tfprof:all_files", - "//tensorflow/contrib/tfprof/tools/tfprof/internal:all_files", "//tensorflow/contrib/training:all_files", "//tensorflow/contrib/util:all_files", "//tensorflow/core:all_files", @@ -188,6 +186,8 @@ filegroup( "//tensorflow/tools/proto_text:all_files", "//tensorflow/tools/quantization:all_files", "//tensorflow/tools/test:all_files", + "//tensorflow/tools/tfprof:all_files", + "//tensorflow/tools/tfprof/internal:all_files", "//tensorflow/user_ops:all_files", "//third_party/hadoop:all_files", ], diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD index 1bcbba22675..57579065923 100644 --- a/tensorflow/cc/BUILD +++ b/tensorflow/cc/BUILD @@ -430,6 +430,7 @@ tf_cc_test( "//tensorflow/core:core_cpu", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", "//tensorflow/core:test", diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index eeedaaff27b..90c87210b18 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -34,6 +34,7 @@ cc_library( ":constants", "//tensorflow/core:core_cpu", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", "//tensorflow/core/util/tensor_bundle:naming", @@ -63,7 +64,6 @@ tf_cc_test( filegroup( name = "saved_model_half_plus_two", srcs = glob([ - "testdata/half_plus_two/**", "testdata/half_plus_two_pbtxt/**", "testdata/half_plus_two_sharded/**", ]), diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h index 
f67c56ba1ca..654e7651702 100644 --- a/tensorflow/cc/saved_model/constants.h +++ b/tensorflow/cc/saved_model/constants.h @@ -30,6 +30,9 @@ constexpr char kSavedModelFilenamePb[] = "saved_model.pb"; // SavedModel text format proto filename. constexpr char kSavedModelFilenamePbTxt[] = "saved_model.pbtxt"; +// SavedModel legacy init op key. +constexpr char kSavedModelLegacyInitOpKey[] = "legacy_init_op"; + // Directory in which to save the SavedModel variables. constexpr char kSavedModelVariablesDirectory[] = "variables"; diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc index 1f952293550..c654d56e8a1 100644 --- a/tensorflow/cc/saved_model/loader.cc +++ b/tensorflow/cc/saved_model/loader.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/lib/monitoring/counter.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/protobuf_internal.h" #include "tensorflow/core/protobuf/saved_model.pb.h" #include "tensorflow/core/public/session.h" #include "tensorflow/core/public/session_options.h" @@ -83,10 +84,32 @@ Status LoadMetaGraphIntoSession(const MetaGraphDef& meta_graph_def, return (*session)->Create(meta_graph_def.graph_def()); } -Status Restore(const RunOptions& run_options, const string& export_dir, - const StringPiece restore_op_name, - const StringPiece variable_filename_const_op_name, - Session* session) { +Tensor CreateStringTensor(const string& value) { + Tensor tensor(DT_STRING, TensorShape({})); + tensor.scalar()() = value; + return tensor; +} + +void AddAssetsTensorsToInputs(const StringPiece export_dir, + const std::vector& asset_file_defs, + std::vector>* inputs) { + if (asset_file_defs.empty()) { + return; + } + for (auto& asset_file_def : asset_file_defs) { + Tensor assets_file_path_tensor = CreateStringTensor(io::JoinPath( + export_dir, kSavedModelAssetsDirectory, asset_file_def.filename())); + inputs->push_back( + 
{asset_file_def.tensor_info().name(), assets_file_path_tensor}); + } +} + +Status RunRestore(const RunOptions& run_options, const string& export_dir, + const StringPiece restore_op_name, + const StringPiece variable_filename_const_op_name, + const std::vector& asset_file_defs, + Session* session) { + LOG(INFO) << "Restoring SavedModel bundle."; // Find path to variables to be restored in export directory. const string variables_directory = io::JoinPath(export_dir, kSavedModelVariablesDirectory); @@ -109,11 +132,54 @@ Status Restore(const RunOptions& run_options, const string& export_dir, std::vector> inputs = { {variable_filename_const_op_name.ToString(), variables_path_tensor}}; + AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs); + RunMetadata run_metadata; return session->Run(run_options, inputs, {}, {restore_op_name.ToString()}, nullptr /* outputs */, &run_metadata); } +Status RunLegacyInitOp(const RunOptions& run_options, const string& export_dir, + const MetaGraphDef& meta_graph_def, + const std::vector& asset_file_defs, + Session* session) { + LOG(INFO) << "Running LegacyInitOp on SavedModel bundle."; + const auto& collection_def_map = meta_graph_def.collection_def(); + const auto init_op_it = collection_def_map.find(kSavedModelLegacyInitOpKey); + if (init_op_it != collection_def_map.end()) { + if (init_op_it->second.node_list().value_size() != 1) { + return errors::FailedPrecondition(strings::StrCat( + "Expected exactly one serving init op in : ", export_dir)); + } + std::vector> inputs; + AddAssetsTensorsToInputs(export_dir, asset_file_defs, &inputs); + RunMetadata run_metadata; + const StringPiece legacy_init_op_name = + init_op_it->second.node_list().value(0); + return session->Run(run_options, inputs, {}, + {legacy_init_op_name.ToString()}, nullptr /* outputs */, + &run_metadata); + } + return Status::OK(); +} + +Status GetAssetFileDefs(const MetaGraphDef& meta_graph_def, + std::vector* asset_file_defs) { + const auto& collection_def_map = 
meta_graph_def.collection_def(); + const auto assets_it = collection_def_map.find(kSavedModelAssetsKey); + if (assets_it == collection_def_map.end()) { + return Status::OK(); + } + const auto& any_assets = assets_it->second.any_list().value(); + for (const auto& any_asset : any_assets) { + AssetFileDef asset_file_def; + TF_RETURN_IF_ERROR( + ParseAny(any_asset, &asset_file_def, "tensorflow.AssetFileDef")); + asset_file_defs->push_back(asset_file_def); + } + return Status::OK(); +} + Status LoadSavedModelInternal(const SessionOptions& session_options, const RunOptions& run_options, const string& export_dir, @@ -134,12 +200,19 @@ Status LoadSavedModelInternal(const SessionOptions& session_options, TF_RETURN_IF_ERROR(LoadMetaGraphIntoSession( bundle->meta_graph_def, session_options, &bundle->session)); + std::vector asset_file_defs; TF_RETURN_IF_ERROR( - Restore(run_options, export_dir, - bundle->meta_graph_def.saver_def().restore_op_name(), - bundle->meta_graph_def.saver_def().filename_tensor_name(), - bundle->session.get())); - + GetAssetFileDefs(bundle->meta_graph_def, &asset_file_defs)); + TF_RETURN_IF_ERROR( + RunRestore(run_options, export_dir, + bundle->meta_graph_def.saver_def().restore_op_name(), + bundle->meta_graph_def.saver_def().filename_tensor_name(), + asset_file_defs, bundle->session.get())); + // TODO(sukritiramesh): Add support for a single main op to run upon load, + // which will supersede the legacy_init_op and separate RunRestore. + TF_RETURN_IF_ERROR(RunLegacyInitOp(run_options, export_dir, + bundle->meta_graph_def, asset_file_defs, + bundle->session.get())); return Status::OK(); } diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc index a7e4d6cfde8..55a22e4e817 100644 --- a/tensorflow/cc/saved_model/loader_test.cc +++ b/tensorflow/cc/saved_model/loader_test.cc @@ -29,7 +29,6 @@ limitations under the License. 
namespace tensorflow { namespace { -constexpr char kTestDataPb[] = "cc/saved_model/testdata/half_plus_two"; constexpr char kTestDataPbTxt[] = "cc/saved_model/testdata/half_plus_two_pbtxt"; constexpr char kTestDataSharded[] = "cc/saved_model/testdata/half_plus_two_sharded"; @@ -45,12 +44,26 @@ class LoaderTest : public ::testing::Test { return example.SerializeAsString(); } + void ValidateAssets(const string& export_dir, + const SavedModelBundle& bundle) { + const string asset_directory = + io::JoinPath(export_dir, kSavedModelAssetsDirectory); + const string asset_filename = "foo.txt"; + const string asset_filepath = io::JoinPath(asset_directory, asset_filename); + EXPECT_TRUE(Env::Default()->FileExists(asset_filepath)); + + std::vector path_outputs; + TF_ASSERT_OK( + bundle.session->Run({}, {"filename_tensor:0"}, {}, &path_outputs)); + ASSERT_EQ(1, path_outputs.size()); + + test::ExpectTensorEqual( + test::AsTensor({"foo.txt"}, TensorShape({})), path_outputs[0]); + } + void CheckSavedModelBundle(const string& export_dir, const SavedModelBundle& bundle) { - const string asset_path = - io::JoinPath(export_dir, kSavedModelAssetsDirectory, "foo.txt"); - EXPECT_TRUE(Env::Default()->FileExists(asset_path)); - + ValidateAssets(export_dir, bundle); // Retrieve the regression signature from meta graph def. 
const auto signature_def_map = bundle.meta_graph_def.signature_def(); const auto signature_def = signature_def_map.at(kRegressMethodName); @@ -151,18 +164,6 @@ TEST_F(LoaderTest, PbtxtFormat) { CheckSavedModelBundle(export_dir, bundle); } -TEST_F(LoaderTest, SingleShardVariables) { - SavedModelBundle bundle; - SessionOptions session_options; - RunOptions run_options; - - const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); - TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, - {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(export_dir, bundle); -} - TEST_F(LoaderTest, InvalidExportPath) { SavedModelBundle bundle; RunOptions run_options; diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt deleted file mode 100644 index f9ff0366880..00000000000 --- a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt +++ /dev/null @@ -1 +0,0 @@ -asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb deleted file mode 100644 index e894f9b1011..00000000000 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb and /dev/null differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001 b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001 deleted file mode 100644 index 20bc7d454dd..00000000000 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.data-00000-of-00001 and /dev/null differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index deleted file mode 100644 index e7df518f5b5..00000000000 Binary files 
a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/variables.index and /dev/null differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt index 2e714d262db..693262eb4d7 100644 --- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt @@ -102,6 +102,24 @@ meta_graphs { type: "type" } } + op { + name: "MergeV2Checkpoints" + input_arg { + name: "checkpoint_prefixes" + type: DT_STRING + } + input_arg { + name: "destination_prefix" + type: DT_STRING + } + attr { + name: "delete_old_dirs" + type: "bool" + default_value { + b: true + } + } + } op { name: "Mul" input_arg { @@ -140,6 +158,35 @@ meta_graphs { op { name: "NoOp" } + op { + name: "Pack" + input_arg { + name: "values" + type_attr: "T" + number_attr: "N" + } + output_arg { + name: "output" + type_attr: "T" + } + attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "T" + type: "type" + } + attr { + name: "axis" + type: "int" + default_value { + i: 0 + } + } + } op { name: "ParseExample" input_arg { @@ -267,9 +314,9 @@ meta_graphs { } } op { - name: "SaveSlices" + name: "SaveV2" input_arg { - name: "filename" + name: "prefix" type: DT_STRING } input_arg { @@ -277,15 +324,15 @@ meta_graphs { type: DT_STRING } input_arg { - name: "shapes_and_slices" + name: "shape_and_slices" type: DT_STRING } input_arg { - name: "data" - type_list_attr: "T" + name: "tensors" + type_list_attr: "dtypes" } attr { - name: "T" + name: "dtypes" type: "list(type)" has_minimum: true minimum: 1 @@ -311,19 +358,29 @@ meta_graphs { } } op { - name: "ShardedFilespec" + name: "StringJoin" input_arg { - name: "basename" + name: "inputs" type: DT_STRING - } - input_arg { - name: "num_shards" - type: DT_INT32 + number_attr: "N" } output_arg { - name: "filename" + name: "output" type: DT_STRING } + 
attr { + name: "N" + type: "int" + has_minimum: true + minimum: 1 + } + attr { + name: "separator" + type: "string" + default_value { + s: "" + } + } } op { name: "Variable" @@ -899,6 +956,244 @@ meta_graphs { } } } + node { + name: "Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "/tmp/original/export/assets/foo.txt" + } + } + } + } + node { + name: "filename_tensor/initial_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "foo.txt" + } + } + } + } + node { + name: "filename_tensor" + op: "Variable" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } + } + node { + name: "filename_tensor/Assign" + op: "Assign" + input: "filename_tensor" + input: "filename_tensor/initial_value" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@filename_tensor" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } + } + node { + name: "filename_tensor/read" + op: "Identity" + input: "filename_tensor" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@filename_tensor" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } 
+ } + node { + name: "Assign/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "foo.txt" + } + } + } + } + node { + name: "Assign" + op: "Assign" + input: "filename_tensor" + input: "Assign/value" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@filename_tensor" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } + } node { name: "Identity" op: "Identity" @@ -931,6 +1226,11 @@ meta_graphs { input: "^a/Assign" input: "^b/Assign" } + node { + name: "group_deps" + op: "NoOp" + input: "^Assign" + } node { name: "save/Const" op: "Const" @@ -961,6 +1261,63 @@ meta_graphs { } } } + node { + name: "save/StringJoin/inputs_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_ff2bd25218b646ea9ed224eecdce5e79/part" + } + } + } + } + node { + name: "save/StringJoin" + op: "StringJoin" + input: "save/Const" + input: "save/StringJoin/inputs_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "separator" + value { + s: "" + } + } + } node { name: "save/num_shards" op: "Const" @@ -1024,7 +1381,7 @@ meta_graphs { node { name: "save/ShardedFilename" op: "ShardedFilename" - input: "save/Const" + input: "save/StringJoin" input: "save/ShardedFilename/shard" input: "save/num_shards" attr { @@ -1038,7 +1395,7 @@ meta_graphs { } } node { - name: "save/save/tensor_names" + name: 
"save/SaveV2/tensor_names" op: "Const" attr { key: "_output_shapes" @@ -1075,7 +1432,7 @@ meta_graphs { } } node { - name: "save/save/shapes_and_slices" + name: "save/SaveV2/shape_and_slices" op: "Const" attr { key: "_output_shapes" @@ -1112,15 +1469,15 @@ meta_graphs { } } node { - name: "save/save" - op: "SaveSlices" + name: "save/SaveV2" + op: "SaveV2" input: "save/ShardedFilename" - input: "save/save/tensor_names" - input: "save/save/shapes_and_slices" + input: "save/SaveV2/tensor_names" + input: "save/SaveV2/shape_and_slices" input: "a" input: "b" attr { - key: "T" + key: "dtypes" value { list { type: DT_FLOAT @@ -1133,7 +1490,7 @@ meta_graphs { name: "save/control_dependency" op: "Identity" input: "save/ShardedFilename" - input: "^save/save" + input: "^save/SaveV2" attr { key: "T" value { @@ -1159,11 +1516,65 @@ meta_graphs { } } node { - name: "save/ShardedFilespec" - op: "ShardedFilespec" - input: "save/Const" - input: "save/num_shards" + name: "save/MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "save/ShardedFilename" input: "^save/control_dependency" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } + } + node { + name: "save/MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "save/MergeV2Checkpoints/checkpoint_prefixes" + input: "save/Const" + attr { + key: "delete_old_dirs" + value { + b: true + } + } + } + node { + name: "save/Identity" + op: "Identity" + input: "save/Const" + input: "^save/control_dependency" + input: "^save/MergeV2Checkpoints" + attr { + key: "T" + value { + type: DT_STRING + } + } attr { key: "_output_shapes" value { @@ -1467,12 +1878,39 @@ meta_graphs { } saver_def { filename_tensor_name: "save/Const:0" - save_tensor_name: "save/ShardedFilespec:0" + save_tensor_name: "save/Identity:0" restore_op_name: 
"save/restore_all" max_to_keep: 5 sharded: true keep_checkpoint_every_n_hours: 10000.0 - version: V1 + version: V2 + } + collection_def { + key: "asset_filepaths" + value { + node_list { + value: "Const:0" + } + } + } + collection_def { + key: "legacy_init_op" + value { + node_list { + value: "group_deps" + } + } + } + collection_def { + key: "saved_model_assets" + value { + any_list { + value { + type_url: "type.googleapis.com/tensorflow.AssetFileDef" + value: "\n\t\n\007Const:0\022\007foo.txt" + } + } + } } collection_def { key: "trainable_variables" diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb index e894f9b1011..0df49f21685 100644 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb and b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb differ diff --git a/tensorflow/cc/training/queue_runner.cc b/tensorflow/cc/training/queue_runner.cc index 81f49c5dcfc..79d306f3676 100644 --- a/tensorflow/cc/training/queue_runner.cc +++ b/tensorflow/cc/training/queue_runner.cc @@ -54,7 +54,8 @@ Status QueueRunner::Init(const QueueRunnerDef& queue_runner_def) { } QueueRunner::~QueueRunner() { - should_stop_ = true; + // Cannot run Stop() here because the session might already be closed or + // destroyed. 
Join(); } @@ -72,6 +73,15 @@ Status QueueRunner::Start(Session* sess) { return Status::OK(); } +Status QueueRunner::Stop(Session* sess) { + should_stop_ = true; + if (cancel_op_name_.empty()) { + return Status::OK(); + } else { + return sess->Run({}, {}, {cancel_op_name_}, nullptr); + } +} + Status QueueRunner::Join() { thread_pool_.reset(); started_ = false; @@ -80,9 +90,8 @@ Status QueueRunner::Join() { void QueueRunner::Run(Session* sess, const string& enqueue_op) { bool decremented = false; - while (!should_stop_) { - std::vector outputs; - auto status = sess->Run({}, {}, {enqueue_op}, &outputs); + while (!should_stop_.load()) { + auto status = sess->Run({}, {}, {enqueue_op}, nullptr); if (status.ok()) { continue; } else if (queue_closed_exception_types_.count( @@ -94,19 +103,25 @@ void QueueRunner::Run(Session* sess, const string& enqueue_op) { // If all enqueue ops have finished, run the close op. if (runs_ == 0 && !close_op_name_.empty()) { - std::vector outputs; - auto s = sess->Run({}, {}, {close_op_name_}, &outputs); - if (!s.ok()) { - status_ = status; + auto s = sess->Run({}, {}, {close_op_name_}, nullptr); + if (!s.ok() && status_.ok() && + queue_closed_exception_types_.count(static_cast(s.code())) == + 0) { + status_ = s; } } } else { - mutex_lock l(mu_); - should_stop_ = true; - // Only record the first failure status. - if (status_.ok()) { - status_ = status; + { + mutex_lock l(mu_); + should_stop_ = true; + // Only record the first failure status. + if (status_.ok()) { + status_ = status; + } } + // Stop the queue runner immediately to propagate the error to + // subsequent queues. + Stop(sess); } } diff --git a/tensorflow/cc/training/queue_runner.h b/tensorflow/cc/training/queue_runner.h index 7eeab8bd45a..c3fe4026efe 100644 --- a/tensorflow/cc/training/queue_runner.h +++ b/tensorflow/cc/training/queue_runner.h @@ -20,6 +20,7 @@ limitations under the License. 
#include #include #include + #include "tensorflow/core/lib/core/error_codes.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/threadpool.h" @@ -49,6 +50,9 @@ class QueueRunner { // Starts the queue runner with the given session. Status Start(Session* sess); + // Requests to stop and runs the cancel op. + Status Stop(Session* sess); + // Joins all the threads. Returns okay if all threads run successfully; // otherwise returns the first captured failure status. Status Join(); @@ -60,14 +64,14 @@ class QueueRunner { string queue_name_; std::vector enqueue_op_names_; string close_op_name_; - // The cancel op is not being called currently. string cancel_op_name_; // code::Code casted to int to avoid a hash function. std::unordered_set queue_closed_exception_types_; std::unique_ptr thread_pool_; - bool should_stop_; + std::atomic should_stop_; std::atomic started_; + condition_variable wait_to_close_; mutex mu_; // TODO(yuefengz): implement c++ coordinator. int runs_ = 0; diff --git a/tensorflow/cc/training/queue_runner_test.cc b/tensorflow/cc/training/queue_runner_test.cc index 8719677274a..29165778c5c 100644 --- a/tensorflow/cc/training/queue_runner_test.cc +++ b/tensorflow/cc/training/queue_runner_test.cc @@ -14,8 +14,10 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/cc/training/queue_runner.h" + #include #include + #include "tensorflow/cc/framework/scope.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/graph.pb.h" @@ -23,39 +25,42 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/lib/core/error_codes.pb.h" +#include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/protobuf/queue_runner.pb.h" #include "tensorflow/core/public/session.h" +namespace tensorflow { namespace { -using ::tensorflow::DataType; -using ::tensorflow::error::Code; -using ::tensorflow::GraphDef; -using ::tensorflow::ops::Assign; -using ::tensorflow::ops::Const; -using ::tensorflow::ops::CountUpTo; -using ::tensorflow::ops::FIFOQueue; -using ::tensorflow::ops::InputList; -using ::tensorflow::ops::QueueClose; -using ::tensorflow::ops::QueueDequeue; -using ::tensorflow::ops::QueueEnqueue; -using ::tensorflow::ops::Square; -using ::tensorflow::ops::Variable; -using ::tensorflow::QueueRunner; -using ::tensorflow::QueueRunnerDef; -using ::tensorflow::Scope; -using ::tensorflow::Session; -using ::tensorflow::SessionOptions; -using ::tensorflow::Tensor; -using ::tensorflow::TensorShape; +using error::Code; +using ops::Assign; +using ops::Const; +using ops::CountUpTo; +using ops::FIFOQueue; +using ops::QueueClose; +using ops::QueueDequeue; +using ops::QueueEnqueue; +using ops::Square; +using ops::Variable; constexpr char kAssignOpName[] = "assign"; +constexpr char kCancelOp0[] = "cancel0"; +constexpr char kCancelOp1[] = "cancel1"; +constexpr char kCloseOp0[] = "close0"; +constexpr char kCloseOp1[] = "close1"; constexpr char kCountUpToOpName[] = "count"; +constexpr char kDequeueOp0[] = "dequeue0"; +constexpr char kDequeueOp1[] = "dequeue1"; +constexpr char kEnqueueOp0[] = "enqueue0"; +constexpr char kEnqueueOp1[] = "enqueue1"; constexpr char kIllegalOpName1[] = "would fail"; constexpr char kIllegalOpName2[] = "fail again"; constexpr char kQueueName[] = "unit_test"; +constexpr char kQueueName0[] = 
"q0"; +constexpr char kQueueName1[] = "q1"; constexpr char kSquareOpName[] = "square"; constexpr char kVarOpName[] = "var"; @@ -75,7 +80,7 @@ GraphDef BuildSimpleGraph() { QueueRunnerDef BuildQueueRunnerDef( const std::string& queue_name, const std::vector& enqueue_ops, - const std::string& close_op, + const std::string& close_op, const std::string& cancel_op, const std::vector& queue_closed_error_codes) { QueueRunnerDef queue_runner_def; *queue_runner_def.mutable_queue_name() = kQueueName; @@ -83,6 +88,7 @@ QueueRunnerDef BuildQueueRunnerDef( *queue_runner_def.mutable_enqueue_op_name()->Add() = enqueue_op; } *queue_runner_def.mutable_close_op_name() = close_op; + *queue_runner_def.mutable_cancel_op_name() = cancel_op; for (const auto& error_code : queue_closed_error_codes) { *queue_runner_def.mutable_queue_closed_exception_types()->Add() = error_code; @@ -96,8 +102,7 @@ std::unique_ptr BuildSessionAndInitVariable( std::unique_ptr session(NewSession(options)); TF_CHECK_OK(session->Create(graph_def)); - std::vector nothing; - TF_CHECK_OK(session->Run({}, {}, {kAssignOpName}, ¬hing)); + TF_CHECK_OK(session->Run({}, {}, {kAssignOpName}, nullptr)); return session; } @@ -106,7 +111,7 @@ TEST(QueueRunnerTest, BasicTest) { auto session = BuildSessionAndInitVariable(graph_def); QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( - kQueueName, {kCountUpToOpName, kCountUpToOpName}, kSquareOpName, {}); + kQueueName, {kCountUpToOpName, kCountUpToOpName}, kSquareOpName, "", {}); QueueRunner qr(queue_runner_def); qr.Start(session.get()); @@ -123,7 +128,7 @@ TEST(QueueRunnerTest, QueueClosedCode) { auto session = BuildSessionAndInitVariable(graph_def); QueueRunnerDef queue_runner_def = - BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kSquareOpName, + BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kSquareOpName, "", {Code::OUT_OF_RANGE, Code::CANCELLED}); QueueRunner qr(queue_runner_def); @@ -141,60 +146,167 @@ TEST(QueueRunnerDef, CatchErrorInJoin) { auto session = 
BuildSessionAndInitVariable(graph_def); QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( - kQueueName, {kIllegalOpName1, kIllegalOpName2}, kCountUpToOpName, {}); + kQueueName, {kIllegalOpName1, kIllegalOpName2}, kCountUpToOpName, "", {}); QueueRunner qr(queue_runner_def); qr.Start(session.get()); EXPECT_EQ(qr.Join().code(), Code::NOT_FOUND); } -TEST(QueueRunnerTest, RealEnqueueDequeue) { +GraphDef BuildDoubleQueueGraph() { Scope root = Scope::NewRootScope(); - auto q0 = FIFOQueue(root.WithOpName("q0"), {DataType::DT_INT32}); + auto q0 = FIFOQueue(root.WithOpName(kQueueName0), {DataType::DT_INT32}); auto ten = Const(root, 10); - auto enqueue0 = QueueEnqueue(root.WithOpName("enqueue0"), q0, {ten}); - auto close0 = QueueClose(root.WithOpName("close0"), q0); - auto q1 = FIFOQueue(root.WithOpName("q1"), {DataType::DT_INT32}); + auto enqueue0 = QueueEnqueue(root.WithOpName(kEnqueueOp0), q0, {ten}); + auto close0 = QueueClose(root.WithOpName(kCloseOp0), q0); + auto cancel0 = QueueClose(root.WithOpName(kCancelOp0), q0, + QueueClose::CancelPendingEnqueues(true)); + auto q1 = FIFOQueue(root.WithOpName(kQueueName1), {DataType::DT_INT32}); auto dequeue0 = - QueueDequeue(root.WithOpName("dequeue0"), q0, {DataType::DT_INT32}); - auto enqueue1 = QueueEnqueue(root.WithOpName("enqueue1"), q1, {dequeue0[0]}); + QueueDequeue(root.WithOpName(kDequeueOp0), q0, {DataType::DT_INT32}); + auto enqueue1 = QueueEnqueue(root.WithOpName(kEnqueueOp1), q1, {dequeue0[0]}); auto dequeue1 = - QueueDequeue(root.WithOpName("dequeue1"), q1, {DataType::DT_INT32}); - auto close1 = QueueClose(root.WithOpName("close1"), q1); + QueueDequeue(root.WithOpName(kDequeueOp1), q1, {DataType::DT_INT32}); + auto close1 = QueueClose(root.WithOpName(kCloseOp1), q1); + auto cancel1 = QueueClose(root.WithOpName(kCancelOp1), q1, + QueueClose::CancelPendingEnqueues(true)); GraphDef graph_def; TF_EXPECT_OK(root.ToGraphDef(&graph_def)); + return graph_def; +} + +TEST(QueueRunnerTest, RealEnqueueDequeue) { + auto 
graph_def = BuildDoubleQueueGraph(); SessionOptions options; std::unique_ptr session(NewSession(options)); TF_CHECK_OK(session->Create(graph_def)); QueueRunnerDef queue_runner_def = - BuildQueueRunnerDef(kQueueName, {"enqueue1"}, "close1", {}); + BuildQueueRunnerDef(kQueueName, {kEnqueueOp1}, kCloseOp1, "", {}); QueueRunner qr; qr.Init(queue_runner_def); TF_CHECK_OK(qr.Start(session.get())); - std::vector outputs; - TF_EXPECT_OK(session->Run({}, {}, {"enqueue0"}, &outputs)); - TF_EXPECT_OK(session->Run({}, {}, {"enqueue0"}, &outputs)); - TF_EXPECT_OK(session->Run({}, {}, {"close0"}, &outputs)); + TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr)); + TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr)); + // Closing queue 0 would also close the queue runner. + TF_EXPECT_OK(session->Run({}, {}, {kCloseOp0}, nullptr)); TF_EXPECT_OK(qr.Join()); std::vector dq1; - TF_EXPECT_OK(session->Run({}, {"dequeue1"}, {}, &dq1)); + TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq1)); EXPECT_EQ(*dq1[0].scalar().data(), 10); std::vector dq2; - TF_EXPECT_OK(session->Run({}, {"dequeue1"}, {}, &dq2)); + TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq2)); EXPECT_EQ(*dq2[0].scalar().data(), 10); - EXPECT_EQ(session->Run({}, {"dequeue1"}, {}, &dq1).code(), + EXPECT_EQ(session->Run({}, {kDequeueOp1}, {}, nullptr).code(), Code::OUT_OF_RANGE); } +void JoinThread(QueueRunner* queue_runner, bool* join_succeeded, + Notification* join_done) { + EXPECT_EQ(queue_runner->Join().code(), Code::CANCELLED); + *join_succeeded = true; + join_done->Notify(); +} + +TEST(QueueRunnerTest, SessionCloseCancelPendingEnqueue) { + auto graph_def = BuildDoubleQueueGraph(); + + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(graph_def)); + + QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( + kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {}); + QueueRunner qr; + qr.Init(queue_runner_def); + TF_CHECK_OK(qr.Start(session.get())); + 
+ TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr)); + + std::vector dq1; + TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq1)); + EXPECT_EQ(*dq1[0].scalar().data(), 10); + + // The expected behavior is the QueueRunner::Join() call is blocked until + // Session::Close() is called. + bool join_succeeded = false; + Notification join_done; + Env::Default()->SchedClosure( + std::bind(&JoinThread, &qr, &join_succeeded, &join_done)); + + Env::Default()->SleepForMicroseconds(10000000); + EXPECT_EQ(join_succeeded, false); + + // Closing the session is required to cancel pending enqueue nodes. + TF_EXPECT_OK(session->Close()); + + join_done.WaitForNotification(); + EXPECT_EQ(join_succeeded, true); +} + +TEST(QueueRunnerTest, Stop) { + auto graph_def = BuildDoubleQueueGraph(); + + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(graph_def)); + + QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( + kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {}); + QueueRunner qr; + qr.Init(queue_runner_def); + TF_CHECK_OK(qr.Start(session.get())); + + TF_EXPECT_OK(qr.Stop(session.get())); + + TF_EXPECT_OK(session->Run({}, {}, {kEnqueueOp0}, nullptr)); + + EXPECT_EQ(session->Run({}, {kDequeueOp1}, {}, nullptr).code(), + Code::OUT_OF_RANGE); + + // qr is already stopped + TF_EXPECT_OK(qr.Join()); +} + +TEST(QueueRunnerTest, StopTwoQueues) { + auto graph_def = BuildDoubleQueueGraph(); + + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(graph_def)); + + QueueRunnerDef queue_runner0 = + BuildQueueRunnerDef(kQueueName0, {kEnqueueOp0}, kCloseOp0, kCancelOp0, + {Code::OUT_OF_RANGE, Code::CANCELLED}); + QueueRunnerDef queue_runner1 = + BuildQueueRunnerDef(kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, + {Code::OUT_OF_RANGE, Code::CANCELLED}); + QueueRunner qr0; + qr0.Init(queue_runner0); + TF_CHECK_OK(qr0.Start(session.get())); + QueueRunner qr1; + 
qr1.Init(queue_runner1); + TF_CHECK_OK(qr1.Start(session.get())); + + std::vector dq; + TF_EXPECT_OK(session->Run({}, {kDequeueOp1}, {}, &dq)); + EXPECT_EQ(*dq[0].scalar().data(), 10); + + TF_EXPECT_OK(qr0.Stop(session.get())); + TF_EXPECT_OK(qr1.Stop(session.get())); + + TF_EXPECT_OK(qr0.Join()); + TF_EXPECT_OK(qr1.Join()); +} + TEST(QueueRunnerTest, EmptyEnqueueOps) { QueueRunnerDef queue_runner_def = - BuildQueueRunnerDef(kQueueName, {}, kCountUpToOpName, {}); + BuildQueueRunnerDef(kQueueName, {}, kCountUpToOpName, "", {}); QueueRunner qr; EXPECT_EQ(qr.Init(queue_runner_def).code(), Code::INVALID_ARGUMENT); @@ -203,8 +315,8 @@ TEST(QueueRunnerTest, EmptyEnqueueOps) { TEST(QueueRunnerTest, InitAfterStart) { GraphDef graph_def = BuildSimpleGraph(); auto session = BuildSessionAndInitVariable(graph_def); - QueueRunnerDef queue_runner_def = - BuildQueueRunnerDef(kQueueName, {kCountUpToOpName}, kCountUpToOpName, {}); + QueueRunnerDef queue_runner_def = BuildQueueRunnerDef( + kQueueName, {kCountUpToOpName}, kCountUpToOpName, "", {}); QueueRunner qr; TF_EXPECT_OK(qr.Init(queue_runner_def)); @@ -213,3 +325,4 @@ TEST(QueueRunnerTest, InitAfterStart) { } } // namespace +} // namespace tensorflow diff --git a/tensorflow/contrib/cmake/external/gemmlowp.cmake b/tensorflow/contrib/cmake/external/gemmlowp.cmake index 11868d44dd6..024c064cf43 100644 --- a/tensorflow/contrib/cmake/external/gemmlowp.cmake +++ b/tensorflow/contrib/cmake/external/gemmlowp.cmake @@ -1,7 +1,7 @@ include (ExternalProject) -set(gemmlowp_URL http://github.com/google/gemmlowp/archive/c0bacf11fb509a2cbe15a97362a2df067ffd57a2.tar.gz) -set(gemmlowp_HASH SHA256=dc64a38f9927db18748d9024987c9b102115e25bc2be4b76aa8e422b8f83d882) +set(gemmlowp_URL http://github.com/google/gemmlowp/archive/a6f29d8ac48d63293f845f2253eccbf86bc28321.tar.gz) +set(gemmlowp_HASH SHA256=75d40ea8e68b0d1644f052fffe8f14a410b2a73d40ccb859a95c0578d194ec26) set(gemmlowp_BUILD ${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp) set(gemmlowp_INCLUDE_DIR 
${CMAKE_BINARY_DIR}/gemmlowp/src/gemmlowp) diff --git a/tensorflow/contrib/cmake/patches/gif/unistd.h b/tensorflow/contrib/cmake/patches/gif/unistd.h index e69de29bb2d..cd52ce31d4d 100644 --- a/tensorflow/contrib/cmake/patches/gif/unistd.h +++ b/tensorflow/contrib/cmake/patches/gif/unistd.h @@ -0,0 +1,14 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 91ad74d9e76..8b3a2d75f48 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -89,6 +89,7 @@ if(WIN32) "${tensorflow_source_dir}/tensorflow/core/kernels/fact_op.cc" "${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.cc" "${tensorflow_source_dir}/tensorflow/core/kernels/immutable_constant_op.h" + "${tensorflow_source_dir}/tensorflow/core/kernels/meta_support.*" "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.cc" "${tensorflow_source_dir}/tensorflow/core/kernels/sparse_matmul_op.h" "${tensorflow_source_dir}/tensorflow/core/kernels/*quantiz*.h" diff --git a/tensorflow/contrib/cmake/tf_tools.cmake b/tensorflow/contrib/cmake/tf_tools.cmake index 91776fd5c82..4b3b93f890f 100644 --- a/tensorflow/contrib/cmake/tf_tools.cmake +++ b/tensorflow/contrib/cmake/tf_tools.cmake @@ -13,7 +13,10 @@ 
add_executable(${proto_text} $ ) -target_link_libraries(${proto_text} PUBLIC ${tensorflow_EXTERNAL_LIBRARIES}) +target_link_libraries(${proto_text} PUBLIC + ${tensorflow_EXTERNAL_LIBRARIES} + tf_protos_cc +) add_dependencies(${proto_text} tf_core_lib diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 6df6dd5c248..850cbf8d26a 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -36,7 +36,7 @@ cuda_py_tests( cuda_py_tests( name = "operator_pd_cholesky_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/operator_pd_cholesky_test.py"], additional_deps = [ ":distributions_py", @@ -60,7 +60,7 @@ cuda_py_tests( cuda_py_tests( name = "operator_pd_full_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/operator_pd_full_test.py"], additional_deps = [ ":distributions_py", @@ -72,7 +72,7 @@ cuda_py_tests( cuda_py_tests( name = "operator_pd_identity_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/operator_pd_identity_test.py"], additional_deps = [ ":distributions_py", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py index f42406e90bc..7356511a127 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py @@ -614,6 +614,67 @@ class SigmoidCenteredBijectorTest(tf.test.TestCase): atol=0., rtol=1e-7) +class CholeskyOuterProductBijectorTest(tf.test.TestCase): + """Tests the correctness of the Y = X * X^T transformation.""" + + def testBijectorMatrix(self): + with self.test_session(): + bijector = bijectors.CholeskyOuterProduct(event_ndims=2, + validate_args=True) + self.assertEqual("cholesky_outer_product", bijector.name) + x = [[[1., 0], + [2, 1]], + [[math.sqrt(2.), 0], + [math.sqrt(8.), 1]]] + y = np.matmul(x, 
np.transpose(x, axes=(0, 2, 1))) + # Fairly easy to compute differentials since we have 2x2. + dx_dy = [[[2.*1, 0, 0], + [2, 1, 0], + [0, 2*2, 2*1]], + [[2*math.sqrt(2.), 0, 0], + [math.sqrt(8.), math.sqrt(2.), 0], + [0, 2*math.sqrt(8.), 2*1]]] + ildj = -np.sum( + np.log(np.asarray(dx_dy).diagonal(offset=0, axis1=1, axis2=2)), + axis=1) + self.assertAllEqual((2, 2, 2), bijector.forward(x).get_shape()) + self.assertAllEqual((2, 2, 2), bijector.inverse(y).get_shape()) + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose(ildj, + bijector.inverse_log_det_jacobian(y).eval(), + atol=0., rtol=1e-7) + self.assertAllClose(-bijector.inverse_log_det_jacobian(y).eval(), + bijector.forward_log_det_jacobian(x).eval(), + atol=0., rtol=1e-7) + + def testBijectorScalar(self): + with self.test_session(): + bijector = bijectors.CholeskyOuterProduct(event_ndims=0, + validate_args=True) + self.assertEqual("cholesky_outer_product", bijector.name) + x = [[[1., 5], + [2, 1]], + [[math.sqrt(2.), 3], + [math.sqrt(8.), 1]]] + y = np.square(x) + ildj = -math.log(2.) 
- np.log(x) + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose(ildj, + bijector.inverse_log_det_jacobian(y).eval(), + atol=0., rtol=1e-7) + self.assertAllClose(-bijector.inverse_log_det_jacobian(y).eval(), + bijector.forward_log_det_jacobian(x).eval(), + atol=0., rtol=1e-7) + + def testScalarCongruency(self): + with self.test_session(): + bijector = bijectors.CholeskyOuterProduct(event_ndims=0, + validate_args=True) + assert_scalar_congruency(bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) + + class ChainBijectorTest(tf.test.TestCase): """Tests the correctness of the Y = Chain(bij1, bij2, bij3) transformation.""" diff --git a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py index e02b6439186..f4da88e5350 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/distribution_test.py @@ -41,11 +41,34 @@ class DistributionTest(tf.test.TestCase): for cls in classes: for sample_shape in sample_shapes: param_shapes = cls.param_shapes(sample_shape) - print(param_shapes) params = dict([(name, tf.random_normal(shape)) for name, shape in param_shapes.items()]) dist = cls(**params) self.assertAllEqual(sample_shape, tf.shape(dist.sample()).eval()) + dist_copy = dist.copy() + self.assertAllEqual(sample_shape, + tf.shape(dist_copy.sample()).eval()) + self.assertEqual(dist.parameters, dist_copy.parameters) + + def testCopyExtraArgs(self): + with self.test_session(): + # Note: we cannot easily test all distributions since each requires + # different initialization arguments. We therefore spot test a few. 
+ normal = dists.Normal(mu=1., sigma=2., validate_args=True) + self.assertEqual(normal.parameters, normal.copy().parameters) + wishart = dists.WishartFull(df=2, scale=[[1., 2], [2, 5]], + validate_args=True) + self.assertEqual(wishart.parameters, wishart.copy().parameters) + + def testCopyOverride(self): + with self.test_session(): + normal = dists.Normal(mu=1., sigma=2., validate_args=True) + normal_copy = normal.copy(validate_args=False) + base_params = normal.parameters.copy() + copy_params = normal.copy(validate_args=False).parameters.copy() + self.assertNotEqual(base_params.pop("validate_args"), + copy_params.pop("validate_args")) + self.assertEqual(base_params, copy_params) if __name__ == '__main__': diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py index 054facb9a24..2472c12d3f3 100644 --- a/tensorflow/contrib/distributions/python/ops/bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijector.py @@ -14,7 +14,7 @@ # ============================================================================== r"""Bijector Ops. -An API for reversible (bijective) transformations of random variables. +An API for invertible, differentiable transformations of random variables. ## Background @@ -31,6 +31,7 @@ To apply a `Bijector`, use `distributions.TransformedDistribution`. 
@@Bijector @@Chain +@@CholeskyOuterProduct @@Exp @@Identity @@Inline @@ -46,7 +47,9 @@ from __future__ import division from __future__ import print_function import abc +import collections import contextlib +import math import re import numpy as np import six @@ -58,18 +61,112 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops +__all__ = [ + "Bijector", + "Chain", + "CholeskyOuterProduct", + "Exp", + "Identity", + "Inline", + "Invert", + "ScaleAndShift", + "SigmoidCentered", + "SoftmaxCentered", + "Softplus", +] + + +class _Mapping(collections.namedtuple("_Mapping", + ["x", "y", "ildj", "condition_kwargs"])): + """Helper class to make it easier to manage caching in `Bijector`.""" + + def __new__(cls, x=None, y=None, ildj=None, condition_kwargs=None): + """Custom __new__ so namedtuple items have defaults. + + Args: + x: `Tensor`. Forward. + y: `Tensor`. Inverse. + ildj: `Tensor`. Inverse log det Jacobian. + condition_kwargs: Python dictionary. Extra args supplied to + forward/inverse/etc functions. + + Returns: + mapping: New instance of _Mapping. + """ + return super(_Mapping, cls).__new__(cls, x, y, ildj, condition_kwargs) + + @property + def x_key(self): + """Returns key used for caching Y=g(X).""" + return (self.x,) + self._deep_tuple(tuple(sorted( + self.condition_kwargs.items()))) + + @property + def y_key(self): + """Returns key used for caching X=g^{-1}(Y).""" + return (self.y,) + self._deep_tuple(tuple(sorted( + self.condition_kwargs.items()))) + + def merge(self, x=None, y=None, ildj=None, + condition_kwargs=None, mapping=None): + """Returns new _Mapping with args merged with self. 
+ + Args: + x: `Tensor`. Forward. + y: `Tensor`. Inverse. + ildj: `Tensor`. Inverse log det Jacobian. + condition_kwargs: Python dictionary. Extra args supplied to + forward/inverse/etc functions. + mapping: Instance of _Mapping to merge. Can only be specified if no other + arg is specified. + + Returns: + mapping: New instance of `_Mapping` which has inputs merged with self. + + Raises: + ValueError: if mapping and any other arg is not `None`. + """ + if mapping is None: + mapping = _Mapping(x=x, y=y, ildj=ildj, + condition_kwargs=condition_kwargs) + elif not all([arg is None for arg in [x, y, ildj, condition_kwargs]]): + raise ValueError("Cannot specify mapping and individual args.") + return _Mapping( + x=self._merge(self.x, mapping.x), + y=self._merge(self.y, mapping.y), + ildj=self._merge(self.ildj, mapping.ildj), + condition_kwargs=self._merge(self.condition_kwargs, + mapping.condition_kwargs)) + + def _merge(self, old, new): + """Helper to merge which handles merging one value.""" + if old is None: + return new + elif new is not None and old != new: + raise ValueError("Incompatible values: %s != %s" % (old, new)) + return old + + def _deep_tuple(self, x): + """Converts lists of lists to tuples of tuples.""" + return (tuple(map(self._deep_tuple, x)) + if isinstance(x, (list, tuple)) else x) + @six.add_metaclass(abc.ABCMeta) class Bijector(object): - """Interface for transforming a `Distribution` via `TransformedDistribution`. + """Interface for transforming a `Distribution` sample. - A `Bijector` implements a bijective, differentiable function by transforming - an input `Tensor`. The output `Tensor` shape is constrained by the input - `sample`, `batch`, and `event` shape. A `Bijector` is characterized by three + A `Bijector` implements a + [diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a + bijective, differentiable function. 
A `Bijector` is used by + `TransformedDistribution` but can be generally used for transforming a + `Distribution` generated `Tensor`. A `Bijector` is characterized by three operations: 1. Forward Evaluation @@ -210,7 +307,8 @@ class Bijector(object): - The inverse `log o det o Jacobian` can be implemented as the negative of the forward `log o det o Jacobian`. This is useful if the `inverse` is implemented as a cache or the inverse Jacobian is computationally more - expensive. The following demonstrates the suggested implementation. + expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following + demonstrates the suggested implementation. ```python def _inverse_and_log_det_jacobian(self, y): @@ -300,6 +398,11 @@ class Bijector(object): self._is_constant_jacobian = is_constant_jacobian self._validate_args = validate_args self._dtype = dtype + self._from_y = {} + self._from_x = {} + # Using abbreviation ildj for "inverse log det Jacobian." + # This variable is not `None` iff is_constant_jacobian is `True`. 
+ self._constant_ildj = None if name: self._name = name else: @@ -368,7 +471,12 @@ class Bijector(object): with self._name_scope(name, [x]): x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) - return self._forward(x, **condition_kwargs) + mapping = self._lookup(x=x, condition_kwargs=condition_kwargs) + if mapping.y is not None: + return mapping.y + mapping = mapping.merge(y=self._forward(x, **condition_kwargs)) + self._cache(mapping) + return mapping.y def _inverse(self, y): raise NotImplementedError("inverse is not implemented") @@ -393,16 +501,28 @@ class Bijector(object): with self._name_scope(name, [y]): y = ops.convert_to_tensor(y, name="y") self._maybe_assert_dtype(y) + mapping = self._lookup(y=y, condition_kwargs=condition_kwargs) + if mapping.x is not None: + return mapping.x + ildj = None try: - return self._inverse(y, **condition_kwargs) + x = self._inverse(y, **condition_kwargs) except NotImplementedError as original_error: # Since _inverse was not implemented, try to see if it's implemented # by the _inverse_and_inverse_log_det_jacobian member. try: - return self._inverse_and_inverse_log_det_jacobian( - y, **condition_kwargs)[0] + x, ildj = self._inverse_and_inverse_log_det_jacobian( + y, **condition_kwargs) + if self._constant_ildj is not None: + ildj = self._constant_ildj # Use the "global" result. + elif self.is_constant_jacobian: + self._constant_ildj = ildj except NotImplementedError: raise original_error + x = x if mapping.x is None else mapping.x + mapping = mapping.merge(x=x, ildj=ildj) + self._cache(mapping) + return mapping.x def _inverse_log_det_jacobian(self, y): raise NotImplementedError("inverse_log_det_jacobian is not implemented.") @@ -430,18 +550,32 @@ class Bijector(object): `_inverse_and_inverse_log_det_jacobian` are implemented. 
""" with self._name_scope(name, [y]): + if self._constant_ildj is not None: + return self._constant_ildj y = ops.convert_to_tensor(y, name="y") self._maybe_assert_dtype(y) + mapping = self._lookup(y=y, condition_kwargs=condition_kwargs) + if mapping.ildj is not None: + return mapping.ildj try: - return self._inverse_log_det_jacobian(y, **condition_kwargs) + x = mapping.x + ildj = self._inverse_log_det_jacobian(y, **condition_kwargs) except NotImplementedError as original_error: # Since _inverse_log_det_jacobian was not implemented, try to see if # it's implemented by the _inverse_and_inverse_log_det_jacobian member. try: - return self._inverse_and_inverse_log_det_jacobian( - y, **condition_kwargs)[1] + x, ildj = self._inverse_and_inverse_log_det_jacobian( + y, **condition_kwargs) + if mapping.x is not None: + x = mapping.x except NotImplementedError: raise original_error + if self.is_constant_jacobian: + self._constant_ildj = ildj + x = x if mapping.x is None else mapping.x + mapping = mapping.merge(x=x, ildj=ildj) + self._cache(mapping) + return mapping.ildj def _inverse_and_inverse_log_det_jacobian(self, y): raise NotImplementedError( @@ -473,18 +607,30 @@ class Bijector(object): with self._name_scope(name, [y]): y = ops.convert_to_tensor(y, name="y") self._maybe_assert_dtype(y) + mapping = self._lookup(y=y, condition_kwargs=condition_kwargs) + if mapping.x is not None and mapping.ildj is not None: + return mapping.x, mapping.ildj try: - return self._inverse_and_inverse_log_det_jacobian( + x, ildj = self._inverse_and_inverse_log_det_jacobian( y, **condition_kwargs) except NotImplementedError as original_error: # Since _inverse_and_inverse_log_det_jacobian was not implemented, try # to see if we can separately use _inverse and # _inverse_log_det_jacobian members. 
try: - return (self._inverse(y, **condition_kwargs), - self._inverse_log_det_jacobian(y, **condition_kwargs)) + x = self._inverse(y, **condition_kwargs) + if self._constant_ildj is None: + ildj = self._inverse_log_det_jacobian(y, **condition_kwargs) except NotImplementedError: raise original_error + if self._constant_ildj is not None: + ildj = self._constant_ildj # Ignore any ildj we may/not have. + elif self.is_constant_jacobian: + self._constant_ildj = ildj + x = x if mapping.x is None else mapping.x + mapping = mapping.merge(x=x, ildj=ildj) + self._cache(mapping) + return mapping.x, mapping.ildj def _forward_log_det_jacobian(self, x): raise NotImplementedError( @@ -509,16 +655,29 @@ class Bijector(object): nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. """ with self._name_scope(name, [x]): + if self._constant_ildj is not None: + # Need "-1. *" to avoid invalid-unary-operand-type linter warning. + return -1. * self._constant_ildj x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) + mapping = self._lookup(x=x, condition_kwargs=condition_kwargs) + if mapping.ildj is not None: + return -mapping.ildj + y = None try: - return self._forward_log_det_jacobian(x, **condition_kwargs) + ildj = -self._forward_log_det_jacobian(x, **condition_kwargs) except NotImplementedError as original_error: try: - y = self.inverse(x, **condition_kwargs) - return -self.inverse_log_det_jacobian(y, **condition_kwargs) + y = self.inverse(x, **condition_kwargs) if y is None else y + ildj = self.inverse_log_det_jacobian(y, **condition_kwargs) except NotImplementedError: raise original_error + if self.is_constant_jacobian: + self._constant_ildj = ildj + y = y if mapping.y is None else mapping.y + mapping = mapping.merge(y=y, ildj=ildj) + self._cache(mapping) + return -mapping.ildj @contextlib.contextmanager def _name_scope(self, name=None, values=None): @@ -534,6 +693,31 @@ class Bijector(object): raise TypeError("Input had dtype %s but expected %s." 
% (self.dtype, x.dtype)) + def _cache(self, mapping): + """Helper which stores mapping info in forward/inverse dicts.""" + if self._constant_ildj is not None: + # Fold in ildj if known constant Jacobian. + mapping = mapping.merge(ildj=self._constant_ildj) + # Merging from lookup is an added check that we're not overwriting anything + # which is not None. + mapping = mapping.merge(mapping=self._lookup( + mapping.x, mapping.y, mapping.condition_kwargs)) + if mapping.x is None or mapping.y is None: + ValueError("Caching expects both (x,y) to be known, i.e., not None.") + self._from_x[mapping.x_key] = mapping + self._from_y[mapping.y_key] = mapping + + def _lookup(self, x=None, y=None, condition_kwargs=None): + """Helper which retrieves mapping info from forward/inverse dicts.""" + mapping = _Mapping(x=x, y=y, condition_kwargs=condition_kwargs) + # Since _cache requires both x,y to be set, we only need to do one cache + # lookup since the mapping is always in both or neither. + if mapping.x is not None: + return self._from_x.get(mapping.x_key, mapping) + if mapping.y is not None: + return self._from_y.get(mapping.y_key, mapping) + return mapping + class Inline(Bijector): # pylint: disable=line-too-long @@ -547,7 +731,7 @@ class Inline(Bijector): inverse_fn=tf.log, inverse_log_det_jacobian_fn=( lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), - name="Exp") + name="exp") ``` The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. @@ -573,8 +757,8 @@ class Inline(Bijector): log o det o jacobian of the forward transformation. is_constant_jacobian: `Boolean` indicating that the Jacobian is constant for all input arguments. - validate_args: `Boolean` indicated whether arguments should be checked for - correctness. + validate_args: `Boolean` indicating whether arguments should be checked + for correctness. name: `String`, name given to ops managed by this object. 
""" super(Inline, self).__init__( @@ -643,8 +827,8 @@ class Invert(Bijector): Args: bijector: Bijector instance. - validate_args: `Boolean` indicated whether arguments should be checked for - correctness. + validate_args: `Boolean` indicating whether arguments should be checked + for correctness. name: `String`, name given to ops managed by this object. """ @@ -713,8 +897,8 @@ class Chain(Bijector): Args: bijectors: Python list of bijector instances. An empty list makes this bijector equivalent to the `Identity` bijector. - validate_args: `Boolean` indicated whether arguments should be checked for - correctness. + validate_args: `Boolean` indicating whether arguments should be checked + for correctness. name: `String`, name given to ops managed by this object. Default: E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`. @@ -794,12 +978,9 @@ class Identity(Bijector): def __init__(self, validate_args=False, name="identity"): super(Identity, self).__init__( - batch_ndims=0, - event_ndims=0, is_constant_jacobian=True, validate_args=validate_args, name=name) - self._is_constant_jacobian = True def _forward(self, x): return x @@ -841,8 +1022,8 @@ class Exp(Bijector): Args: event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. - validate_args: `Boolean` indicated whether arguments should be checked for - correctness. + validate_args: `Boolean` indicating whether arguments should be checked + for correctness. name: `String` name given to ops managed by this object. """ @@ -923,8 +1104,8 @@ class ScaleAndShift(Bijector): scale: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`. event_ndims: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. - validate_args: `Boolean` indicated whether arguments should be checked for - correctness. 
+ validate_args: `Boolean` indicating whether arguments should be checked + for correctness. name: `String` name given to ops managed by this object. """ @@ -1271,3 +1452,150 @@ class SigmoidCentered(SoftmaxCentered): def __init__(self, validate_args=False, name="sigmoid_centered"): super(SigmoidCentered, self).__init__( validate_args=validate_args, name=name) + + +class CholeskyOuterProduct(Bijector): + # pylint: disable=line-too-long + """Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix. + + `event_ndims` must be 0 or 2, i.e., scalar or matrix. + + Note: the upper-triangular part of X is ignored (whether or not its zero). + + Examples: + + ```python + bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]]) + # Result: [[1, 1], [1, 5]], i.e., x x^T + + bijector.SoftmaxCentered(event_ndims=2).inverse(y=[[1., 1], [1, 5]]) + # Result: [[1, 0], [2, 1]], i.e., chol(y). + ``` + + """ + # pylint: enable=line-too-long + + def __init__(self, event_ndims=2, validate_args=False, + name="cholesky_outer_product"): + """Instantiates the `CholeskyOuterProduct` bijector. + + Args: + event_ndims: `constant` `int32` scalar `Tensor` indicating the number of + dimensions associated with a particular draw from the distribution. Must + be 0 or 2. + validate_args: `Boolean` indicating whether arguments should be checked + for correctness. + name: `String` name given to ops managed by this object. + + Raises: + ValueError: if event_ndims is neither 0 or 2. 
+ """ + self._parameters = {} + self._name = name + with self._name_scope("init", values=[event_ndims]): + event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") + event_ndims = tensor_util.constant_value(event_ndims) + if event_ndims is None or event_ndims not in [0, 2]: + raise ValueError("`event_ndims` must be a TF constant which is 0 or 2") + self._static_event_ndims = event_ndims + super(CholeskyOuterProduct, self).__init__( + validate_args=validate_args, + name=name) + + def _forward(self, x): + if self._static_event_ndims == 0: + return math_ops.square(x) + if self.validate_args: + is_matrix = check_ops.assert_rank_at_least(x, 2) + shape = array_ops.shape(x) + is_square = check_ops.assert_equal(shape[-2], shape[-1]) + x = control_flow_ops.with_dependencies([is_matrix, is_square], x) + # For safety, explicitly zero-out the upper triangular part. + x = array_ops.matrix_band_part(x, -1, 0) + return math_ops.batch_matmul(x, x, adj_y=True) + + def _inverse_and_inverse_log_det_jacobian(self, y): + x = (math_ops.sqrt(y) if self._static_event_ndims == 0 + else linalg_ops.cholesky(y)) + return x, -self._forward_log_det_jacobian(x) + + def _forward_log_det_jacobian(self, x): + # Let Y be a symmetric, positive definite matrix and write: + # Y = X X^T + # where X is lower-triangular. + # + # Observe that, + # dY[i,j]/dX[a,b] + # = d/dX[a,b] { X[i,:] X[j,:] } + # = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] } + # + # To compute the Jacobian dX/dY we must represent X,Y as vectors. Since Y is + # symmetric and X is lower-triangular, we need vectors of dimension: + # d = p (p + 1) / 2 + # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e., + # k = { i (i + 1) / 2 + j i>=j + # { undef ij thus i,j!=a. + # + # Since the Jacobian is lower-triangular, we need only compute the product + # of diagonal elements: + # d vec[Y] / d vec[X] @[k(i,j), k(i,j)] + # = X[j,j] + I[i=j] X[i,j] + # = 2 X[j,j]. 
+ # Since there is a 2 X[j,j] term for every lower-triangular element of X we + # conclude: + # |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}. + if self._static_event_ndims == 0: + if self.validate_args: + is_positive = check_ops.assert_positive( + x, message="All elements must be positive.") + x = control_flow_ops.with_dependencies([is_positive], x) + return math.log(2.) + math_ops.log(x) + + diag = array_ops.matrix_diag_part(x) + if self.validate_args: + is_matrix = check_ops.assert_rank_at_least( + x, 2, message="Input must be a (batch of) matrix.") + shape = array_ops.shape(x) + is_square = check_ops.assert_equal( + shape[-2], shape[-1], + message="Input must be a (batch of) square matrix.") + # Assuming lower-triangular means we only need check diag>0. + is_positive_definite = check_ops.assert_positive( + diag, message="Input must be positive definite.") + x = control_flow_ops.with_dependencies( + [is_matrix, is_square, is_positive_definite], x) + + # Create a column vector equal to: [p, p-1, ..., 2, 1]^T. + if x.get_shape().ndims is None or x.get_shape()[-1].value is None: + p = array_ops.shape(x)[-1] + else: + p = x.get_shape()[-1].value + exponents = array_ops.expand_dims( + math_ops.linspace(math_ops.cast(p, dtype=x.dtype), 1., p), + dim=1) + + sum_weighted_log_diag = array_ops.squeeze( + math_ops.batch_matmul(math_ops.log(diag), exponents), + squeeze_dims=-1) + fldj = p * math.log(2.) 
+ sum_weighted_log_diag + + if x.get_shape().ndims is not None: + fldj.set_shape(x.get_shape()[:-2]) + + return fldj diff --git a/tensorflow/contrib/distributions/python/ops/distribution.py b/tensorflow/contrib/distributions/python/ops/distribution.py index 2bfd272e71d..5a3583c22a3 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution.py +++ b/tensorflow/contrib/distributions/python/ops/distribution.py @@ -327,12 +327,13 @@ class Distribution(_BaseDistribution): for i, t in enumerate(graph_parents): if t is None or not contrib_framework.is_tensor(t): raise ValueError("Graph parent item %d is not a Tensor; %s." % (i, t)) + parameters = parameters or {} self._dtype = dtype self._is_continuous = is_continuous self._is_reparameterized = is_reparameterized self._allow_nan_stats = allow_nan_stats self._validate_args = validate_args - self._parameters = parameters or {} + self._parameters = parameters self._graph_parents = graph_parents self._name = name or type(self).__name__ @@ -434,6 +435,27 @@ class Distribution(_BaseDistribution): """Python boolean indicated possibly expensive checks are enabled.""" return self._validate_args + def copy(self, **override_parameters_kwargs): + """Creates a deep copy of the distribution. + + Note: the copy distribution may continue to depend on the original + intialization arguments. + + Args: + **override_parameters_kwargs: String/value dictionary of initialization + arguments to override with new values. + + Returns: + distribution: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + """ + parameters = dict(self.parameters, **override_parameters_kwargs) + # Python3 leaks "__class__" into `locals()` so we remove if present. + # TODO(b/32376812): Remove this pop. 
+ parameters.pop("__class__", None) + return type(self)(**parameters) + def _batch_shape(self): raise NotImplementedError("batch_shape is not implemented") diff --git a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py index 9a4af741a4d..47f9f36aec5 100644 --- a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py @@ -19,7 +19,6 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops import distribution as distributions from tensorflow.contrib.distributions.python.ops import distribution_util -from tensorflow.python.framework import ops from tensorflow.python.ops import math_ops @@ -160,7 +159,6 @@ class TransformedDistribution(distributions.Distribution): name = name or bijector.name + distribution.name self._distribution = distribution self._bijector = bijector - self._inverse_cache = {} super(TransformedDistribution, self).__init__( dtype=self._distribution.dtype, is_continuous=self._distribution.is_continuous, @@ -202,9 +200,7 @@ class TransformedDistribution(distributions.Distribution): **distribution_kwargs) # Recall that a bijector is named for its forward transform, i.e., # `Y = g(X)`, - y = self.bijector.forward(x, **bijector_kwargs) - self._inverse_cache[y] = x - return y + return self.bijector.forward(x, **bijector_kwargs) @distribution_util.AppendDocstring( """Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, @@ -216,11 +212,9 @@ class TransformedDistribution(distributions.Distribution): def _log_prob(self, y, bijector_kwargs=None, distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) - inverse_log_det_jacobian = self.bijector.inverse_log_det_jacobian( + x, ildj = 
self.bijector.inverse_and_inverse_log_det_jacobian( y, **bijector_kwargs) - return (self.distribution.log_prob(x, **distribution_kwargs) + - inverse_log_det_jacobian) + return ildj + self.distribution.log_prob(x, **distribution_kwargs) @distribution_util.AppendDocstring( """Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the @@ -232,18 +226,16 @@ class TransformedDistribution(distributions.Distribution): def _prob(self, y, bijector_kwargs=None, distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) - inverse_det_jacobian = math_ops.exp(self.bijector.inverse_log_det_jacobian( - y, **bijector_kwargs)) - return (self.distribution.prob(x, **distribution_kwargs) * - inverse_det_jacobian) + x, ildj = self.bijector.inverse_and_inverse_log_det_jacobian( + y, **bijector_kwargs) + return math_ops.exp(ildj) * self.distribution.prob(x, **distribution_kwargs) @distribution_util.AppendDocstring( condition_kwargs_dict=_condition_kwargs_dict) def _log_cdf(self, y, bijector_kwargs=None, distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) + x = self.bijector.inverse(y, **bijector_kwargs) return self.distribution.log_cdf(x, distribution_kwargs) @distribution_util.AppendDocstring( @@ -251,7 +243,7 @@ class TransformedDistribution(distributions.Distribution): def _cdf(self, y, bijector_kwargs=None, distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) + x = self.bijector.inverse(y, **bijector_kwargs) return self.distribution.cdf(x, **distribution_kwargs) @distribution_util.AppendDocstring( @@ -260,7 +252,7 @@ class TransformedDistribution(distributions.Distribution): bijector_kwargs=None, 
distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) + x = self.bijector.inverse(y, **bijector_kwargs) return self.distribution.log_survival_function(x, **distribution_kwargs) @distribution_util.AppendDocstring( @@ -269,13 +261,5 @@ class TransformedDistribution(distributions.Distribution): bijector_kwargs=None, distribution_kwargs=None): bijector_kwargs = bijector_kwargs or {} distribution_kwargs = distribution_kwargs or {} - x = self._inverse_possibly_from_cache(y, bijector_kwargs) + x = self.bijector.inverse(y, **bijector_kwargs) return self.distribution.survival_function(x, **distribution_kwargs) - - def _inverse_possibly_from_cache(self, y, bijector_kwargs): - """Return `self._inverse(y)`, possibly using cached value.""" - y = ops.convert_to_tensor(y, name="y") - if y in self._inverse_cache: - return self._inverse_cache[y] - else: - return self.bijector.inverse(y, **bijector_kwargs) diff --git a/tensorflow/contrib/factorization/examples/mnist.py b/tensorflow/contrib/factorization/examples/mnist.py index b238e2e174d..b0451f8fbca 100644 --- a/tensorflow/contrib/factorization/examples/mnist.py +++ b/tensorflow/contrib/factorization/examples/mnist.py @@ -327,6 +327,6 @@ if __name__ == '__main__': default=True, help='Use fake input data.' 
) - FLAGS = parser.parse_args() + FLAGS, unparsed = parser.parse_known_args() tf.test.main() diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index 88cf5f084d8..3228c1f3dfe 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -243,6 +243,7 @@ class KMeansClustering(estimator.Estimator, ).training_graph() incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1) self._loss = tf.reduce_sum(losses) + tf.scalar_summary('loss/raw', self._loss) training_op = with_dependencies([training_op, incr_step], self._loss) return training_op, self._loss diff --git a/tensorflow/contrib/layers/python/layers/optimizers.py b/tensorflow/contrib/layers/python/layers/optimizers.py index ca914c79265..a31882fecb4 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers.py +++ b/tensorflow/contrib/layers/python/layers/optimizers.py @@ -24,16 +24,20 @@ from tensorflow.contrib import framework as contrib_framework from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as vars_ +from tensorflow.python.training import moving_averages from tensorflow.python.training import optimizer as optimizer_ from tensorflow.python.training import training as train + OPTIMIZER_CLS_NAMES = { "Adagrad": train.AdagradOptimizer, "Adam": train.AdamOptimizer, @@ -104,7 +108,11 @@ def optimize_loss(loss, gradient_multipliers: dict of variables or variable 
names to floats. If present, gradients for specified variables will be multiplied by given constant. - clip_gradients: float or `None`, clips gradients by this value. + clip_gradients: float, callable or `None`. If float, is provided, a global + clipping is applied to prevent the norm of the gradient to exceed this + value. Alternatively, a callable can be provided e.g.: adaptive_clipping. + This callable takes a `list` of `(gradients, variables)` `tuple`s and + returns the same thing with the gradients modified. learning_rate_decay_fn: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay @@ -132,6 +140,7 @@ def optimize_loss(loss, * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` is wrong type. + * `clip_gradients' is not float or callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. """ @@ -224,9 +233,18 @@ def optimize_loss(loss, if gradient_multipliers is not None: gradients = _multiply_gradients(gradients, gradient_multipliers) + if "gradient_norm" in summaries: + logging_ops.scalar_summary("global_norm/gradient_norm", + clip_ops.global_norm(zip(*gradients)[0])) + # Optionally clip gradients by global norm. - if clip_gradients is not None: + if isinstance(clip_gradients, float): gradients = _clip_gradients_by_norm(gradients, clip_gradients) + elif callable(clip_gradients): + gradients = clip_gradients(gradients) + elif clip_gradients is not None: + raise ValueError( + "Unknown type %s for clip_gradients" % type(clip_gradients)) # Add scalar summary for loss. 
if "loss" in summaries: @@ -241,11 +259,15 @@ def optimize_loss(loss, if grad_values is not None: if "gradients" in summaries: - logging_ops.histogram_summary(variable.name + "/gradients", + logging_ops.histogram_summary("gradients/" + variable.name, grad_values) if "gradient_norm" in summaries: - logging_ops.histogram_summary(variable.name + "/gradient_norm", - clip_ops.global_norm([grad_values])) + logging_ops.scalar_summary("gradient_norm/" + variable.name, + clip_ops.global_norm([grad_values])) + + if clip_gradients is not None and "gradient_norm" in summaries: + logging_ops.scalar_summary("global_norm/clipped_gradient_norm", + clip_ops.global_norm(zip(*gradients)[0])) # Create gradient updates. grad_updates = opt.apply_gradients(gradients, @@ -266,6 +288,101 @@ def _clip_gradients_by_norm(grads_and_vars, clip_gradients): return list(zip(clipped_gradients, variables)) +def _adaptive_max_norm(norm, std_factor, decay, global_step, epsilon, name): + """Find max_norm given norm and previous average.""" + with vs.variable_scope(name, "AdaptiveMaxNorm", [norm]): + log_norm = math_ops.log(norm + epsilon) + + def moving_average(name, value, decay): + moving_average_variable = vs.get_variable( + name, shape=value.get_shape(), dtype=value.dtype, + initializer=init_ops.zeros_initializer, trainable=False) + return moving_averages.assign_moving_average( + moving_average_variable, value, decay) + + # quicker adaptation at the beginning + if global_step is not None: + n = math_ops.to_float(global_step) + decay = math_ops.minimum(decay, n / (n + 1.)) + + # update averages + mean = moving_average("mean", log_norm, decay) + sq_mean = moving_average("sq_mean", math_ops.square(log_norm), decay) + + variance = sq_mean - math_ops.square(mean) + std = math_ops.sqrt(math_ops.maximum(epsilon, variance)) + max_norms = math_ops.exp(mean + std_factor*std) + return max_norms, mean + + +def adaptive_clipping_fn(std_factor=2., + decay=0.95, + static_max_norm=None, + global_step=None, + 
report_summary=False, + epsilon=1e-8, + name=None): + """Adapt the clipping value using statistics on the norms. + + Implement adaptive gradient as presented in section 3.2.1 of + https://arxiv.org/abs/1412.1602. + + Keeps a moving average of the mean and std of the log(norm) of the gradient. + if the norm exceeds `exp(mean + std_factor*std)`, all gradients are rescaled + such that the global norm becomes `exp(mean)`. + + Args: + std_factor: Python scaler (or tensor). + `max_norm = exp(mean + std_factor*std)` + decay: The smoothing factor of the moving averages. + static_max_norm: If provided, will threshold the norm to this value as an + extra safety. + global_step: Optional global_step. If provided, `decay = decay*n/(n+1)`. + This provides a quicker adaptation of the mean for the first steps. + report_summary: If `True`, will add histogram summaries of the `max_norm`. + epsilon: Small value chosen to avoid zero variance. + name: The name for this operation is used to scope operations and summaries. + + Returns: + A function for applying gradient clipping. + """ + def gradient_clipping(grads_and_vars): + """Internal function for adaptive clipping.""" + grads, variables = zip(*grads_and_vars) + + norm = clip_ops.global_norm(grads) + + max_norm, log_mean = _adaptive_max_norm( + norm, std_factor, decay, global_step, epsilon, name) + + # reports the max gradient norm for debugging + if report_summary: + logging_ops.scalar_summary( + "global_norm/adaptive_max_gradient_norm", max_norm) + + # factor will be 1. 
if norm is smaller than max_norm + factor = math_ops.select(norm < max_norm, + array_ops.ones_like(norm), + math_ops.exp(log_mean) / norm) + + if static_max_norm is not None: + factor = math_ops.minimum(static_max_norm / norm, factor) + + # apply factor + clipped_grads = [] + for grad in grads: + if grad is None: + clipped_grads.append(None) + elif isinstance(grad, ops.IndexedSlices): + clipped_grads.append(ops.IndexedSlices( + grad.values * factor, grad.indices, grad.dense_shape)) + else: + clipped_grads.append(grad * factor) + + return list(zip(clipped_grads, variables)) + return gradient_clipping + + def _add_scaled_noise_to_gradients(grads_and_vars, gradient_noise_scale): """Adds scaled noise from a 0-mean normal distribution to gradients.""" gradients, variables = zip(*grads_and_vars) diff --git a/tensorflow/contrib/layers/python/layers/optimizers_test.py b/tensorflow/contrib/layers/python/layers/optimizers_test.py index fb76fd20b4a..a7de611a664 100644 --- a/tensorflow/contrib/layers/python/layers/optimizers_test.py +++ b/tensorflow/contrib/layers/python/layers/optimizers_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np import tensorflow as tf @@ -179,6 +180,26 @@ class OptimizersTest(tf.test.TestCase): self.assertAlmostEqual(var_value, 9.98999, 4) self.assertEqual(global_step_value, 1) + def testAdaptiveGradientClip(self): + with self.test_session() as session: + x, var, loss, global_step = _setup_model() + clip_gradients = tf.contrib.layers.adaptive_clipping_fn() + train = tf.contrib.layers.optimize_loss(loss, + global_step, + learning_rate=0.1, + optimizer="SGD", + clip_gradients=clip_gradients) + tf.initialize_all_variables().run() + session.run(train, feed_dict={x: 5}) + var_value, global_step_value = session.run([var, global_step]) + self.assertAlmostEqual(var_value, 9.8916, 4) + self.assertEqual(global_step_value, 1) + var_count = 0 + for var in 
tf.all_variables(): + if var.name.startswith("OptimizeLoss/AdaptiveMaxNorm"): + var_count += 1 + self.assertEqual(2, var_count) + def testGradientMultiply(self): with self.test_session() as session: x, var, loss, global_step = _setup_model() @@ -332,5 +353,70 @@ class OptimizersTest(tf.test.TestCase): self.assertEqual(update_var_value, 20) self.assertEqual(global_step_value, 1) + +class AdaptiveClipping(tf.test.TestCase): + + def testAverages(self): + with self.test_session() as session: + scale = 2. + grad = tf.ones([3, 4]) * scale + log_norm = np.log(np.sqrt(scale**2 * grad.get_shape().num_elements())) + grads_and_vars = [(grad, grad)] + grads_and_vars = tf.contrib.layers.adaptive_clipping_fn( + decay=0.5)(grads_and_vars) + + var_dict = {} + for var in tf.all_variables(): + if var.name.startswith("AdaptiveMaxNorm"): + var_dict[var.name.split(":")[0]] = var + self.assertEqual(2, len(var_dict)) + moving_mean = var_dict["AdaptiveMaxNorm/mean"] + moving_sq_mean = var_dict["AdaptiveMaxNorm/sq_mean"] + tf.initialize_all_variables().run() + mean, sq_mean = session.run([moving_mean, moving_sq_mean]) + self.assertEqual([0], mean) + self.assertEqual([0], sq_mean) + for i in range(20): + mean, sq_mean, _ = session.run( + [moving_mean, moving_sq_mean, grads_and_vars[0][0]]) + if i == 0: + self.assertLess(mean, 0.9 * log_norm) + self.assertLess(sq_mean, 0.9 * log_norm**2) + + self.assertAlmostEqual(float(mean), log_norm, places=4) + self.assertAlmostEqual(float(sq_mean), log_norm**2, places=4) + + def testClip(self): + with self.test_session() as session: + spike = 1000. 
+ multiplier = tf.placeholder(tf.float32, [], "multiplier") + step = tf.placeholder(tf.int32, [], "step") + + grad = tf.ones([3, 4]) * multiplier + grads_and_vars = [(grad, grad)] + grads_and_vars = tf.contrib.layers.adaptive_clipping_fn( + decay=0.9, global_step=step)(grads_and_vars) + + tf.initialize_all_variables().run() + def run(scale, i): + return session.run(grads_and_vars[0][0], + feed_dict={multiplier: scale, step: i}) + + for i in range(20): + scale = [1., -2.][i % 2] + clipped_grad = run(scale, i) + if i > 3: + self.assertAllClose(np.ones(clipped_grad.shape)*scale, clipped_grad) + + # assert that the spike will have low influence. + clipped_grad = run(spike, 20) + self.assertTrue((clipped_grad < 25.).all()) + + # assert that a repeated spike will converge to this new value. + for i in range(10): + clipped_grad = run(spike, i + 21) + + self.assertAllClose(np.ones(clipped_grad.shape)*spike, clipped_grad) + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index 07dd12ebc38..b5b1dbb6355 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -35,6 +35,6 @@ from tensorflow.contrib.learn.python.learn.estimators.linear import LinearClassi from tensorflow.contrib.learn.python.learn.estimators.linear import LinearRegressor from tensorflow.contrib.learn.python.learn.estimators.logistic_regressor import LogisticRegressor from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestEstimator -from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestLossMonitor +from tensorflow.contrib.learn.python.learn.estimators.random_forest import TensorForestLossHook from tensorflow.contrib.learn.python.learn.estimators.run_config import RunConfig from tensorflow.contrib.learn.python.learn.estimators.svm 
import SVM diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier.py b/tensorflow/contrib/learn/python/learn/estimators/classifier.py index 978ab9339b9..cf9ea7e82ae 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/classifier.py +++ b/tensorflow/contrib/learn/python/learn/estimators/classifier.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib import metrics as metrics_lib +from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.contrib.session_bundle import exporter @@ -27,6 +28,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +@deprecated('2016-11-30', 'Please write an appropriate function for use with' + ' your estimator.') def classification_signature_fn(examples, unused_features, predictions): """Creates classification signature from given examples and predictions. @@ -61,6 +64,7 @@ class Classifier(estimator.Estimator): CLASS_OUTPUT = 'classes' PROBABILITY_OUTPUT = 'probabilities' + @deprecated('2016-11-30', 'Please use Estimator directly.') def __init__(self, model_fn, n_classes, model_dir=None, config=None, params=None, feature_engineering_fn=None): """Constructor for Classifier. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 241b2b41e5c..ae4c97eae7c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -309,7 +309,7 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): inputs=rnn_outputs, num_outputs=self._target_column.num_label_columns, activation_fn=None, - trainable=False) + trainable=True) return activations, final_state @abc.abstractmethod diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py index f14e65fff55..d5ca3fbeed5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py @@ -429,7 +429,7 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase): cell_type = 'basic_rnn' cell_size = 8 optimizer_type = 'Momentum' - learning_rate = 0.5 + learning_rate = 0.1 momentum = 0.9 loss_threshold = 0.1 diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 0ebd8088664..1882e1578d8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -36,6 +36,7 @@ from tensorflow.contrib import layers from tensorflow.contrib import metrics as metrics_lib from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values +from tensorflow.contrib.framework import get_graph_from_inputs from tensorflow.contrib.framework import list_variables from tensorflow.contrib.framework import load_variable from tensorflow.contrib.learn.python.learn import evaluable @@ -88,8 
+89,11 @@ class ModelFnOps( collections.namedtuple('ModelFnOps', ['predictions', 'loss', 'training_op', 'default_metrics', 'signature_fn'])): - def __new__(cls, predictions, loss, training_op, default_metrics, - signature_fn, mode): + def __new__(cls, mode, predictions=None, loss=None, training_op=None, + default_metrics=None, signature_fn=None): + # Assert all ops are from the same graph. + get_graph_from_inputs((predictions, loss, training_op)) + # Validate training_op. if training_op is None: if mode == ModeKeys.TRAIN: @@ -1042,13 +1046,16 @@ class Estimator(BaseEstimator): if isinstance(model_fn_results, ModelFnOps): return model_fn_results - else: - # Here model_fn_ops should be a tuple with 3 elements. - if len(model_fn_results) != 3: - raise ValueError('Unrecognized value returned by model_fn, ' - 'please return ModelFnOps.') - return ModelFnOps(model_fn_results[0], model_fn_results[1], - model_fn_results[2], None, None, mode) + + # Here model_fn_ops should be a tuple with 3 elements. + if len(model_fn_results) != 3: + raise ValueError('Unrecognized value returned by model_fn, ' + 'please return ModelFnOps.') + return ModelFnOps( + mode=mode, + predictions=model_fn_results[0], + loss=model_fn_results[1], + training_op=model_fn_results[2]) def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 04d2484e8e0..bdb3fe3589e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -229,20 +229,30 @@ class _Head(object): else: train_op = control_flow_ops.group(*additional_train_op) - return estimator.ModelFnOps(None, loss, train_op, - self._default_metric(), - self._create_signature_fn(), mode) + return estimator.ModelFnOps( + mode=estimator.ModeKeys.TRAIN, + loss=loss, + training_op=train_op, + default_metrics=self._default_metric(), + signature_fn=self._create_signature_fn()) + if mode == estimator.ModeKeys.INFER: - predictions = self._infer_op(logits, logits_input) - return estimator.ModelFnOps(predictions, None, None, - self._default_metric(), - self._create_signature_fn(), mode) + return estimator.ModelFnOps( + mode=estimator.ModeKeys.INFER, + predictions=self._infer_op(logits, logits_input), + default_metrics=self._default_metric(), + signature_fn=self._create_signature_fn()) + if mode == estimator.ModeKeys.EVAL: predictions, loss = self._eval_op(features, target, logits, logits_input) - return estimator.ModelFnOps(predictions, loss, None, - self._default_metric(), - self._create_signature_fn(), mode) - raise ValueError("mode=%s unrecognized" % str(mode)) + return estimator.ModelFnOps( + mode=estimator.ModeKeys.EVAL, + predictions=predictions, + loss=loss, + default_metrics=self._default_metric(), + signature_fn=self._create_signature_fn()) + + raise ValueError("mode=%s unrecognized." 
% str(mode)) @abc.abstractmethod def _training_loss(self, features, target, logits=None, logits_input=None, diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py index 58b4389a000..86f8c5dd028 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py +++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py @@ -17,25 +17,28 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np -import six - from tensorflow.contrib import framework as contrib_framework from tensorflow.contrib.framework import deprecated_arg_values -from tensorflow.contrib.learn.python.learn import monitors as mon +from tensorflow.contrib.learn.python.learn import evaluable +from tensorflow.contrib.learn.python.learn import trainable from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.utils import export from tensorflow.contrib.tensor_forest.client import eval_metrics from tensorflow.contrib.tensor_forest.data import data_ops from tensorflow.contrib.tensor_forest.python import tensor_forest from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import session_run_hook + + +KEYS_NAME = 'keys' +LOSS_NAME = 'rf_training_loss' def _assert_float32(tensors): @@ -56,58 +59,124 @@ def _assert_float32(tensors): raise TypeError('Expected dtype=float32, %s.' 
% tensor) -class TensorForestLossMonitor(mon.EveryN): - """Terminates training when training loss stops decreasing.""" +class TensorForestLossHook(session_run_hook.SessionRunHook): + """Monitor to request stop when loss stops decreasing.""" - def __init__(self, - early_stopping_rounds, - every_n_steps): - super(TensorForestLossMonitor, self).__init__(every_n_steps=every_n_steps) + def __init__(self, early_stopping_rounds): self.early_stopping_rounds = early_stopping_rounds self.min_loss = None - self.min_loss_step = 0 + self.last_step = -1 + # self.steps records the number of steps for which the loss has been + # non-decreasing + self.steps = 0 - def step_begin(self, step): - super(TensorForestLossMonitor, self).step_begin(step) - return [self._loss_op_name] + def before_run(self, run_context): + return session_run_hook.SessionRunArgs( + {'global_step': contrib_framework.get_global_step(), + 'current_loss': run_context.session.graph.get_operation_by_name( + LOSS_NAME).outputs[0]}) - def set_estimator(self, est): - """This function gets called in the same graph as _get_train_ops.""" - super(TensorForestLossMonitor, self).set_estimator(est) - self._loss_op_name = est.training_loss.name + def after_run(self, run_context, run_values): + current_loss = run_values.results['current_loss'] + current_step = run_values.results['global_step'] + self.steps += 1 + # Gaurd against the global step going backwards, which might happen + # if we recover from something. 
+ if self.last_step == -1 or self.last_step > current_step: + logging.info('TensorForestLossHook resetting last_step.') + self.last_step = current_step + self.steps = 0 + return - def every_n_step_end(self, step, outputs): - super(TensorForestLossMonitor, self).every_n_step_end(step, outputs) - current_loss = outputs[self._loss_op_name] if self.min_loss is None or current_loss < self.min_loss: self.min_loss = current_loss - self.min_loss_step = step - return step - self.min_loss_step >= self.early_stopping_rounds + self.steps = 0 + if self.steps > self.early_stopping_rounds: + logging.info('TensorForestLossHook requesting stop.') + run_context.request_stop() -class TensorForestEstimator(estimator.BaseEstimator): +def get_model_fn(params, graph_builder_class, device_assigner, + weights_name=None, keys_name=None): + """Return a model function given a way to construct a graph builder.""" + def _model_fn(features, targets): + """Function that returns predictions, training loss, and training op.""" + weights = None + keys = None + if weights_name and weights_name in features: + weights = features.pop(weights_name) + if keys_name and keys_name in features: + keys = features.pop(keys_name) + processed_features, spec = data_ops.ParseDataTensorOrDict(features) + _assert_float32(processed_features) + if targets is not None: + targets = data_ops.ParseLabelTensorOrDict(targets) + _assert_float32(targets) + + graph_builder = graph_builder_class(params, device_assigner=device_assigner) + inference = {eval_metrics.INFERENCE_PROB_NAME: + graph_builder.inference_graph(processed_features, + data_spec=spec)} + if not params.regression: + inference[eval_metrics.INFERENCE_PRED_NAME] = math_ops.argmax( + inference[eval_metrics.INFERENCE_PROB_NAME], 1) + if keys: + inference[KEYS_NAME] = keys + + # targets might be None if we're doing prediction (which brings up the + # question of why we force everything to adhere to a single model_fn). 
+ training_loss = None + training_graph = None + if targets is not None: + training_loss = graph_builder.training_loss(processed_features, targets, + data_spec=spec, + name=LOSS_NAME) + training_graph = control_flow_ops.group( + graph_builder.training_graph( + processed_features, targets, data_spec=spec, + input_weights=weights), + state_ops.assign_add(contrib_framework.get_global_step(), 1)) + # Put weights back in + if weights is not None: + features[weights_name] = weights + return (inference, training_loss, training_graph) + return _model_fn + + +class TensorForestEstimator(evaluable.Evaluable, trainable.Trainable): """An estimator that can train and evaluate a random forest.""" def __init__(self, params, device_assigner=None, model_dir=None, graph_builder_class=tensor_forest.RandomForestGraphs, - master='', accuracy_metric=None, - tf_random_seed=None, config=None, - feature_engineering_fn=None): + config=None, weights_name=None, keys_name=None, + feature_engineering_fn=None, early_stopping_rounds=100): self.params = params.fill() - self.accuracy_metric = (accuracy_metric or - ('r2' if self.params.regression else 'accuracy')) - self.data_feeder = None - self.device_assigner = ( - device_assigner or tensor_forest.RandomForestDeviceAssigner()) self.graph_builder_class = graph_builder_class - self.training_args = {} - self.construction_args = {} - self._feature_engineering_fn = ( - feature_engineering_fn or - (lambda features, targets: (features, targets))) + self.early_stopping_rounds = early_stopping_rounds + self._estimator = estimator.Estimator( + model_fn=get_model_fn(params, graph_builder_class, device_assigner, + weights_name=weights_name, keys_name=keys_name), + model_dir=model_dir, + config=config, + feature_engineering_fn=feature_engineering_fn) - super(TensorForestEstimator, self).__init__(model_dir=model_dir, - config=config) + def evaluate( + self, x=None, y=None, input_fn=None, feed_fn=None, batch_size=None, + steps=None, metrics=None, name=None): + 
"""See evaluable.Evaluable.""" + return self._estimator.evaluate( + input_fn=input_fn, x=x, y=y, feed_fn=feed_fn, + batch_size=batch_size, steps=steps, + metrics=metrics, name=name) + + def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, + monitors=None, max_steps=None): + """See trainable.Trainable.""" + if not monitors: + monitors = [TensorForestLossHook(self.early_stopping_rounds)] + self._estimator.fit(input_fn=input_fn, x=x, y=y, + batch_size=batch_size, steps=steps, monitors=monitors, + max_steps=max_steps) @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, @@ -135,13 +204,14 @@ class TensorForestEstimator(estimator.BaseEstimator): Raises: ValueError: If both or neither of x and input_fn were given. """ - results = super(TensorForestEstimator, self).predict( + results = self._estimator.predict( x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs, as_iterable=as_iterable) + if as_iterable: - return (r['probabilities'] for r in results) + return (x[eval_metrics.INFERENCE_PROB_NAME] for x in results) else: - return results['probabilities'] + return results[eval_metrics.INFERENCE_PROB_NAME] @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, @@ -168,16 +238,16 @@ class TensorForestEstimator(estimator.BaseEstimator): Numpy array of predicted classes or regression values (or an iterable of predictions if as_iterable is True). 
""" - probabilities = self.predict_proba( + results = self._estimator.predict( x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs, as_iterable=as_iterable) - if self.params.regression: - return probabilities + + predict_name = (eval_metrics.INFERENCE_PROB_NAME if self.params.regression + else eval_metrics.INFERENCE_PRED_NAME) + if as_iterable: + return (x[predict_name] for x in results) else: - if as_iterable: - return (np.argmax(p, axis=0) for p in probabilities) - else: - return np.argmax(probabilities, axis=1) + return results[predict_name] @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, @@ -186,100 +256,40 @@ class TensorForestEstimator(estimator.BaseEstimator): self, x=None, input_fn=None, axis=None, batch_size=None, outputs=None, as_iterable=True): """Same as predict but also returns the example keys.""" - results = super(TensorForestEstimator, self).predict( + results = self._estimator.predict( x=x, input_fn=input_fn, batch_size=batch_size, outputs=outputs, as_iterable=as_iterable) - if self.params.regression: - if as_iterable: - return ((r['probabilities'], r.get('keys', None)) for r in results) - else: - return results['probabilities'], results.get('keys', None) + + predict_name = (eval_metrics.INFERENCE_PROB_NAME if self.params.regression + else eval_metrics.INFERENCE_PRED_NAME) + if as_iterable: + return ((x[predict_name], x.get(KEYS_NAME, None)) for x in results) else: - if as_iterable: - return ((np.argmax(r['probabilities'], axis=0), - r.get('keys', None)) for r in results) - - else: - return np.argmax(results['probabilities'], axis=1), results.get('keys', - None) - - def _get_train_ops(self, features, targets): - """Method that builds model graph and returns trainer ops. - - Args: - features: `Tensor` or `dict` of `Tensor` objects. - targets: `Tensor` or `dict` of `Tensor` objects. - - Returns: - Tuple of train `Operation` and loss `Tensor`. 
- """ - features, _, weights, spec = data_ops.ParseDataTensorOrDict(features) - labels = data_ops.ParseLabelTensorOrDict(targets) - features, labels = self._feature_engineering_fn(features, labels) - _assert_float32(features) - _assert_float32(labels) - - if weights is not None: - if 'input_weights' in self.training_args: - logging.warning('Replacing input_weights in training_args.') - self.training_args['input_weights'] = weights - - graph_builder = self.graph_builder_class( - self.params, device_assigner=self.device_assigner, - **self.construction_args) - - epoch = None - if self.data_feeder: - epoch = self.data_feeder.make_epoch_variable() - - train = control_flow_ops.group( - graph_builder.training_graph( - features, labels, data_spec=spec, epoch=epoch, - **self.training_args), - state_ops.assign_add(contrib_framework.get_global_step(), 1)) - - self.training_loss = graph_builder.training_loss(features, targets) - - return train, self.training_loss - - def _get_predict_ops(self, features): - graph_builder = self.graph_builder_class( - self.params, device_assigner=self.device_assigner, training=False, - **self.construction_args) - features, keys, _, spec = data_ops.ParseDataTensorOrDict(features) - features, _ = self._feature_engineering_fn(features, None) - _assert_float32(features) - output_dict = { - 'probabilities': graph_builder.inference_graph(features, - data_spec=spec)} - if keys is not None: - output_dict['keys'] = keys - return output_dict - - def _get_eval_ops(self, features, targets, metrics): - features, _, _, spec = data_ops.ParseDataTensorOrDict(features) - labels = data_ops.ParseLabelTensorOrDict(targets) - features, labels = self._feature_engineering_fn(features, labels) - _assert_float32(features) - _assert_float32(labels) - - graph_builder = self.graph_builder_class( - self.params, device_assigner=self.device_assigner, training=False, - **self.construction_args) - - probabilities = graph_builder.inference_graph(features, data_spec=spec) - - # 
One-hot the labels. - if not self.params.regression: - labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64( - array_ops.squeeze(labels)), self.params.num_classes, 1, 0)) - - if metrics is None: - metrics = {self.accuracy_metric: - eval_metrics.get_metric(self.accuracy_metric)} - - result = {} - for name, metric in six.iteritems(metrics): - result[name] = metric(probabilities, labels) + return results[predict_name], results.get(KEYS_NAME, None) + def export(self, + export_dir, + input_fn, + signature_fn=None, + default_batch_size=1): + """See BaseEstimator.export.""" + # Reset model function with basic device assigner. + # Servo doesn't support distributed inference + # but it will try to respect device assignments if they're there. + # pylint: disable=protected-access + orig_model_fn = self._estimator._model_fn + self._estimator._model_fn = get_model_fn( + self.params, self.graph_builder_class, + tensor_forest.RandomForestDeviceAssigner()) + result = self._estimator.export( + export_dir=export_dir, + use_deprecated_input_fn=True, + signature_fn=(signature_fn or + (export.regression_signature_fn + if self.params.regression else + export.classification_signature_fn_with_prob)), + default_batch_size=default_batch_size, + prediction_key=eval_metrics.INFERENCE_PROB_NAME) + self._estimator._model_fn = orig_model_fn + # pylint: enable=protected-access return result diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py index a1216be1fe9..9242aa98969 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest_test.py @@ -28,14 +28,30 @@ class TensorForestTrainerTests(tf.test.TestCase): def testClassification(self): """Tests multi-class classification using matrix data as input.""" hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams( - num_trees=3, 
max_nodes=1000, num_classes=3, num_features=4) - classifier = tf.contrib.learn.TensorForestEstimator(hparams) + num_trees=3, max_nodes=1000, num_classes=3, num_features=4, + split_after_samples=20) + classifier = tf.contrib.learn.TensorForestEstimator(hparams.fill()) iris = tf.contrib.learn.datasets.load_iris() data = iris.data.astype(np.float32) target = iris.target.astype(np.float32) - monitors = [tf.contrib.learn.TensorForestLossMonitor(10, 10)] + classifier.fit(x=data, y=target, steps=100, batch_size=50) + classifier.evaluate(x=data, y=target, steps=10) + + def testClassificationTrainingLoss(self): + """Tests multi-class classification using matrix data as input.""" + hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams( + num_trees=3, max_nodes=1000, num_classes=3, num_features=4) + classifier = tf.contrib.learn.TensorForestEstimator( + hparams, graph_builder_class=( + tf.contrib.tensor_forest.python.tensor_forest.TrainingLossForest)) + + iris = tf.contrib.learn.datasets.load_iris() + data = iris.data.astype(np.float32) + target = iris.target.astype(np.float32) + + monitors = [tf.contrib.learn.TensorForestLossHook(10)] classifier.fit(x=data, y=target, steps=100, monitors=monitors) classifier.evaluate(x=data, y=target, steps=10) @@ -44,16 +60,15 @@ class TensorForestTrainerTests(tf.test.TestCase): hparams = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams( num_trees=3, max_nodes=1000, num_classes=1, num_features=13, - regression=True) + regression=True, split_after_samples=20) - regressor = tf.contrib.learn.TensorForestEstimator(hparams) + regressor = tf.contrib.learn.TensorForestEstimator(hparams.fill()) boston = tf.contrib.learn.datasets.load_boston() data = boston.data.astype(np.float32) target = boston.target.astype(np.float32) - monitors = [tf.contrib.learn.TensorForestLossMonitor(10, 10)] - regressor.fit(x=data, y=target, steps=100, monitors=monitors) + regressor.fit(x=data, y=target, steps=100, batch_size=50) 
regressor.evaluate(x=data, y=target, steps=10) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index c7ce09de28c..0c5152b553f 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -627,7 +627,7 @@ def _eval_results_to_str(eval_results): def _write_summary_results(output_dir, eval_results, current_global_step): """Writes eval results into summary file in given dir.""" - logging.info('Saving evaluation summary for %d step: %s', current_global_step, + logging.info('Saving evaluation summary for step %d: %s', current_global_step, _eval_results_to_str(eval_results)) summary_writer = get_summary_writer(output_dir) summary = summary_pb2.Summary() diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index 21ce65b7eb4..933c7456f5d 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -253,6 +253,18 @@ def _get_shared_file_name_queue(file_names, shuffle, num_epochs, name): def _get_file_names(file_pattern, randomize_input): + """Parse list of file names from pattern, optionally shuffled. + + Args: + file_pattern: File glob pattern, or list of strings. + randomize_input: Whether to shuffle the order of file names. + + Returns: + List of file names matching `file_pattern`. + + Raises: + ValueError: If `file_pattern` is empty, or pattern matches no files. + """ if isinstance(file_pattern, list): file_names = file_pattern if not file_names: @@ -304,6 +316,36 @@ def _read_keyed_batch_examples_helper(file_pattern, parse_fn=None, setup_shared_queue=False, name=None): + """Adds operations to read, queue, batch `Example` protos. + + Args: + file_pattern: List of files or pattern of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. 
+ batch_size: An int or scalar `Tensor` specifying the batch size to use. + reader: A function or class that returns an object with + `read` method, (filename tensor) -> (example tensor). + randomize_input: Whether the input should be randomized. + num_epochs: Integer specifying the number of times to read through the + dataset. If `None`, cycles through the dataset forever. + NOTE - If specified, creates a variable that must be initialized, so call + `tf.initialize_all_variables()` as shown in the tests. + queue_capacity: Capacity for input queue. + num_threads: The number of threads enqueuing examples. + read_batch_size: An int or scalar `Tensor` specifying the number of + records to read at once + parse_fn: Parsing function, takes `Example` Tensor returns parsed + representation. If `None`, no parsing is done. + setup_shared_queue: Whether to set up a shared queue for file names. + name: Name of resulting op. + + Returns: + Returns tuple of: + - `Tensor` of string keys. + - String `Tensor` of batched `Example` proto. + + Raises: + ValueError: for invalid inputs. + """ # Retrieve files to read. file_names = _get_file_names(file_pattern, randomize_input) @@ -348,10 +390,10 @@ def _read_keyed_batch_examples_helper(file_pattern, enqueue_many = read_batch_size > 1 - if num_epochs is not None: - allow_smaller_final_batch = True - else: + if num_epochs is None: allow_smaller_final_batch = False + else: + allow_smaller_final_batch = True # Setup batching queue given list of read example tensors. if randomize_input: @@ -505,7 +547,6 @@ def _read_keyed_batch_features_shared_queue(file_pattern, Adding multiple queue runners for the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than parsing. - parser_num_threads: (Deprecated) The number of threads to parse examples. parse_fn: Parsing function, takes `Example` Tensor returns parsed representation. If `None`, no parsing is done. name: Name of resulting op. 
diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py index f9f42bbfad2..8491bb707bf 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py @@ -121,7 +121,8 @@ class GraphIOTest(tf.test.TestCase): batch_size = 17 queue_capacity = 1234 name = "my_batch" - features = {"feature": tf.FixedLenFeature(shape=[0], dtype=tf.float32)} + shape = (0,) + features = {"feature": tf.FixedLenFeature(shape=shape, dtype=tf.float32)} with tf.Graph().as_default() as g, self.test_session(graph=g) as sess: features = tf.contrib.learn.io.read_batch_record_features( @@ -132,8 +133,11 @@ class GraphIOTest(tf.test.TestCase): queue_capacity=queue_capacity, reader_num_threads=2, name=name) - self.assertEqual("%s/fifo_queue_1_Dequeue:0" % name, - features["feature"].name) + self.assertTrue( + "feature" in features, "'feature' missing from %s." 
% features.keys()) + feature = features["feature"] + self.assertEqual("%s/fifo_queue_1_Dequeue:0" % name, feature.name) + self.assertAllEqual((batch_size,) + shape, feature.get_shape().as_list()) file_name_queue_name = "%s/file_name_queue" % name file_names_name = "%s/input" % file_name_queue_name example_queue_name = "%s/fifo_queue" % name @@ -161,6 +165,7 @@ class GraphIOTest(tf.test.TestCase): reader=tf.TFRecordReader, randomize_input=True, num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), inputs.get_shape().as_list()) self.assertEqual("%s:1" % name, inputs.name) file_name_queue_name = "%s/file_name_queue" % name file_name_queue_limit_name = ( @@ -190,6 +195,7 @@ class GraphIOTest(tf.test.TestCase): _VALID_FILE_PATTERN, batch_size, reader=tf.TFRecordReader, randomize_input=True, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((batch_size,), inputs.get_shape().as_list()) self.assertEqual("%s:1" % name, inputs.name) file_name_queue_name = "%s/file_name_queue" % name file_names_name = "%s/input" % file_name_queue_name @@ -234,6 +240,7 @@ class GraphIOTest(tf.test.TestCase): filename, batch_size, reader=tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -280,10 +287,13 @@ class GraphIOTest(tf.test.TestCase): features = {"sequence": tf.FixedLenFeature([], tf.string)} with tf.Graph().as_default() as g, self.test_session(graph=g) as session: - _, result = tf.contrib.learn.read_keyed_batch_features( + keys, result = tf.contrib.learn.read_keyed_batch_features( filename, batch_size, features, tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, num_enqueue_threads=2, parse_fn=tf.decode_json_example, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertEqual(1, len(result)) 
+ self.assertAllEqual((None,), result["sequence"].get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(session, coord=coord) @@ -319,6 +329,7 @@ class GraphIOTest(tf.test.TestCase): filenames, batch_size, reader=tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -354,7 +365,7 @@ class GraphIOTest(tf.test.TestCase): name = "my_batch" with tf.Graph().as_default() as g, self.test_session(graph=g) as session: - _, inputs = _read_keyed_batch_examples_shared_queue( + keys, inputs = _read_keyed_batch_examples_shared_queue( filenames, batch_size, reader=tf.TextLineReader, @@ -362,6 +373,8 @@ class GraphIOTest(tf.test.TestCase): num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -418,7 +431,7 @@ class GraphIOTest(tf.test.TestCase): with tf.Graph().as_default() as g1, tf.Session( server.target, graph=g1) as session: - _, inputs = _read_keyed_batch_examples_shared_queue( + keys, inputs = _read_keyed_batch_examples_shared_queue( filenames, batch_size, reader=tf.TextLineReader, @@ -426,6 +439,8 @@ class GraphIOTest(tf.test.TestCase): num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) # Run the three queues once manually. 
@@ -443,7 +458,7 @@ class GraphIOTest(tf.test.TestCase): with tf.Graph().as_default() as g2, tf.Session( server.target, graph=g2) as session: - _, inputs = _read_keyed_batch_examples_shared_queue( + keys, inputs = _read_keyed_batch_examples_shared_queue( filenames, batch_size, reader=tf.TextLineReader, @@ -451,6 +466,8 @@ class GraphIOTest(tf.test.TestCase): num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertAllEqual((None,), inputs.get_shape().as_list()) # Run the worker and the example queue. self._run_queue(worker_file_name_queue_name, session) @@ -473,6 +490,7 @@ class GraphIOTest(tf.test.TestCase): [filename], batch_size, reader=tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, read_batch_size=10, name=name) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -499,6 +517,8 @@ class GraphIOTest(tf.test.TestCase): filename, batch_size, reader=tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertAllEqual((None,), inputs.get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() @@ -537,6 +557,9 @@ class GraphIOTest(tf.test.TestCase): reader=tf.TextLineReader, randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, parse_fn=parse_fn, name=name) + self.assertAllEqual((None,), keys.get_shape().as_list()) + self.assertEqual(1, len(inputs)) + self.assertAllEqual((None, 1), inputs["age"].get_shape().as_list()) session.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index 5313dd3a4ea..4dbd23b5f6a 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ 
b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -24,6 +24,7 @@ from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.framework.python.ops import variables as contrib_variables from tensorflow.contrib.session_bundle import exporter from tensorflow.contrib.session_bundle import gc +from tensorflow.core.protobuf import saver_pb2 from tensorflow.python.client import session as tf_session from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -53,7 +54,7 @@ def _get_saver(): else: saver = None if saver is None and variables.all_variables(): - saver = tf_saver.Saver() + saver = tf_saver.Saver(write_version=saver_pb2.SaverDef.V1) ops.add_to_collection(ops.GraphKeys.SAVERS, saver) return saver diff --git a/tensorflow/contrib/makefile/tf_op_files.txt b/tensorflow/contrib/makefile/tf_op_files.txt index d7a4d8873c9..ed5d6539b3b 100644 --- a/tensorflow/contrib/makefile/tf_op_files.txt +++ b/tensorflow/contrib/makefile/tf_op_files.txt @@ -21,6 +21,7 @@ tensorflow/core/kernels/strided_slice_op_inst_4.cc tensorflow/core/kernels/strided_slice_op_inst_3.cc tensorflow/core/kernels/strided_slice_op_inst_2.cc tensorflow/core/kernels/strided_slice_op_inst_1.cc +tensorflow/core/kernels/strided_slice_op_inst_0.cc tensorflow/core/kernels/strided_slice_op.cc tensorflow/core/kernels/stack_ops.cc tensorflow/core/kernels/split_op.cc @@ -142,6 +143,7 @@ tensorflow/core/kernels/avgpooling_op.cc tensorflow/core/kernels/argmax_op.cc tensorflow/core/kernels/aggregate_ops.cc tensorflow/core/kernels/dequantize_op.cc +tensorflow/core/kernels/meta_support.cc tensorflow/core/kernels/quantization_utils.cc tensorflow/core/kernels/quantize_down_and_shrink_range.cc tensorflow/core/kernels/quantize_op.cc @@ -153,6 +155,7 @@ tensorflow/core/kernels/quantized_conv_ops.cc tensorflow/core/kernels/quantized_matmul_op.cc tensorflow/core/kernels/quantized_pooling_ops.cc tensorflow/core/kernels/quantized_reshape_op.cc 
+tensorflow/core/kernels/requantization_range_op.cc tensorflow/core/kernels/requantize.cc tensorflow/core/ops/training_ops.cc tensorflow/core/ops/string_ops.cc diff --git a/tensorflow/contrib/metrics/__init__.py b/tensorflow/contrib/metrics/__init__.py index a0b7b1ccfff..fc98a8d3df4 100644 --- a/tensorflow/contrib/metrics/__init__.py +++ b/tensorflow/contrib/metrics/__init__.py @@ -95,11 +95,6 @@ Certain metrics, such as streaming_mean or streaming_accuracy, can be weighted via a `weights` argument. The `weights` tensor must be the same size as the labels and predictions tensors and results in a weighted average of the metric. -Other metrics, such as streaming_recall, streaming_precision, and streaming_auc, -are not well defined with regard to weighted samples. However, a binary -`ignore_mask` argument can be used to ignore certain values at graph executation -time. - ## Metric `Ops` @@streaming_accuracy diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index a15783149f4..c7d20613713 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -23,7 +23,6 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.framework import deprecated -from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework import tensor_util from tensorflow.contrib.framework.python.ops import variables as contrib_variables from tensorflow.contrib.metrics.python.ops import confusion_matrix_ops @@ -41,40 +40,6 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables -IGNORE_MASK_DATE = '2016-10-19' -IGNORE_MASK_INSTRUCTIONS = ( - '`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 ' - 'and 1.0 to mask values. 
For example, `weights=tf.logical_not(mask)`.') - - -def _mask_weights(mask=None, weights=None): - """Mask a given set of weights. - - Elements are included when the corresponding `mask` element is `False`, and - excluded otherwise. - - Args: - mask: An optional, `bool` `Tensor`. - weights: An optional `Tensor` whose shape matches `mask` if `mask` is not - `None`. - - Returns: - Masked weights if `mask` and `weights` are not `None`, weights equivalent to - `mask` if `weights` is `None`, and otherwise `weights`. - - Raises: - ValueError: If `weights` and `mask` are not `None` and have mismatched - shapes. - """ - if mask is not None: - check_ops.assert_type(mask, dtypes.bool) - if weights is None: - weights = array_ops.ones_like(mask, dtype=dtypes.float32) - weights = math_ops.cast(math_ops.logical_not(mask), weights.dtype) * weights - - return weights - - def _safe_div(numerator, denominator, name): """Divides two values, returning 0 if the denominator is <= 0. @@ -516,8 +481,7 @@ def streaming_accuracy(predictions, labels, weights=None, updates_collections, name or 'accuracy') -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') -def streaming_precision(predictions, labels, ignore_mask=None, weights=None, +def streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None): """Computes the precision of the predictions with respect to the labels. @@ -534,14 +498,11 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None, `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. labels: The ground truth values, a `bool` `Tensor` whose dimensions must match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. 
weights: An optional `Tensor` whose shape is broadcastable to `predictions`. metrics_collections: An optional list of collections that `precision` should be added to. @@ -558,9 +519,8 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None, Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. """ with variable_scope.variable_scope( @@ -570,7 +530,6 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None, predictions, labels, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - weights = _mask_weights(ignore_mask, weights) true_positives, true_positives_update_op = _streaming_true_positives( predictions, labels, weights, metrics_collections=None, updates_collections=None, name=None) @@ -599,8 +558,7 @@ def streaming_precision(predictions, labels, ignore_mask=None, weights=None, return precision, update_op -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') -def streaming_recall(predictions, labels, ignore_mask=None, weights=None, +def streaming_recall(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None): """Computes the recall of the predictions with respect to the labels. @@ -615,14 +573,11 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None, weights each prediction by the corresponding value in `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. 
Args: predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. labels: The ground truth values, a `bool` `Tensor` whose dimensions must match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. weights: An optional `Tensor` whose shape is broadcastable to `predictions`. metrics_collections: An optional list of collections that `recall` should be added to. @@ -639,9 +594,8 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None, Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. """ with variable_scope.variable_scope(name, 'recall', [predictions, labels]): @@ -649,7 +603,6 @@ def streaming_recall(predictions, labels, ignore_mask=None, weights=None, predictions, labels, weights) predictions.get_shape().assert_is_compatible_with(labels.get_shape()) - weights = _mask_weights(ignore_mask, weights) true_positives, true_positives_update_op = _streaming_true_positives( predictions, labels, weights, metrics_collections=None, updates_collections=None, name=None) @@ -1235,10 +1188,9 @@ def _at_k_name(name, k=None, class_id=None): @deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, ' 'and reshape labels from [batch_size] to [batch_size, 1].') -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') -def streaming_recall_at_k(predictions, labels, k, ignore_mask=None, - weights=None, metrics_collections=None, - updates_collections=None, name=None): +def streaming_recall_at_k(predictions, labels, k, weights=None, + metrics_collections=None, 
updates_collections=None, + name=None): """Computes the recall@k of the predictions with respect to dense labels. The `streaming_recall_at_k` function creates two local variables, `total` and @@ -1255,15 +1207,12 @@ def streaming_recall_at_k(predictions, labels, k, ignore_mask=None, increments `count` with the reduced sum of `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: A floating point tensor of dimension [batch_size, num_classes] labels: A tensor of dimension [batch_size] whose type is in `int32`, `int64`. k: The number of top elements to look at for computing recall. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. weights: An optional `Tensor` whose shape is broadcastable to `predictions`. metrics_collections: An optional list of collections that `recall_at_k` should be added to. @@ -1279,26 +1228,23 @@ def streaming_recall_at_k(predictions, labels, k, ignore_mask=None, Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. """ in_top_k = math_ops.to_float(nn.in_top_k(predictions, labels, k)) return streaming_mean(in_top_k, - _mask_weights(ignore_mask, weights), + weights, metrics_collections, updates_collections, name or _at_k_name('recall', k)) # TODO(ptucker): Validate range of values in labels? 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_sparse_recall_at_k(predictions, labels, k, class_id=None, - ignore_mask=None, weights=None, metrics_collections=None, updates_collections=None, @@ -1328,8 +1274,6 @@ def streaming_sparse_recall_at_k(predictions, `false_negative_at_` using these values. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where @@ -1347,8 +1291,6 @@ def streaming_sparse_recall_at_k(predictions, class_id: Integer class ID for which we want binary metrics. This should be in range [0, num_classes), where num_classes is the last dimension of `predictions`. If class_id is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. weights: An optional `Tensor` whose shape is broadcastable to the the first [D1, ... DN] dimensions of `predictions` and `labels`. metrics_collections: An optional list of collections that values should @@ -1365,16 +1307,14 @@ def streaming_sparse_recall_at_k(predictions, `recall`. Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. + ValueError: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. 
""" default_name = _at_k_name('recall', k, class_id=class_id) with ops.name_scope(name, default_name, (predictions, labels)) as scope: _, top_k_idx = nn.top_k(predictions, k) top_k_idx = math_ops.to_int64(top_k_idx) - weights = _mask_weights(ignore_mask, weights) tp, tp_update = _streaming_sparse_true_positive_at_k( predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, weights=weights) @@ -1396,7 +1336,6 @@ def _streaming_sparse_precision_at_k(top_k_idx, labels, k=None, class_id=None, - ignore_mask=None, weights=None, metrics_collections=None, updates_collections=None, @@ -1423,8 +1362,6 @@ def _streaming_sparse_precision_at_k(top_k_idx, in range [0, num_classes), where num_classes is the last dimension of `predictions`. If `class_id` is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. weights: An optional `Tensor` whose shape is broadcastable to the the first [D1, ... DN] dimensions of `predictions` and `labels`. metrics_collections: An optional list of collections that values should @@ -1441,13 +1378,11 @@ def _streaming_sparse_precision_at_k(top_k_idx, `precision`. Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match + ValueError: If `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. """ top_k_idx = math_ops.to_int64(top_k_idx) - weights = _mask_weights(ignore_mask, weights) tp, tp_update = _streaming_sparse_true_positive_at_k( predictions_idx=top_k_idx, labels=labels, k=k, class_id=class_id, weights=weights) @@ -1466,12 +1401,10 @@ def _streaming_sparse_precision_at_k(top_k_idx, # TODO(ptucker): Validate range of values in labels? 
-@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_sparse_precision_at_k(predictions, labels, k, class_id=None, - ignore_mask=None, weights=None, metrics_collections=None, updates_collections=None, @@ -1502,8 +1435,6 @@ def streaming_sparse_precision_at_k(predictions, `false_positive_at_` using these values. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where @@ -1522,8 +1453,6 @@ def streaming_sparse_precision_at_k(predictions, in range [0, num_classes], where num_classes is the last dimension of `predictions`. If `class_id` is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. weights: An optional `Tensor` whose shape is broadcastable to the the first [D1, ... DN] dimensions of `predictions` and `labels`. metrics_collections: An optional list of collections that values should @@ -1540,21 +1469,19 @@ def streaming_sparse_precision_at_k(predictions, `precision`. Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match + ValueError: If `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. 
""" default_name = _at_k_name('precision', k, class_id=class_id) with ops.name_scope(name, default_name, - (predictions, labels, ignore_mask, weights)) as scope: + (predictions, labels, weights)) as scope: _, top_k_idx = nn.top_k(predictions, k) return _streaming_sparse_precision_at_k( top_k_idx=top_k_idx, labels=labels, k=k, class_id=class_id, - ignore_mask=ignore_mask, weights=weights, metrics_collections=metrics_collections, updates_collections=updates_collections, @@ -1562,11 +1489,9 @@ def streaming_sparse_precision_at_k(predictions, # TODO(ptucker): Validate range of values in labels? -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_sparse_precision_at_top_k(top_k_predictions, labels, class_id=None, - ignore_mask=None, weights=None, metrics_collections=None, updates_collections=None, @@ -1595,8 +1520,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions, `false_positive_at_k` using these values. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where @@ -1614,8 +1537,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions, in range [0, num_classes), where num_classes is the last dimension of `predictions`. If `class_id` is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. weights: An optional `Tensor` whose shape is broadcastable to the the first [D1, ... DN] dimensions of `predictions` and `labels`. metrics_collections: An optional list of collections that values should @@ -1632,8 +1553,7 @@ def streaming_sparse_precision_at_top_k(top_k_predictions, `precision`. 
Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match + ValueError: If `weights` is not `None` and its shape doesn't match `predictions`, or if either `metrics_collections` or `updates_collections` are not a list or tuple. ValueError: If `top_k_predictions` has rank < 2. @@ -1641,7 +1561,7 @@ def streaming_sparse_precision_at_top_k(top_k_predictions, default_name = _at_k_name('precision', class_id=class_id) with ops.name_scope( name, default_name, - (top_k_predictions, labels, ignore_mask, weights)) as scope: + (top_k_predictions, labels, weights)) as scope: rank = array_ops.rank(top_k_predictions) check_rank_op = control_flow_ops.Assert( math_ops.greater_equal(rank, 2), @@ -1651,7 +1571,6 @@ def streaming_sparse_precision_at_top_k(top_k_predictions, top_k_idx=top_k_predictions, labels=labels, class_id=class_id, - ignore_mask=ignore_mask, weights=weights, metrics_collections=metrics_collections, updates_collections=updates_collections, @@ -2760,8 +2679,7 @@ def streaming_mean_cosine_distance(predictions, labels, dim, weights=None, return mean_distance, update_op -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') -def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None, +def streaming_percentage_less(values, threshold, weights=None, metrics_collections=None, updates_collections=None, name=None): @@ -2778,13 +2696,10 @@ def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None, `percentage`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: values: A numeric `Tensor` of arbitrary size. threshold: A scalar threshold. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`. weights: An optional `Tensor` whose shape is broadcastable to `values`. 
metrics_collections: An optional list of collections that the metric value variable should be added to. @@ -2799,23 +2714,21 @@ def streaming_percentage_less(values, threshold, ignore_mask=None, weights=None, appropriately. Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `values`, or if `weights` is not `None` and its shape doesn't match - `values`, or if either `metrics_collections` or `updates_collections` are - not a list or tuple. + ValueError: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. """ is_below_threshold = math_ops.to_float(math_ops.less(values, threshold)) - return streaming_mean(is_below_threshold, _mask_weights(ignore_mask, weights), + return streaming_mean(is_below_threshold, + weights, metrics_collections, updates_collections, name or 'percentage_below_threshold') -@deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_mean_iou(predictions, labels, num_classes, - ignore_mask=None, weights=None, metrics_collections=None, updates_collections=None, @@ -2834,8 +2747,6 @@ def streaming_mean_iou(predictions, `update_op` operation that updates these variables and returns the `mean_iou`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: A tensor of prediction results for semantic labels, whose @@ -2846,7 +2757,6 @@ def streaming_mean_iou(predictions, num_classes: The possible number of labels the prediction task can have. This value must be provided, since a confusion matrix of dimension = [num_classes, num_classes] will be allocated. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. weights: An optional `Tensor` whose shape is broadcastable to `predictions`. 
metrics_collections: An optional list of collections that `mean_iou` should be added to. @@ -2860,9 +2770,8 @@ def streaming_mean_iou(predictions, Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. """ with variable_scope.variable_scope(name, 'mean_iou', [predictions, labels]): @@ -2888,7 +2797,6 @@ def streaming_mean_iou(predictions, if labels_rank > 1: labels = array_ops.reshape(labels, [-1]) - weights = _mask_weights(ignore_mask, weights) if weights is not None: weights_rank = weights.get_shape().ndims if weights_rank > 1: diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index c64ce86f2fe..9e56453d227 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -671,18 +671,6 @@ class StreamingPrecisionTest(tf.test.TestCase): self.assertAlmostEqual(0.5, update_op.eval()) self.assertAlmostEqual(0.5, precision.eval()) - def testMasked(self): - predictions = tf.constant([1, 0, 1, 0, 1], shape=(1, 5)) - labels = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) - mask = tf.constant([False, False, False, False, True], shape=(1, 5)) - precision, update_op = metrics.streaming_precision( - predictions, labels, ignore_mask=mask) - - with self.test_session() as sess: - sess.run(tf.initialize_local_variables()) - self.assertAlmostEqual(0.5, update_op.eval()) - self.assertAlmostEqual(0.5, precision.eval()) - def testWeighted1d(self): predictions = tf.constant([[1, 0, 1, 0], [1, 0, 1, 0]]) labels = tf.constant([[0, 1, 1, 0], 
[1, 0, 0, 1]]) @@ -838,18 +826,6 @@ class StreamingRecallTest(tf.test.TestCase): self.assertAlmostEqual(0.5, update_op.eval()) self.assertAlmostEqual(0.5, recall.eval()) - def testMasked(self): - predictions = tf.constant([1, 0, 1, 0, 1], shape=(1, 5)) - labels = tf.constant([0, 1, 1, 0, 1], shape=(1, 5)) - mask = tf.constant([False, False, False, False, True], shape=(1, 5)) - recall, update_op = metrics.streaming_recall( - predictions, labels, ignore_mask=mask) - - with self.test_session() as sess: - sess.run(tf.initialize_local_variables()) - self.assertAlmostEqual(0.5, update_op.eval()) - self.assertAlmostEqual(0.5, recall.eval()) - def testWeighted1d(self): predictions = tf.constant([[1, 0, 1, 0], [0, 1, 0, 1]]) labels = tf.constant([[0, 1, 1, 0], [1, 0, 0, 1]]) @@ -1737,15 +1713,13 @@ class StreamingRecallAtKTest(tf.test.TestCase): dtype=tf.float32) labels = tf.constant( self._np_labels, shape=(self._batch_size,), dtype=tf.int64) - weights = tf.constant([0, 1, 1, 1], shape=(self._batch_size,), + weights = tf.constant([0, 1, 0, 1], shape=(self._batch_size,), dtype=tf.float32) - mask = tf.constant([False, False, True, False], shape=(self._batch_size,), - dtype=tf.bool) recall, update_op = metrics.streaming_recall_at_k( - predictions, labels, k=2, ignore_mask=mask, weights=weights) + predictions, labels, k=2, weights=weights) sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( predictions, tf.reshape(labels, (self._batch_size, 1)), k=2, - ignore_mask=mask, weights=weights) + weights=weights) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) @@ -1763,16 +1737,13 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): k, expected, class_id=None, - ignore_mask=None, weights=None): with tf.Graph().as_default() as g, self.test_session(g): - if ignore_mask is not None: - ignore_mask = tf.constant(ignore_mask, tf.bool) if weights is not None: weights = tf.constant(weights, tf.float32) metric, update = 
metrics.streaming_sparse_precision_at_k( predictions=tf.constant(predictions, tf.float32), labels=labels, - k=k, class_id=class_id, ignore_mask=ignore_mask, weights=weights) + k=k, class_id=class_id, weights=weights) # Fails without initialized vars. self.assertRaises(tf.OpError, metric.eval) @@ -1792,17 +1763,13 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): labels, expected, class_id=None, - ignore_mask=None, weights=None): with tf.Graph().as_default() as g, self.test_session(g): - if ignore_mask is not None: - ignore_mask = tf.constant(ignore_mask, tf.bool) if weights is not None: weights = tf.constant(weights, tf.float32) metric, update = metrics.streaming_sparse_precision_at_top_k( top_k_predictions=tf.constant(top_k_predictions, tf.int32), - labels=labels, class_id=class_id, ignore_mask=ignore_mask, - weights=weights) + labels=labels, class_id=class_id, weights=weights) # Fails without initialized vars. self.assertRaises(tf.OpError, metric.eval) @@ -1821,11 +1788,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): predictions, labels, k, - expected, - ignore_mask=None): + expected): with tf.Graph().as_default() as g, self.test_session(g): - if ignore_mask is not None: - ignore_mask = tf.constant(ignore_mask, tf.bool) predictions = tf.constant(predictions, tf.float32) metric = metric_ops.sparse_average_precision_at_k( predictions, labels, k) @@ -2305,11 +2269,9 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): top_k_predictions, labels, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_precision_at_k( - predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], - weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, weights=[[0], [0]]) self._test_streaming_sparse_precision_at_top_k( - top_k_predictions, labels, expected=NAN, - ignore_mask=[[False], [True]], weights=[[0], [1]]) + top_k_predictions, labels, expected=NAN, weights=[[0], [0]]) 
self._test_streaming_sparse_precision_at_k( predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_precision_at_top_k( @@ -2342,34 +2304,34 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[1], [0]]) + weights=[[1], [0]]) self._test_streaming_sparse_precision_at_top_k( top_k_predictions, labels, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[1], [0]]) + weights=[[1], [0]]) # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[0], [1]]) + weights=[[0], [1]]) self._test_streaming_sparse_precision_at_top_k( top_k_predictions, labels, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[0], [1]]) + weights=[[0], [1]]) # Class 7: 1 incorrect prediction. self._test_streaming_sparse_precision_at_k( predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, - ignore_mask=[[False], [True]], weights=[[1], [1]]) + weights=[[1], [0]]) self._test_streaming_sparse_precision_at_top_k( top_k_predictions, labels, expected=0.0 / 1.0, class_id=7, - ignore_mask=[[False], [True]], weights=[[1], [1]]) + weights=[[1], [0]]) # Class 7: 1 correct prediction. self._test_streaming_sparse_precision_at_k( predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, - ignore_mask=[[True], [False]], weights=[[1], [1]]) + weights=[[0], [1]]) self._test_streaming_sparse_precision_at_top_k( top_k_predictions, labels, expected=1.0 / 1.0, class_id=7, - ignore_mask=[[True], [False]], weights=[[1], [1]]) + weights=[[0], [1]]) # Class 7: no predictions. 
self._test_streaming_sparse_precision_at_k( @@ -2409,17 +2371,13 @@ class StreamingSparseRecallTest(tf.test.TestCase): k, expected, class_id=None, - ignore_mask=None, weights=None): with tf.Graph().as_default() as g, self.test_session(g): - if ignore_mask is not None: - ignore_mask = tf.constant(ignore_mask, tf.bool) if weights is not None: weights = tf.constant(weights, tf.float32) metric, update = metrics.streaming_sparse_recall_at_k( predictions=tf.constant(predictions, tf.float32), - labels=labels, k=k, class_id=class_id, ignore_mask=ignore_mask, - weights=weights) + labels=labels, k=k, class_id=class_id, weights=weights) # Fails without initialized vars. self.assertRaises(tf.OpError, metric.eval) @@ -2740,8 +2698,7 @@ class StreamingSparseRecallTest(tf.test.TestCase): predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_recall_at_k( - predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], - weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, weights=[[0], [0]]) self._test_streaming_sparse_recall_at_k( predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) @@ -2764,22 +2721,22 @@ class StreamingSparseRecallTest(tf.test.TestCase): # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[1], [0]]) + weights=[[1], [0]]) # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, - ignore_mask=[[False], [False]], weights=[[0], [1]]) + weights=[[0], [1]]) # Class 7: 1 label, correct. self._test_streaming_sparse_recall_at_k( predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, - ignore_mask=[[True], [False]], weights=[[1], [1]]) + weights=[[0], [1]]) # Class 7: 1 label, incorrect. 
self._test_streaming_sparse_recall_at_k( predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, - ignore_mask=[[False], [True]], weights=[[1], [1]]) + weights=[[1], [0]]) # Class 7: 2 labels, 1 correct. self._test_streaming_sparse_recall_at_k( @@ -3660,16 +3617,14 @@ class PcntBelowThreshTest(tf.test.TestCase): def testSomePresentOneUpdate(self): with self.test_session() as sess: values = tf.constant([2, 4, 6, 8], shape=(1, 4), dtype=tf.float32) - mask = tf.constant([False, True, False, False], shape=(1, 4), - dtype=tf.bool) - weights = tf.constant([1, 1, 0, 1], shape=(1, 4), dtype=tf.float32) + weights = tf.constant([1, 0, 0, 1], shape=(1, 4), dtype=tf.float32) pcnt0, update_op0 = metrics.streaming_percentage_less( - values, 100, ignore_mask=mask, weights=weights, name='high') + values, 100, weights=weights, name='high') pcnt1, update_op1 = metrics.streaming_percentage_less( - values, 7, ignore_mask=mask, weights=weights, name='medium') + values, 7, weights=weights, name='medium') pcnt2, update_op2 = metrics.streaming_percentage_less( - values, 1, ignore_mask=mask, weights=weights, name='low') + values, 1, weights=weights, name='low') sess.run(tf.initialize_local_variables()) self.assertListEqual([1.0, 0.5, 0.0], @@ -3712,22 +3667,6 @@ class StreamingMeanIOUTest(tf.test.TestCase): metrics.streaming_mean_iou( predictions, labels, num_classes=2) - def testLabelsAndIgnoreMaskOfDifferentSizeRaisesValueError(self): - predictions = tf.ones([10]) - labels = tf.ones([10]) - ignore_mask = tf.cast(tf.ones([9]), tf.bool) - with self.assertRaises(ValueError): - metrics.streaming_mean_iou( - predictions, labels, num_classes=2, ignore_mask=ignore_mask) - - def testIgnoreMaskIsNotBooleanRaisesTypeError(self): - predictions = tf.ones([10]) - labels = tf.ones([10]) - ignore_mask = tf.ones([10]) - with self.assertRaises(TypeError): - metrics.streaming_mean_iou( - predictions, labels, num_classes=2, ignore_mask=ignore_mask) - def 
testLabelsAndWeightsOfDifferentSizeRaisesValueError(self): predictions = tf.ones([10]) labels = tf.ones([10]) @@ -3810,29 +3749,18 @@ class StreamingMeanIOUTest(tf.test.TestCase): _enqueue_vector(sess, labels_queue, [1]) labels = labels_queue.dequeue() - # Create the queue that populates the ignore_masks. - ignore_masks_queue = tf.FIFOQueue(6, dtypes=tf.bool, shapes=(1, 1)) - _enqueue_vector(sess, ignore_masks_queue, [False]) - _enqueue_vector(sess, ignore_masks_queue, [False]) - _enqueue_vector(sess, ignore_masks_queue, [False]) - _enqueue_vector(sess, ignore_masks_queue, [True]) - _enqueue_vector(sess, ignore_masks_queue, [False]) - _enqueue_vector(sess, ignore_masks_queue, [False]) - ignore_mask = ignore_masks_queue.dequeue() - # Create the queue that populates the weights. weights_queue = tf.FIFOQueue(6, dtypes=tf.float32, shapes=(1, 1)) _enqueue_vector(sess, weights_queue, [1.0]) _enqueue_vector(sess, weights_queue, [1.0]) _enqueue_vector(sess, weights_queue, [1.0]) - _enqueue_vector(sess, weights_queue, [1.0]) + _enqueue_vector(sess, weights_queue, [0.0]) _enqueue_vector(sess, weights_queue, [1.0]) _enqueue_vector(sess, weights_queue, [0.0]) weights = weights_queue.dequeue() miou, update_op = metrics.streaming_mean_iou( - predictions, labels, num_classes, ignore_mask=ignore_mask, - weights=weights) + predictions, labels, num_classes, weights=weights) sess.run(tf.initialize_local_variables()) for _ in range(6): @@ -3920,13 +3848,12 @@ class StreamingMeanIOUTest(tf.test.TestCase): labels = tf.concat(0, [tf.constant(0, shape=[3]), tf.constant(1, shape=[7])]) num_classes = 2 - mask = tf.concat(0, [tf.constant(False, shape=[9]), - tf.constant(True, shape=[1])]) weights = tf.concat(0, [tf.constant(0, shape=[1]), - tf.constant(1, shape=[9])]) + tf.constant(1, shape=[8]), + tf.constant(0, shape=[1])]) with self.test_session() as sess: miou, update_op = metrics.streaming_mean_iou( - predictions, labels, num_classes, ignore_mask=mask, weights=weights) + predictions, 
labels, num_classes, weights=weights) sess.run(tf.initialize_local_variables()) self.assertAllEqual([[2, 2], [0, 4]], update_op.eval()) desired_miou = np.mean([2./4., 4./6.]) diff --git a/tensorflow/contrib/opt/python/training/external_optimizer.py b/tensorflow/contrib/opt/python/training/external_optimizer.py index 7629662b079..de539a46e26 100644 --- a/tensorflow/contrib/opt/python/training/external_optimizer.py +++ b/tensorflow/contrib/opt/python/training/external_optimizer.py @@ -100,7 +100,7 @@ class ExternalOptimizerInterface(object): accumulated_dims[1:])] def minimize(self, session=None, feed_dict=None, fetches=None, - step_callback=None, loss_callback=None, grad_callback=None): + step_callback=None, loss_callback=None): """Minimize a scalar `Tensor`. Variables subject to optimization are updated in-place at the end of @@ -113,14 +113,13 @@ class ExternalOptimizerInterface(object): Args: session: A `Session` instance. feed_dict: A feed dict to be passed to calls to `session.run`. - fetches: A list of `Tensor`s to fetch and supply to `loss_callback` and - `grad_callback` as positional arguments. + fetches: A list of `Tensor`s to fetch and supply to `loss_callback` + as positional arguments. step_callback: A function to be called at each optimization step; arguments are the current values of all optimization variables flattened into a single vector. loss_callback: A function to be called every time the loss and gradients are computed, with evaluated fetches supplied as positional arguments. - grad_callback: Deprecated. """ session = session or ops.get_default_session() feed_dict = feed_dict or {} @@ -128,9 +127,6 @@ class ExternalOptimizerInterface(object): loss_callback = loss_callback or (lambda *fetches: None) step_callback = step_callback or (lambda xk: None) - # TODO(chapelle): Remove grad_callback (b/30590858) - if grad_callback: - logging.warn('grad_callback is deprecated. Please use loss_callback.') # Construct loss function and associated gradient. 
loss_grad_func = self._make_eval_func( diff --git a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py index 86a828394ea..d6df49d8525 100644 --- a/tensorflow/contrib/opt/python/training/moving_average_optimizer.py +++ b/tensorflow/contrib/opt/python/training/moving_average_optimizer.py @@ -62,7 +62,8 @@ from tensorflow.python.training import saver class MovingAverageOptimizer(optimizer.Optimizer): """Optimizer wrapper that maintains a moving average of parameters.""" - def __init__(self, opt, average_decay=0.9999, sequential_update=True): + def __init__(self, opt, average_decay=0.9999, num_updates=None, + sequential_update=True): """Construct a new MovingAverageOptimizer. Args: @@ -70,6 +71,8 @@ class MovingAverageOptimizer(optimizer.Optimizer): average_decay: Float. Decay to use to maintain the moving averages of trained variables. See tf.train.ExponentialMovingAverage for details. + num_updates: Optional count of number of updates applied to variables. + See tf.train.ExponentialMovingAverage for details. sequential_update: Bool. If False, will compute the moving average at the same time as the model is updated, potentially doing benign data races. @@ -77,7 +80,8 @@ class MovingAverageOptimizer(optimizer.Optimizer): updates. 
""" self._optimizer = opt - self._ema = moving_averages.ExponentialMovingAverage(average_decay) + self._ema = moving_averages.ExponentialMovingAverage( + average_decay, num_updates=num_updates) self._variable_map = None self._sequential_update = sequential_update diff --git a/tensorflow/contrib/rnn/BUILD b/tensorflow/contrib/rnn/BUILD index 00123379f6f..fdac3e9e497 100644 --- a/tensorflow/contrib/rnn/BUILD +++ b/tensorflow/contrib/rnn/BUILD @@ -181,6 +181,24 @@ tf_gen_op_libs( op_lib_names = ["lstm_ops"], ) +tf_kernel_library( + name = "gru_ops_kernels", + srcs = [ + "kernels/blas_gemm.cc", + "kernels/blas_gemm.h", + ], + gpu_srcs = [ + "kernels/blas_gemm.h", + ], + prefix = "kernels/gru_ops", + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core/kernels:eigen_helpers", + "//third_party/eigen3", + ], +) + tf_kernel_library( name = "lstm_ops_kernels", srcs = [ diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.cc b/tensorflow/contrib/rnn/kernels/blas_gemm.cc index 637b872dadc..e62501e9b10 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.cc +++ b/tensorflow/contrib/rnn/kernels/blas_gemm.cc @@ -37,7 +37,6 @@ perftools::gputools::DeviceMemory AsDeviceMemory(const T* cuda_memory) { namespace functor { template void TensorCuBlasGemm::operator()(OpKernelContext* ctx, - perftools::gputools::Stream* stream, bool transa, bool transb, uint64 m, uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b, int ldb, T beta, T* c, @@ -52,7 +51,8 @@ void TensorCuBlasGemm::operator()(OpKernelContext* ctx, auto c_ptr = AsDeviceMemory(c); bool blas_launch_status = - stream + ctx->op_device_context() + ->stream() ->ThenBlasGemm(trans[transa], trans[transb], m, n, k, alpha, a_ptr, lda, b_ptr, ldb, beta, &c_ptr, ldc) .ok(); diff --git a/tensorflow/contrib/rnn/kernels/blas_gemm.h b/tensorflow/contrib/rnn/kernels/blas_gemm.h index 9c34b8ae715..e33eceadff1 100644 --- a/tensorflow/contrib/rnn/kernels/blas_gemm.h +++ 
b/tensorflow/contrib/rnn/kernels/blas_gemm.h @@ -21,22 +21,15 @@ limitations under the License. #include "tensorflow/core/kernels/eigen_activations.h" #include "tensorflow/core/platform/types.h" -namespace perftools { -namespace gputools { -class Stream; -} // end namespace gputools -} // end namespace perftools - namespace tensorflow { class OpKernelContext; namespace functor { template struct TensorCuBlasGemm { - void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream, - bool transa, bool transb, uint64 m, uint64 n, uint64 k, - T alpha, const T* a, int lda, const T* b, int ldb, T beta, - T* c, int ldc); + void operator()(OpKernelContext* ctx, bool transa, bool transb, uint64 m, + uint64 n, uint64 k, T alpha, const T* a, int lda, const T* b, + int ldb, T beta, T* c, int ldc); }; template @@ -44,16 +37,15 @@ struct TensorBlasGemm; template struct TensorBlasGemm { - static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, bool transa, bool transb, T alpha, - typename TTypes::ConstMatrix a, + static void compute(OpKernelContext* ctx, const Device& d, bool transa, + bool transb, T alpha, typename TTypes::ConstMatrix a, typename TTypes::ConstMatrix b, T beta, typename TTypes::Matrix c) { int64 m = c.dimensions()[0]; int64 n = c.dimensions()[1]; int64 k = transa ? a.dimensions()[0] : a.dimensions()[1]; - TensorCuBlasGemm()(ctx, stream, transb, transa, n, m, k, alpha, b.data(), + TensorCuBlasGemm()(ctx, transb, transa, n, m, k, alpha, b.data(), transb ? k : n, a.data(), transa ? 
m : k, beta, c.data(), n); } @@ -61,9 +53,8 @@ struct TensorBlasGemm { template struct TensorBlasGemm { - static void compute(OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, bool transa, bool transb, T alpha, - typename TTypes::ConstMatrix a, + static void compute(OpKernelContext* ctx, const Device& d, bool transa, + bool transb, T alpha, typename TTypes::ConstMatrix a, typename TTypes::ConstMatrix b, T beta, typename TTypes::Matrix c) { Eigen::array, 1> contract_pairs; diff --git a/tensorflow/contrib/rnn/kernels/gru_ops.cc b/tensorflow/contrib/rnn/kernels/gru_ops.cc index ae25322a40c..6173591d3db 100644 --- a/tensorflow/contrib/rnn/kernels/gru_ops.cc +++ b/tensorflow/contrib/rnn/kernels/gru_ops.cc @@ -15,10 +15,6 @@ limitations under the License. #define EIGEN_USE_THREADS -#if GOOGLE_CUDA -#include "tensorflow/core/platform/stream_executor.h" -#endif // GOOGLE_CUDA - #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/contrib/rnn/kernels/gru_ops.h" #include "tensorflow/core/framework/op_kernel.h" @@ -151,14 +147,9 @@ class GRUCellBlockOp : public OpKernel { const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? ctx->op_device_context()->stream() - : nullptr; - functor::GRUBlockCellFprop(batch_size, input_size, cell_size)( - ctx, stream, device, x_tensor->matrix(), h_prev_tensor->matrix(), + ctx, device, x_tensor->matrix(), h_prev_tensor->matrix(), w_ru_tensor->matrix(), w_c_tensor->matrix(), b_ru_tensor->vec(), b_c_tensor->vec(), r_u_bar_tensor.matrix(), r_tensor->matrix(), u_tensor->matrix(), c_tensor->matrix(), @@ -362,14 +353,10 @@ class GRUBlockCellGradOp : public OpKernel { &d_x_component_2_h_prevr)); const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? 
ctx->op_device_context()->stream() - : nullptr; functor::GRUBlockCellBprop(batch_size, input_size, cell_size)( - ctx, stream, device, x_tensor->matrix(), h_prev_tensor->matrix(), + ctx, device, x_tensor->matrix(), h_prev_tensor->matrix(), w_ru_tensor->matrix(), w_c_tensor->matrix(), b_ru_tensor->vec(), b_c_tensor->vec(), r_tensor->matrix(), u_tensor->matrix(), c_tensor->matrix(), d_h_tensor->matrix(), @@ -400,8 +387,8 @@ namespace functor { #define DECLARE_GPU_SPEC(T) \ template <> \ void GRUBlockCellFprop::operator()( \ - OpKernelContext* ctx, perftools::gputools::Stream* stream, \ - const GPUDevice& d, typename TTypes::ConstMatrix x, \ + OpKernelContext* ctx, const GPUDevice& d, \ + typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix h_prev, \ typename TTypes::ConstMatrix w_ru, \ typename TTypes::ConstMatrix w_c, typename TTypes::ConstVec b_ru, \ @@ -430,9 +417,9 @@ namespace functor { #define DECLARE_GPU_SPEC(T) \ template <> \ void GRUBlockCellBprop::operator()( \ - OpKernelContext* ctx, perftools::gputools::Stream* stream, \ - const GPUDevice& d, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix h, typename TTypes::ConstMatrix w_ru, \ + OpKernelContext* ctx, const GPUDevice& d, \ + typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix h, \ + typename TTypes::ConstMatrix w_ru, \ typename TTypes::ConstMatrix w_c, typename TTypes::ConstVec b_ru, \ typename TTypes::ConstVec b_c, typename TTypes::ConstMatrix r, \ typename TTypes::ConstMatrix u, typename TTypes::ConstMatrix c, \ diff --git a/tensorflow/contrib/rnn/kernels/gru_ops.h b/tensorflow/contrib/rnn/kernels/gru_ops.h index e6c4ad9a032..06a56650629 100644 --- a/tensorflow/contrib/rnn/kernels/gru_ops.h +++ b/tensorflow/contrib/rnn/kernels/gru_ops.h @@ -21,12 +21,6 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" -namespace perftools { -namespace gputools { -class Stream; -} // end namespace gputools -} // end namespace perftools - namespace tensorflow { class OpKernelContext; @@ -77,18 +71,15 @@ struct GRUBlockCellFprop : public GRUCell { const int cell_size) : GRUCell(batch_size, input_size, cell_size) {} - void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, typename TTypes::ConstMatrix x, - typename TTypes::ConstMatrix h_prev, - typename TTypes::ConstMatrix w_ru, - typename TTypes::ConstMatrix w_c, - typename TTypes::ConstVec b_ru, - typename TTypes::ConstVec b_c, - typename TTypes::Matrix r_u_bar, - typename TTypes::Matrix r, typename TTypes::Matrix u, - typename TTypes::Matrix c, typename TTypes::Matrix h, - typename TTypes::Matrix x_h_prev, - typename TTypes::Matrix x_h_prevr) { + void operator()( + OpKernelContext* ctx, const Device& d, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix h_prev, + typename TTypes::ConstMatrix w_ru, typename TTypes::ConstMatrix w_c, + typename TTypes::ConstVec b_ru, typename TTypes::ConstVec b_c, + typename TTypes::Matrix r_u_bar, typename TTypes::Matrix r, + typename TTypes::Matrix u, typename TTypes::Matrix c, + typename TTypes::Matrix h, typename TTypes::Matrix x_h_prev, + typename TTypes::Matrix x_h_prevr) { // Concat x_h_prev = [x, h_prev]. 
x_h_prev.slice(x_offsets(), x_extends()).device(d) = x; x_h_prev.slice(h_offsets(), h_extends()).device(d) = h_prev; @@ -96,9 +87,8 @@ struct GRUBlockCellFprop : public GRUCell { // r_u_bar = x_h_prev * w_ru + b_ru typename TTypes::ConstMatrix const_x_h_prev(x_h_prev.data(), x_h_prev.dimensions()); - TensorBlasGemm::compute(ctx, stream, d, false, false, - T(1), const_x_h_prev, w_ru, - T(0), r_u_bar); + TensorBlasGemm::compute( + ctx, d, false, false, T(1), const_x_h_prev, w_ru, T(0), r_u_bar); // Creating a bias matrix for adding by broadcasting 'b_ru' Eigen::array broadcast_shape({batch_size_, 1}); @@ -117,7 +107,7 @@ struct GRUBlockCellFprop : public GRUCell { typename TTypes::ConstMatrix const_x_h_prevr(x_h_prevr.data(), x_h_prevr.dimensions()); TensorBlasGemm::compute( - ctx, stream, d, false, false, T(1), const_x_h_prevr, w_c, T(0), c); + ctx, d, false, false, T(1), const_x_h_prevr, w_c, T(0), c); Eigen::array b_c_shape({1, b_c.dimensions()[0]}); c.device(d) += (b_c.reshape(b_c_shape).broadcast(broadcast_shape)); @@ -135,8 +125,7 @@ struct GRUBlockCellBprop : public GRUCell { : GRUCell(batch_size, input_size, cell_size) {} void operator()( - OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const Device& d, typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w_ru, typename TTypes::ConstMatrix w_c, typename TTypes::ConstVec b_ru, typename TTypes::ConstVec b_c, @@ -159,9 +148,9 @@ struct GRUBlockCellBprop : public GRUCell { // [2nd_component_of_d_x d_h_prevr] = d_c_bar X w_c^T typename TTypes::ConstMatrix const_d_c_bar(d_c_bar.data(), d_c_bar.dimensions()); - TensorBlasGemm::compute(ctx, stream, d, false, true, - T(1), const_d_c_bar, w_c, - T(0), d_x_comp2_and_h_prevr); + TensorBlasGemm::compute(ctx, d, false, true, T(1), + const_d_c_bar, w_c, T(0), + d_x_comp2_and_h_prevr); d_hr.device(d) = d_x_comp2_and_h_prevr.slice(h_offsets(), 
h_extends()); d_r_bar.device(d) = (d_hr * h_prev * r) * (r.constant(T(1)) - r); @@ -175,7 +164,7 @@ struct GRUBlockCellBprop : public GRUCell { typename TTypes::ConstMatrix const_d_r_bar_u_bar( d_r_bar_u_bar.data(), d_r_bar_u_bar.dimensions()); TensorBlasGemm::compute( - ctx, stream, d, false, true, T(1), const_d_r_bar_u_bar, w_ru, T(0), + ctx, d, false, true, T(1), const_d_r_bar_u_bar, w_ru, T(0), d_x_comp1_and_h_prev_comp1); // d_x = d_x_comp1 + d_x_comp2 diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.cc b/tensorflow/contrib/rnn/kernels/lstm_ops.cc index 7fec457a4ac..2cebcd8fb31 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.cc +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.cc @@ -34,10 +34,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" -#if GOOGLE_CUDA -#include "tensorflow/core/platform/stream_executor.h" -#endif // GOOGLE_CUDA - namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -164,14 +160,10 @@ class LSTMBlockCellOp : public OpKernel { &icfo_tensor)); const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? 
ctx->op_device_context()->stream() - : nullptr; functor::LSTMBlockCellFprop(batch_size, input_size, cell_size)( - ctx, stream, device, forget_bias_, cell_clip_, use_peephole_, + ctx, device, forget_bias_, cell_clip_, use_peephole_, x_tensor->matrix(), cs_prev_tensor->matrix(), h_prev_tensor->matrix(), w_tensor->matrix(), wci_tensor->vec(), wcf_tensor->vec(), wco_tensor->vec(), b_tensor->vec(), @@ -196,22 +188,21 @@ REGISTER_KERNEL(float); #if GOOGLE_CUDA namespace functor { -#define DECLARE_GPU_SPEC(T) \ - template <> \ - void LSTMBlockCellFprop::operator()( \ - OpKernelContext* ctx, perftools::gputools::Stream* stream, \ - const GPUDevice& d, const T forget_bias, const T cell_clip, \ - bool use_peephole, typename TTypes::ConstMatrix x, \ - typename TTypes::ConstMatrix cs_prev, \ - typename TTypes::ConstMatrix h_prev, \ - typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ - typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ - typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ - typename TTypes::Matrix i, typename TTypes::Matrix cs, \ - typename TTypes::Matrix f, typename TTypes::Matrix o, \ - typename TTypes::Matrix ci, typename TTypes::Matrix co, \ - typename TTypes::Matrix icfo, typename TTypes::Matrix h); \ - \ +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void LSTMBlockCellFprop::operator()( \ + OpKernelContext* ctx, const GPUDevice& d, const T forget_bias, \ + const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, \ + typename TTypes::ConstMatrix cs_prev, \ + typename TTypes::ConstMatrix h_prev, \ + typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, \ + typename TTypes::ConstVec wcf, typename TTypes::ConstVec wco, \ + typename TTypes::ConstVec b, typename TTypes::Matrix xh, \ + typename TTypes::Matrix i, typename TTypes::Matrix cs, \ + typename TTypes::Matrix f, typename TTypes::Matrix o, \ + typename TTypes::Matrix ci, typename TTypes::Matrix co, \ + typename TTypes::Matrix icfo, typename 
TTypes::Matrix h); \ + \ extern template struct LSTMBlockCellFprop; DECLARE_GPU_SPEC(float); @@ -445,10 +436,6 @@ class LSTMBlockCellGradOp : public OpKernel { &di_tensor)); const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? ctx->op_device_context()->stream() - : nullptr; functor::TensorZero()(device, wci_grad_tensor->flat()); functor::TensorZero()(device, wcf_grad_tensor->flat()); @@ -456,7 +443,7 @@ class LSTMBlockCellGradOp : public OpKernel { functor::LSTMBlockCellBprop(batch_size, input_size, cell_size)( - ctx, stream, device, use_peephole_, x_tensor->matrix(), + ctx, device, use_peephole_, x_tensor->matrix(), cs_prev_tensor->matrix(), h_prev_tensor->matrix(), w_tensor->matrix(), wci_tensor->vec(), wcf_tensor->vec(), wco_tensor->vec(), b_tensor->vec(), i_tensor->matrix(), @@ -486,8 +473,7 @@ namespace functor { #define DECLARE_GPU_SPEC(T) \ template <> \ void LSTMBlockCellBprop::operator()( \ - OpKernelContext* ctx, perftools::gputools::Stream* stream, \ - const GPUDevice& d, bool use_peephole, \ + OpKernelContext* ctx, const GPUDevice& d, bool use_peephole, \ typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix cs_prev, \ typename TTypes::ConstMatrix h_prev, \ @@ -769,10 +755,6 @@ class BlockLSTMOp : public OpKernel { &icfo_tensor)); const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? 
ctx->op_device_context()->stream() - : nullptr; const int64 seq_len_max = seq_len_max_tensor->scalar()(); SliceHelper slicer(ctx); @@ -794,7 +776,7 @@ class BlockLSTMOp : public OpKernel { functor::LSTMBlockCellFprop(batch_size, input_size, cell_size)( - ctx, stream, device, forget_bias_, cell_clip_, use_peephole_, + ctx, device, forget_bias_, cell_clip_, use_peephole_, x_tensor.matrix(), cs_prev_tensor2.matrix(), h_prev_tensor2.matrix(), w_tensor->matrix(), wci_tensor->vec(), wcf_tensor->vec(), wco_tensor->vec(), @@ -1020,10 +1002,6 @@ class BlockLSTMGradOp : public OpKernel { const Device& device = ctx->eigen_device(); - perftools::gputools::Stream* stream = - std::is_same::value - ? ctx->op_device_context()->stream() - : nullptr; functor::TensorZero()(device, cs_grad_tensor.flat()); functor::TensorZero()(device, @@ -1073,7 +1051,7 @@ class BlockLSTMGradOp : public OpKernel { Tensor x_grad_tensor = slicer.OutputSlice(x_grad, t, "x_grad"); functor::BlockLSTMBprop(batch_size, input_size, cell_size)( - ctx, stream, device, use_peephole_, x_tensor.matrix(), + ctx, device, use_peephole_, x_tensor.matrix(), cs_prev_tensor2.matrix(), h_prev_tensor2.matrix(), w_tensor->matrix(), wci_tensor->vec(), wcf_tensor->vec(), wco_tensor->vec(), b_tensor->vec(), xh_tensor.matrix(), @@ -1134,8 +1112,7 @@ namespace functor { \ template <> \ void BlockLSTMBprop::operator()( \ - OpKernelContext* ctx, perftools::gputools::Stream* stream, \ - const GPUDevice& d, bool use_peephole, \ + OpKernelContext* ctx, const GPUDevice& d, bool use_peephole, \ typename TTypes::ConstMatrix x, \ typename TTypes::ConstMatrix cs_prev, \ typename TTypes::ConstMatrix h_prev, \ diff --git a/tensorflow/contrib/rnn/kernels/lstm_ops.h b/tensorflow/contrib/rnn/kernels/lstm_ops.h index 1332b880026..d9ed9e3ab71 100644 --- a/tensorflow/contrib/rnn/kernels/lstm_ops.h +++ b/tensorflow/contrib/rnn/kernels/lstm_ops.h @@ -22,12 +22,6 @@ limitations under the License. 
#include "tensorflow/core/kernels/eigen_activations.h" #include "tensorflow/core/platform/types.h" -namespace perftools { -namespace gputools { -class Stream; -} // end namespace gputools -} // end namespace perftools - namespace tensorflow { class OpKernelContext; @@ -153,29 +147,26 @@ struct LSTMBlockCellFprop : public LSTMBlockCell { const int cell_size) : LSTMBlockCell(batch_size, input_size, cell_size) {} - void operator()(OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, const T forget_bias, const T cell_clip, - bool use_peephole, typename TTypes::ConstMatrix x, - typename TTypes::ConstMatrix cs_prev, - typename TTypes::ConstMatrix h_prev, - typename TTypes::ConstMatrix w, - typename TTypes::ConstVec wci, - typename TTypes::ConstVec wcf, - typename TTypes::ConstVec wco, - typename TTypes::ConstVec b, typename TTypes::Matrix xh, - typename TTypes::Matrix i, typename TTypes::Matrix cs, - typename TTypes::Matrix f, typename TTypes::Matrix o, - typename TTypes::Matrix ci, typename TTypes::Matrix co, - typename TTypes::Matrix icfo, - typename TTypes::Matrix h) { + void operator()( + OpKernelContext* ctx, const Device& d, const T forget_bias, + const T cell_clip, bool use_peephole, typename TTypes::ConstMatrix x, + typename TTypes::ConstMatrix cs_prev, + typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, + typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, + typename TTypes::ConstVec wco, typename TTypes::ConstVec b, + typename TTypes::Matrix xh, typename TTypes::Matrix i, + typename TTypes::Matrix cs, typename TTypes::Matrix f, + typename TTypes::Matrix o, typename TTypes::Matrix ci, + typename TTypes::Matrix co, typename TTypes::Matrix icfo, + typename TTypes::Matrix h) { // Concat xh = [x, h]. 
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x; xh.slice(xh_h_offsets(), xh_h_extents()).device(d) = h_prev; // states1 = xh * w + b typename TTypes::ConstMatrix const_xh(xh.data(), xh.dimensions()); - TensorBlasGemm::compute( - ctx, stream, d, false, false, T(1), const_xh, w, T(0), icfo); + TensorBlasGemm::compute(ctx, d, false, false, T(1), + const_xh, w, T(0), icfo); Eigen::array b_shape({1, b.dimensions()[0]}); Eigen::array broadcast_shape({batch_size_, 1}); icfo.device(d) += b.reshape(b_shape).broadcast(broadcast_shape); @@ -239,8 +230,8 @@ struct LSTMBlockCellBprop : public LSTMBlockCell { : LSTMBlockCell(batch_size, input_size, cell_size) {} void operator()( - OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, bool use_peephole, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const Device& d, bool use_peephole, + typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -305,8 +296,8 @@ struct BlockLSTMBprop : public LSTMBlockCell { : LSTMBlockCell(batch_size, input_size, cell_size) {} void operator()( - OpKernelContext* ctx, perftools::gputools::Stream* stream, - const Device& d, bool use_peephole, typename TTypes::ConstMatrix x, + OpKernelContext* ctx, const Device& d, bool use_peephole, + typename TTypes::ConstMatrix x, typename TTypes::ConstMatrix cs_prev, typename TTypes::ConstMatrix h_prev, typename TTypes::ConstMatrix w, typename TTypes::ConstVec wci, typename TTypes::ConstVec wcf, @@ -364,7 +355,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { typename TTypes::ConstMatrix const_dicfo(dicfo.data(), dicfo.dimensions()); TensorBlasGemm::compute( - ctx, stream, d, false, true, T(1), const_dicfo, w, T(0), xh_grad); + ctx, d, false, true, T(1), const_dicfo, w, T(0), xh_grad); // xh. 
xh.slice(xh_x_offsets(), xh_x_extents()).device(d) = x; @@ -377,7 +368,7 @@ struct BlockLSTMBprop : public LSTMBlockCell { // w_grad. TensorBlasGemm::compute( - ctx, stream, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad); + ctx, d, true, false, T(1), const_xh, const_dicfo, T(1), w_grad); // b_grad. b_grad.device(d) += dicfo.sum(Eigen::array({0})); diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 850b9547168..c1c25ba0942 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -1005,7 +1005,7 @@ _linear = rnn_cell._linear class AttentionCellWrapper(rnn_cell.RNNCell): """Basic attention cell wrapper. - Implementation based on https://arxiv.org/pdf/1601.06733.pdf. + Implementation based on https://arxiv.org/abs/1409.0473. """ def __init__(self, cell, attn_length, attn_size=None, attn_vec_size=None, diff --git a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py index ada4e0611ec..7f1b53ae356 100644 --- a/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py +++ b/tensorflow/contrib/slim/python/slim/data/dataset_data_provider.py @@ -51,7 +51,7 @@ from tensorflow.contrib.slim.python.slim.data import parallel_reader class DatasetDataProvider(data_provider.DataProvider): def __init__(self, dataset, num_readers=1, shuffle=True, num_epochs=None, - common_queue_capacity=256, common_queue_min=128): + common_queue_capacity=256, common_queue_min=128, seed=None): """Creates a DatasetDataProvider. Args: @@ -64,6 +64,7 @@ class DatasetDataProvider(data_provider.DataProvider): common_queue_capacity: The capacity of the common queue. common_queue_min: The minimum number of elements in the common queue after a dequeue. + seed: The seed to use if shuffling. 
""" _, data = parallel_reader.parallel_read( dataset.data_sources, @@ -72,7 +73,8 @@ class DatasetDataProvider(data_provider.DataProvider): num_readers=num_readers, shuffle=shuffle, capacity=common_queue_capacity, - min_after_dequeue=common_queue_min) + min_after_dequeue=common_queue_min, + seed=seed) items = dataset.decoder.list_items() tensors = dataset.decoder.decode(data, items) diff --git a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index e8f6de31496..f1cbf563e3b 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -170,7 +170,8 @@ def parallel_read(data_sources, shuffle=True, dtypes=None, capacity=256, - min_after_dequeue=128): + min_after_dequeue=128, + seed=None): """Reads multiple records in parallel from data_sources using n readers. It uses a ParallelReader to read from multiple files in parallel using @@ -199,6 +200,7 @@ def parallel_read(data_sources, capacity: integer, capacity of the common_queue. min_after_dequeue: integer, minimum number of records in the common_queue after dequeue. Needed for a good shuffle. + seed: A seed for RandomShuffleQueue. Returns: key, value: a tuple of keys and values from the data_source. @@ -212,7 +214,8 @@ def parallel_read(data_sources, common_queue = data_flow_ops.RandomShuffleQueue( capacity=capacity, min_after_dequeue=min_after_dequeue, - dtypes=dtypes) + dtypes=dtypes, + seed=seed) else: common_queue = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=dtypes) diff --git a/tensorflow/contrib/slim/python/slim/learning.py b/tensorflow/contrib/slim/python/slim/learning.py index ed3e927560e..5595e53da1f 100644 --- a/tensorflow/contrib/slim/python/slim/learning.py +++ b/tensorflow/contrib/slim/python/slim/learning.py @@ -471,7 +471,14 @@ def create_train_op( 'LossTensor is inf or nan') # Ensure the train_tensor computes grad_updates. 
- return control_flow_ops.with_dependencies([grad_updates], total_loss) + train_op = control_flow_ops.with_dependencies([grad_updates], total_loss) + + # Add the operation used for training to the 'train_op' collection + train_ops = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + if train_op not in train_ops: + train_ops.append(train_op) + + return train_op def _wait_for_step(sess, global_step, step): diff --git a/tensorflow/contrib/slim/python/slim/learning_test.py b/tensorflow/contrib/slim/python/slim/learning_test.py index 69cd4a9583b..8a9f5f825c7 100644 --- a/tensorflow/contrib/slim/python/slim/learning_test.py +++ b/tensorflow/contrib/slim/python/slim/learning_test.py @@ -301,6 +301,22 @@ class CreateTrainOpTest(tf.test.TestCase): self.assertAllClose(mean, [0] * 4) self.assertAllClose(variance, [1] * 4) + def testRecordTrainOpInCollection(self): + with tf.Graph().as_default(): + tf.set_random_seed(0) + tf_inputs = tf.constant(self._inputs, dtype=tf.float32) + tf_labels = tf.constant(self._labels, dtype=tf.float32) + + tf_predictions = LogisticClassifier(tf_inputs) + slim.losses.log_loss(tf_predictions, tf_labels) + total_loss = slim.losses.get_total_loss() + + optimizer = tf.train.GradientDescentOptimizer(learning_rate=1.0) + train_op = slim.learning.create_train_op(total_loss, optimizer) + + # Make sure the training op was recorded in the proper collection + self.assertTrue(train_op in tf.get_collection(tf.GraphKeys.TRAIN_OP)) + class TrainTest(tf.test.TestCase): diff --git a/tensorflow/contrib/tensor_forest/client/eval_metrics.py b/tensorflow/contrib/tensor_forest/client/eval_metrics.py index 6971e1861d1..be89b6f9593 100644 --- a/tensorflow/contrib/tensor_forest/client/eval_metrics.py +++ b/tensorflow/contrib/tensor_forest/client/eval_metrics.py @@ -23,43 +23,54 @@ from tensorflow.contrib.metrics.python.ops import metric_ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops - -def _accuracy(probabilities, targets): - 
predictions = math_ops.argmax(probabilities, 1) - # undo one-hot - labels = math_ops.argmax(targets, 1) - return metric_ops.streaming_accuracy(predictions, labels) +INFERENCE_PROB_NAME = 'inference' +INFERENCE_PRED_NAME = 'predictions' -def _r2(probabilities, targets): +def _accuracy(predictions, targets, weights=None): + return metric_ops.streaming_accuracy(predictions, targets, weights=weights) + + +def _r2(probabilities, targets, weights=None): if targets.get_shape().ndims == 1: targets = array_ops.expand_dims(targets, -1) + targets = math_ops.to_float(targets) y_mean = math_ops.reduce_mean(targets, 0) squares_total = math_ops.reduce_sum(math_ops.square(targets - y_mean), 0) squares_residuals = math_ops.reduce_sum(math_ops.square( targets - probabilities), 0) score = 1 - math_ops.reduce_sum(squares_residuals / squares_total) - return metric_ops.streaming_mean(score) + return metric_ops.streaming_mean(score, weights=weights) -def _sigmoid_entropy(probabilities, targets): +def _squeeze_and_onehot(targets, depth): + targets = array_ops.squeeze(targets, squeeze_dims=[1]) + return array_ops.one_hot(math_ops.to_int32(targets), depth) + + +def _sigmoid_entropy(probabilities, targets, weights=None): return metric_ops.streaming_mean(losses.sigmoid_cross_entropy( - probabilities, targets)) + probabilities, _squeeze_and_onehot(targets, + array_ops.shape(probabilities)[1])), + weights=weights) -def _softmax_entropy(probabilities, targets): - return metric_ops.streaming_mean(losses.softmax_cross_entropy( - probabilities, targets)) +def _softmax_entropy(probabilities, targets, weights=None): + return metric_ops.streaming_mean(losses.sparse_softmax_cross_entropy( + probabilities, math_ops.to_int32(targets)), + weights=weights) -def _predictions(probabilities, unused_targets): - return math_ops.argmax(probabilities, 1) +def _predictions(predictions, unused_targets, **unused_kwargs): + return predictions -def _log_loss(probabilities, targets): - # targets doesn't have a shape 
coming in, log_loss isn't too happy about it. - targets = array_ops.reshape(targets, array_ops.shape(probabilities)) - return metric_ops.streaming_mean(losses.log_loss(probabilities, targets)) +def _class_log_loss(probabilities, targets, weights=None): + return metric_ops.streaming_mean( + losses.log_loss(probabilities, + _squeeze_and_onehot(targets, + array_ops.shape(probabilities)[1])), + weights=weights) _EVAL_METRICS = {'sigmoid_entropy': _sigmoid_entropy, @@ -67,9 +78,21 @@ _EVAL_METRICS = {'sigmoid_entropy': _sigmoid_entropy, 'accuracy': _accuracy, 'r2': _r2, 'predictions': _predictions, - 'log_loss': _log_loss} + 'classification_log_loss': _class_log_loss} + + +_PREDICTION_KEYS = {'sigmoid_entropy': INFERENCE_PROB_NAME, + 'softmax_entropy': INFERENCE_PROB_NAME, + 'accuracy': INFERENCE_PRED_NAME, + 'r2': INFERENCE_PROB_NAME, + 'predictions': INFERENCE_PRED_NAME, + 'classification_log_loss': INFERENCE_PROB_NAME} def get_metric(metric_name): """Given a metric name, return the corresponding metric function.""" return _EVAL_METRICS[metric_name] + + +def get_prediction_key(metric_name): + return _PREDICTION_KEYS[metric_name] diff --git a/tensorflow/contrib/tensor_forest/data/data_ops.py b/tensorflow/contrib/tensor_forest/data/data_ops.py index c408b93d710..1dfcaf5c7a4 100644 --- a/tensorflow/contrib/tensor_forest/data/data_ops.py +++ b/tensorflow/contrib/tensor_forest/data/data_ops.py @@ -17,10 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import math import threading -from tensorflow.contrib.learn.python.learn.learn_io import graph_io from tensorflow.contrib.tensor_forest.python import constants from tensorflow.python.framework import common_shapes @@ -35,8 +33,6 @@ from tensorflow.python.platform import tf_logging as logging DATA_OPS_FILE = '_data_ops.so' -EXAMPLE_WEIGHT_NAME = '__weight__' - _data_ops = None _ops_lock = threading.Lock() @@ -69,68 +65,28 @@ def Load(): def 
_ParseSparse(data): """Concat sparse tensors together. - A common use of sparse tensors is to treat strings as a sparse bit vector - with a large number of features representing the presence of all possible - values. Here we convert these strings to integer indices in a sparse bit - tensor. In order to pack each incoming feature into a single sparse tensor, - we add an offset to the converted indices to indicate that they came from - different features in the source data. - Args: data: A dict of name -> Tensor. Returns: - A single sparse tensor with float values and a 1-D input spec Tensor. + A single sparse tensor and a 1-D input spec Tensor. Raises: - NotImplementedError: Combining dense and sparse tensors is not yet + NotImplementedError: Combining dense and sparse tensors is not supported. ValueError: If data contains non-string Tensors. """ - convert_ops = Load() - - # Sparse tensor indices have 63 bits to use for information. We use the - # minimum number of these (MSBs) for the offset, and pack the rest with the - # actual data. - num_features = len(data) - offset_bits = int(math.ceil(math.log(num_features, 2))) - - # We condense data to 26 bits, see sparse_values_to_indices.cc - offset_increment = int(math.pow(2, 26 - offset_bits)) - offset = 0 - - sparse_tensors = [] - keys = None - weights = None for k in sorted(data.keys()): - if k == graph_io.KEY_FEATURE_NAME: - keys = data[k] - elif k == EXAMPLE_WEIGHT_NAME: - weights = data[k] - elif isinstance(data[k], ops.SparseTensor): - # TODO(gilberth): Support mixed string/float sparse tensors. - # We currently only support string (categorical) data if we're using - # sparse tensors. 
- if data[k].dtype != dtypes.string: - raise ValueError('Only sparse tensors of type string are supported.') - sparse_indices = data[k].indices - sparse_values = data[k].values - new_shape = array_ops.concat( - 0, [array_ops.slice(data[k].shape, [0], [1]), [offset_increment]]) + if not isinstance(data[k], ops.SparseTensor): + raise NotImplementedError( + 'Features should be either all sparse or all dense. Use a ' + 'feature engineering function to convert some of them.') - new_indices, new_values = convert_ops.sparse_values_to_indices( - sparse_indices, - sparse_values, - offset, offset_bits=offset_bits) - sparse_tensors.append(ops.SparseTensor(indices=new_indices, - values=new_values, - shape=new_shape)) - else: - # Convert dense to sparse. - raise NotImplementedError('Dense to sparse conversion not implemented.') - - return (sparse_ops.sparse_concat(1, sparse_tensors), keys, weights, - [constants.DATA_CATEGORICAL]) + data_spec = [ + constants.DATA_CATEGORICAL if data[data.keys()[0]].dtype == dtypes.string + else constants.DATA_FLOAT + ] + return sparse_ops.sparse_concat(1, data.values()), data_spec def _ParseDense(data): @@ -143,22 +99,20 @@ def _ParseDense(data): A tuple of (single dense float Tensor, keys tensor (if exists), data spec). 
""" convert_ops = Load() - data_spec = [constants.DATA_CATEGORICAL if data[k].dtype == dtypes.string else - constants.DATA_FLOAT for k in sorted(data.keys())] + data_spec = [constants.DATA_CATEGORICAL if (data[k].dtype == dtypes.string or + data[k].dtype == dtypes.int32 or + data[k].dtype == dtypes.int64) + else constants.DATA_FLOAT for k in sorted(data.keys())] data_spec = [constants.DATA_FLOAT] + data_spec - keys = None - weights = None features = [] for k in sorted(data.keys()): - if k == graph_io.KEY_FEATURE_NAME: - keys = data[k] - elif k == EXAMPLE_WEIGHT_NAME: - weights = data[k] + if data[k].dtype == dtypes.string: + features.append(convert_ops.string_to_float(data[k])) + elif data[k].dtype == dtypes.int64 or data[k].dtype == dtypes.int32: + features.append(math_ops.to_float(data[k])) else: - features.append( - convert_ops.string_to_float(data[k]) if data[k].dtype == dtypes.string - else data[k]) - return array_ops.concat(1, features), keys, weights, data_spec + features.append(data[k]) + return array_ops.concat(1, features), data_spec def ParseDataTensorOrDict(data): @@ -187,8 +141,7 @@ def ParseDataTensorOrDict(data): else: return _ParseDense(data) else: - return (data, None, None, - [constants.DATA_FLOAT] * data.get_shape().as_list()[1]) + return (data, [constants.DATA_FLOAT] * data.get_shape().as_list()[1]) def ParseLabelTensorOrDict(labels): diff --git a/tensorflow/contrib/tensor_forest/python/tensor_forest.py b/tensorflow/contrib/tensor_forest/python/tensor_forest.py index ee31c0eba41..17d469739f9 100644 --- a/tensorflow/contrib/tensor_forest/python/tensor_forest.py +++ b/tensorflow/contrib/tensor_forest/python/tensor_forest.py @@ -19,7 +19,9 @@ from __future__ import print_function import math import random +import sys +from tensorflow.contrib.losses.python.losses import loss_ops from tensorflow.contrib.tensor_forest.python import constants from tensorflow.contrib.tensor_forest.python.ops import inference_ops from 
tensorflow.contrib.tensor_forest.python.ops import training_ops @@ -429,8 +431,9 @@ class RandomForestGraphs(object): return math_ops.reduce_mean(math_ops.to_float(array_ops.pack(sizes))) # pylint: disable=unused-argument - def training_loss(self, features, labels): - return math_ops.neg(self.average_size()) + def training_loss(self, features, labels, data_spec=None, + name='training_loss'): + return math_ops.neg(self.average_size(), name=name) # pylint: disable=unused-argument def validation_loss(self, features, labels): @@ -456,6 +459,63 @@ class RandomForestGraphs(object): return ForestStats(tree_stats, self.params) +def one_hot_wrapper(num_classes, loss_fn): + """Some loss functions take one-hot labels.""" + def _loss(probs, targets): + one_hot_labels = array_ops.one_hot( + math_ops.to_int32(targets), num_classes, + on_value=1., off_value=0., dtype=dtypes.float32) + return loss_fn(probs, one_hot_labels) + return _loss + + +class TrainingLossForest(RandomForestGraphs): + """Random Forest that uses training loss as the termination criteria.""" + + def __init__(self, params, loss_fn=None, **kwargs): + """Initialize. + + Args: + params: Like RandomForestGraphs, a ForestHParams object. + loss_fn: A function that takes probabilities and targets and returns + a loss for each example. + **kwargs: Keyword args to pass to superclass (RandomForestGraphs). 
+ """ + self.loss_fn = loss_fn or one_hot_wrapper(params.num_classes, + loss_ops.log_loss) + self._loss = None + super(TrainingLossForest, self).__init__(params, **kwargs) + + def _get_loss(self, features, labels, data_spec=None): + """Constructs, caches, and returns the inference-based loss.""" + if self._loss is not None: + return self._loss + + def _average_loss(): + probs = self.inference_graph(features, data_spec=data_spec) + return math_ops.reduce_sum(self.loss_fn( + probs, labels)) / math_ops.to_float( + array_ops.shape(features)[0]) + + self._loss = control_flow_ops.cond( + self.average_size() > 0, _average_loss, + lambda: constant_op.constant(sys.maxsize, dtype=dtypes.float32)) + + return self._loss + + def training_graph(self, input_data, input_labels, data_spec=None, + **kwargs): + loss = self._get_loss(input_data, input_labels, data_spec=data_spec) + with ops.control_dependencies([loss.op]): + return super(TrainingLossForest, self).training_graph( + input_data, input_labels, **kwargs) + + def training_loss(self, features, labels, data_spec=None, + name='training_loss'): + return array_ops.identity( + self._get_loss(features, labels, data_spec=data_spec), name=name) + + class RandomTreeGraphs(object): """Builds TF graphs for random tree training and inference.""" diff --git a/tensorflow/contrib/tfprof/BUILD b/tensorflow/contrib/tfprof/BUILD index d55bda1bd05..e817cb86dfd 100644 --- a/tensorflow/contrib/tfprof/BUILD +++ b/tensorflow/contrib/tfprof/BUILD @@ -12,6 +12,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], deps = [ + "//tensorflow/contrib/tfprof/python/tools/tfprof:model_analyzer", "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger", ], ) diff --git a/tensorflow/contrib/tfprof/README.md b/tensorflow/contrib/tfprof/README.md index 013be486767..e103cb21216 100644 --- a/tensorflow/contrib/tfprof/README.md +++ b/tensorflow/contrib/tfprof/README.md @@ -20,434 +20,9 @@ and measures system performance. 
4. Explore model based on name scope or graph structure. 5. Selectively grouping/filtering/accounting/ordering ops. -### Interfaces +tfprof can be used as CommandLine Interface (CLI) and Python API. +CLI locates in tensorflow/tools/tfprof. +Python API locates in tensorflow/contrib/tfprof. +Tutorial locates in tensorflow/tools/tfprof/README.md -[CLI Tutorials](#cli-tutorials): -It supports interactive mode for exploration and single-shot mode for -scripts. Outputs can be dumped to files or printed in terminal. - -Python API Tutorials: Python API is not released yet. - -## CLI Tutorials - -Tutorials are based on a 32 layers ResNet. -TODO(xpan): Provide graph.pbtxt, model.ckpt, tfprof_log and run_meta download. - -### Examples - -1) Start `tfprof` command line tool - -```shell -# Build the tool. -bazel build -c opt tensorflow/contrib/tfprof/... - -# Help information, including detail 'option' instructions. -bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof help -# -# The following commands will start tfprof interactive mode. -# -# Profile model shapes and parameters only. -bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \ - --graph_path=/graph.pbtxt -# -# Additionally profile checkpoint statistics and values. -# Use '-account_type_regexes _checkpoint_variables' to select -# checkpoint tensors. -bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \ - --graph_path=graph.pbtxt \ - --checkpoint_path=model.ckpt -# -# Additionally profile ops requested memory and timing. -# See CLI Input Files section on generating run_meta file. -bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \ - --graph_path=graph.pbtxt \ - --run_meta_path=run_meta \ - --checkpoint_path=model.ckpt -# -# tfprof_log is used to define customized op types and float ops. -# Use tfprof_logger.write_op_log() to create tfprof_log. -# See 11) in Examples section on generating tfprof_log file. 
-bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \ - --graph_path=graph.pbtxt \ - --run_meta_path=run_meta \ - --op_log_path=tfprof_log \ - --checkpoint_path=model.ckpt -``` -Note that `graph.pbtxt` is an ASCII text format. - -2) Press enter to show the default options - -```shell -tfprof> -tfprof> --max_depth 4 --min_bytes 0 --min_micros 0 --min_params 0 --min_float_ops 0 --device_regexes .* --order_by name --account_type_regexes Variable --start_name_regexes .* --trim_name_regexes --show_name_regexes .* --hide_name_regexes IsVariableInitialized_[0-9]+,save\/.*,^zeros[0-9_]* --account_displayed_op_only false -# supported select fileds. Availability depends on --[run_meta|checkpoint|op_log]_path. -# [bytes|micros|params|float_ops|num_hidden_ops|tensor_value|device|op_types] --select params --viz false --dump_to_file -``` - -3) I want to see the `BatchNorm`'s gamma value in checkpoint. - -```shell -# Requires --graph_path, --checkpoint_path. -tfprof> scope -show_name_regexes unit_1_0.*gamma -select tensor_value -max_depth 5 -_TFProfRoot () - unit_1_0/shared_activation/init_bn/gamma () -[1.80 2.10 2.06 1.91 2.26 1.86 1.81 1.37 1.78 1.85 1.96 1.54 2.04 2.34 2.22 1.99 ], - unit_1_0/sub2/bn2/gamma () -[1.57 1.83 1.30 1.25 1.59 1.14 1.26 0.82 1.19 1.10 1.48 1.01 0.82 1.23 1.21 1.14 ], -``` - -4) I want to see my checkpoint tensors shape and number of parameters. - -```shell -# Requires --graph_path, --checkpoint_path. -# Increase -max_depth to see all tensors. 
-tfprof> scope -account_type_regexes _checkpoint_variables -select params -max_depth 4 -_TFProfRoot (--/930.58k params) - global_step (0/0 params) - init/init_conv/DW (3x3x3x16, 432/864 params) - pool_logit/DW (64x10, 640/1.28k params) - pool_logit/DW/Momentum (64x10, 640/640 params) - pool_logit/biases (10, 10/20 params) - pool_logit/biases/Momentum (10, 10/10 params) - unit_last/final_bn/beta (64, 64/128 params) - unit_last/final_bn/gamma (64, 64/128 params) - unit_last/final_bn/moving_mean (64, 64/64 params) - unit_last/final_bn/moving_variance (64, 64/64 params) -``` - -5) I defined an op named ‘cost’ to calculate the loss. I want to know what ops -it depends on take a long time to run. Hint: Use the ‘graph’ command to explore -graph dependencies. - -```shell -# Requires --graph_path, --run_meta_path. -tfprof> graph -start_name_regexes cost.* -max_depth 100 -min_micros 10000 -select micros -account_type_regexes .* -_TFProfRoot (0us/3.61sec) - init/init_conv/Conv2D (11.75ms/3.10sec) - random_shuffle_queue_DequeueMany (3.09sec/3.09sec) - unit_1_0/sub2/conv2/Conv2D (74.14ms/3.19sec) - unit_1_3/sub2/conv2/Conv2D (60.75ms/3.34sec) - unit_2_4/sub2/conv2/Conv2D (73.58ms/3.54sec) - unit_3_3/sub2/conv2/Conv2D (10.26ms/3.60sec) -``` - -6) I want to know the expensive operations during the back propagation. -Hint: tensorflow prepend ‘gradient’ to your defined name scopes. Use the ‘scope’ -command to explore based on name scope hierarchies. - -```shell -# Requires --graph_path, --run_meta_path. 
-tfprof> scope -start_name_regexes gradient.* -max_depth 100 -min_micros 20000 -select micros -account_type_regexes .* -_TFProfRoot (0us/2.29sec) - gradients/unit_1_0/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (54.96ms/54.96ms) - gradients/unit_1_0/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (83.63ms/83.63ms) - gradients/unit_1_1/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (99.25ms/99.25ms) - gradients/unit_1_2/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.40ms/95.40ms) - gradients/unit_1_2/sub2/conv2/Conv2D_grad/Conv2DBackpropFilter (99.83ms/99.83ms) - gradients/unit_1_3/sub1/conv1/Conv2D_grad/Conv2DBackpropFilter (95.39ms/95.39ms) - ... -``` - -7) Show the number of float operations in the model. -Note: float operations calculation depends on -1) op.RegisterStatistics. If an op doesn’t -have RegisterStatistics defined, its float operations cannot be counted. -2) fully defined shape is also necessary in order to calculate flops. -float operations number is provided by tensorflow::tfprof::OpLog logged from -Python API. - -```shell -# Requires --graph_path, --op_log_path. -tfprof> scope -min_float_ops 1 -max_depth 10 -select float_ops -account_type_regexes .* -_TFProfRoot (0/17.63b flops) - gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul (163.84k/163.84k flops) - gradients/pool_logit/xw_plus_b/MatMul_grad/MatMul_1 (163.84k/163.84k flops) - init/init_conv/Conv2D (113.25m/113.25m flops) - pool_logit/xw_plus_b (1.28k/165.12k flops) - pool_logit/xw_plus_b/MatMul (163.84k/163.84k flops) - unit_1_0/sub1/conv1/Conv2D (603.98m/603.98m flops) - unit_1_0/sub2/conv2/Conv2D (603.98m/603.98m flops) - unit_1_1/sub1/conv1/Conv2D (603.98m/603.98m flops) - unit_1_1/sub2/conv2/Conv2D (603.98m/603.98m flops) - ... -``` - -8) Show the number of parameters of all `tf.trainable_variables()` in the model. - -```shell -# Requires --graph_path --op_log_path. -# store option for future commands. 
-tfprof> set -account_type_regexes _trainable_variables -tfprof> scope -max_depth 4 -select params -_TFProfRoot (--/464.15k params) - init/init_conv/DW (3x3x3x16, 432/432 params) - pool_logit/DW (64x10, 640/640 params) - pool_logit/biases (10, 10/10 params) - unit_last/final_bn/beta (64, 64/64 params) - unit_last/final_bn/gamma (64, 64/64 params) -``` - -Where does “_trainable_variables” come from? It is from the OpLog file -generated by write_op_log() Python API. write_op_log() help users create some -common op types implicitly. Users can define their own op types and log it -through the write_op_log() API. - -9) What if I’m lazy and don’t want to define op type? I have given my ops -well-defined names in my model’s code. And want to use names to select a group -of ops. Let’s try it! - -```shell -tfprof> set -account_type_regexes .* -tfprof> scope -show_name_regexes unit_2_1.*DW -max_depth 100 -account_displayed_op_only -_TFProfRoot (0/18.43k params) - unit_2_1/sub1/conv1/DW (3x3x32x32, 9.22k/9.22k params) - unit_2_1/sub2/conv2/DW (3x3x32x32, 9.22k/9.22k params) -``` - -The above command allows you to filter ops that match specific names. -`-account_displayed_op_only` asks tfprof to only account ops displayed -in terminal. Otherwise, tfprof accounts all ops matched by -`-account_type_regexes` recursively even if they are hidden due to some -options such as -max_depth. - -10) TensorFlow has built-in op types. For example, built-in op type `Variable` -seems to include `Variable's` created by your model. However, be careful when -depending on it because TensorFlow creates extra `Variable` ops implicitly and -the implicitly created ops can have the same prefix as the `Variable's` you -defined. - -In the following example, extra `Variables` are created and “/Momentum” is -appended to their names. This might cause you “model capacity” calculation -to get wrong. 
- -```shell -tfprof> scope -account_type_regexes Variable -max_depth 4 -select params -_TFProfRoot (--/930.58k params) - global_step (1/1 params) - init/init_conv/DW (3x3x3x16, 432/864 params) - pool_logit/DW (64x10, 640/1.28k params) - pool_logit/DW/Momentum (64x10, 640/640 params) - pool_logit/biases (10, 10/20 params) - pool_logit/biases/Momentum (10, 10/10 params) - unit_last/final_bn/beta (64, 64/128 params) - unit_last/final_bn/gamma (64, 64/128 params) - unit_last/final_bn/moving_mean (64, 64/64 params) - unit_last/final_bn/moving_variance (64, 64/64 params) -``` - - -11) A example of defining extra op type for ops using `OpLog` - -First, in Python code, create an `OpLog` proto and add op type -information to it: - -```python - -op_log = tfprof_log_pb2.OpLog() -entry = op_log.log_entries.add() -entry.name = 'pool_logit/DW' -entry.types.append('pool_logit') -entry = op_log.log_entries.add() -entry.name = 'pool_logit/biases' -# Alternatively: -# var = tf.get_variable(xxx) -# entry.name = var.op.name -entry.types.append('pool_logit') -``` - -Second, call write_op_log to write the OpLog proto. - -```python -tf.tfprof.tfprof_logger.write_op_log(sess.graph, /tmp/my_op_log_dir, op_log) -``` - -Third, when starting the tfprof tool, specify -"--op_log_path /tmp/my_op_log_dir/op_log" - -```shell -tfprof> scope -account_type_regexes pool_logit -max_depth 4 -select params -_TFProfRoot (--/650 params) - pool_logit/DW (64x10, 640/640 params) - pool_logit/biases (10, 10/10 params) -``` - -Note that when you call -`tf.tfprof.tfprof_logger.write_op_log(...)`, the tool adds all `Variables` -inside `tf.trainable_variables()` to `_trainable_variables`. - -12) Run tfprof in one-shot mode and dump result to file. - -```shell -# Printed to stdout if --dump_to_file is not set. -tfprof scope --graph_path /cns/ij-d/home/xpan/tfprof/graph.pbtxt \ - --max_depth 3 \ - --dump_to_file "/tmp/dump" -Reading Files... -Parsing GraphDef... -Preparing Views... 
- -cat /tmp/dump -_TFProfRoot (--/930.58k params) - global_step (0/0 params) - pool_logit/DW (64x10, 640/1.28k params) - pool_logit/biases (10, 10/20 params) -``` - -13) Analyze how balanced Variable are on parameter servers. - -In this tutorial, I'm going to use a seq2seq model, which are split -on several gpus at workers and several parameter servers. - -In tfprof, 'device' is an op_type. For example, if op1 and op2 are placed on -gpu0. They share an op_type called 'gpu0'. - -```shell -bazel-bin/tensorflow/contrib/tfprof/tools/tfprof/tfprof \ - --graph_path ~/tfprof/textsum/graph.pbtxt \ - --run_meta_path ~/tfprof/textsum/run_meta - -# Looks like ps task 1 is holding twice more parameters than task 0. -tfprof> scope -select device,params -account_type_regexes .*ps.*task:0.* -max_depth 1 -_TFProfRoot (--/25.81m params) -tfprof> scope -select device,params -account_type_regexes .*ps.*task:1.* -max_depth 1 -_TFProfRoot (--/58.84m params) -``` - -### CLI Input Files - -tfprof command line inference (CLI) loads dumped files from a tensorflow model. -Convert them into in-memory data structures. To use it, users need to specify -the locations of the dumped files. The following are the dumped files loaded -by tfprof: - ---graph_path: GraphDef text file (required). Used to build in-memory -representation of the model. For example, graph.pbtxt written by tf.Supervisor -is a candidate. If you are not using tf.Supervisor, you can easily get GraphDef -using tf.Graph.as_graph_def() or other API. - ---run_meta_path: tensorflow::RunMetadata. -Used to get the memory and time consumption of -each op of the model. Users need to enable it. For example, the following code -snippet writes a RunMetadata file: - -```python -run_options = config_pb2.RunOptions(trace_level=config_pb2.RunOptions.FULL_TRACE) -run_metadata = config_pb2.RunMetadata() -# Once a while, call it the get the RunMeta. 
-_ = self._sess.run(..., options=run_options, run_metadata=run_metadata) -with gfile.Open(os.path.join(output_dir, "run_meta"), "w") as f: - f.write(run_metadata.SerializeToString()) -``` - ---op_log_path: -tensorflow::tfprof::OpLog. A proto used to provide extra op information -for ops. By giving a group of ops a type name, users can easily aggregate the -statistics for those ops without accidently missing or including extra ops. -tfprof exposes the following Python API to add op information and logging. - -```python -tf.contrib.tfprof.tfprof_logger.write_op_log(graph, log_dir, op_log=None) -``` - ---checkpoint_path: -TensorFlow checkpoint. It defines _checkpoint_variable op type. It also -provides checkpointed tensors' values. - - -## Design - - -### In-memory representation - -Scope: This representation organizes ops based on name scope hierarchy, -similar to filesystem hierarchy. Hence, it is essentially a tree data structure. -For example op1 with name “name1/name2” is a child of op2 with name “name1”. - -Graph: The representation organizes ops based on op inputs. Hence it is -a graph structure. The graph is a “directed acyclic graph” (hopefully), with -direction from “output to input”. The direction is design this way so that users -can trace from “result” to its “sources”. - -### Command line options - -tfprof’s major goals are to measure system performance and quicly analyze -model architectures. Hence, its commands and options should allow users to achieve -these 2 goals easily. - -graph: It is expected that users will mostly use graph representation to -debug system performance. Hence, tfprof supports graph command, which pulls the -graph in-memory representation described above. - -scope: It is expected that some users might want to explore their model -statistics using the name scope information they defined in the Python codes. -Hence, tfprof supports “scope” command, which pulls the tree in-memory -representation. 
- -set: It is used to store the options so that user doesn’t need to -re-type the same option again and again in the follow up command line. Note that -tfprof has traditional terminal’s history and auto-complete support. - -help: print help information. - -Options: Run “tfprof help” to get detailed explanations. - -```python -"-max_depth", -"-min_bytes", -"-min_micros", -"-min_params", -"-min_float_ops", -"-order_by", -"-account_type_regexes", -"-start_name_regexes", -"-trim_name_regexes", -"-show_name_regexes", -"-hide_name_regexes", -"-account_displayed_op_only", -"-select", -"-viz", # Only supported for graph command. -"-dump_to_file", -``` - -A key design is that stats are aggregated from descendants up to ancestors. -`-account_type_regexes` is used to decide which ops stat is accounted. It makes -decision based on op type. Usually set it to `.*` if no extra type information -is added to the ops using OpLog. Intuitively, only accounted ops are displayed. -`-min/max` and `-show/hide/trim/start` options are only used the optionally -displayed or hide ops based on ops’ name and stats. However, they don’t prevent -tfprof from accounting stats of hidden ops. Hence, the stat of a op can be -aggregated by its parent even if it is hidden. `-account_displayed_op_only` is -an option to break this rule. When it is set, only displayed ops are accounted. - -Regexes are all comma-separated, for example `-show_name_regexes` -`regex1.*,regex2.*`. It is designed this way because it is convenient and comma -is not expected to show up in op names. - -`-order_by` is used to order displayed ops. Displayed ops at the same hierarchy -(notice the indent printed) are sorted according to order_by. - -## Future Work - -* Load SummaryWriter event logs so that it can show the latest summary value. - -* Better sorting and aggregation of outputs. Easier comprehension. - -* Currently, shape information is based on `graph.pbtxt`. When the shape -information is incomplete, tfprof ignores it. 
See if it can use `RunMetadata` -and `Checkpoint` to complete shape information. +Enjoy! \ No newline at end of file diff --git a/tensorflow/contrib/tfprof/__init__.py b/tensorflow/contrib/tfprof/__init__.py index ce777979b96..129dad2726c 100644 --- a/tensorflow/contrib/tfprof/__init__.py +++ b/tensorflow/contrib/tfprof/__init__.py @@ -17,5 +17,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.tfprof.python.tools.tfprof import model_analyzer from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger from tensorflow.python.util.all_util import make_all diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD index 87a8311486f..07677c6ed73 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/BUILD @@ -3,14 +3,36 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:public"]) load("//tensorflow:tensorflow.bzl", "tf_py_test") +load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") + +py_library( + name = "model_analyzer", + srcs = ["model_analyzer.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/tfprof/python/tools/tfprof:pywrap_tensorflow_print_model_analysis_lib", + "//tensorflow/contrib/tfprof/python/tools/tfprof:tfprof_logger", + "//tensorflow/tools/tfprof:protos_all_py", + ], +) + +py_test( + name = "model_analyzer_test", + srcs = ["model_analyzer_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":model_analyzer", + "//tensorflow:tensorflow_py", + ], +) py_library( name = "tfprof_logger", srcs = ["tfprof_logger.py"], srcs_version = "PY2AND3", deps = [ - "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py", "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/tools/tfprof:protos_all_py", ], ) @@ -20,7 +42,34 @@ tf_py_test( additional_deps = [ ":tfprof_logger", 
"//tensorflow:tensorflow_py", - "//tensorflow/contrib/tfprof/tools/tfprof:protos_all_py", + "//tensorflow/tools/tfprof:protos_all_py", + ], +) + +tf_py_wrap_cc( + name = "pywrap_tensorflow_print_model_analysis_lib", + srcs = ["pywrap_tensorflow_print_model_analysis.i"], + swig_includes = [ + "//tensorflow/python:lib/core/strings.i", + "//tensorflow/python:platform/base.i", + ], + deps = [ + "//tensorflow/core:framework_headers_lib", + "//tensorflow/tools/tfprof/internal:print_model_analysis_hdr", + "//util/python:python_headers", + ], +) + +py_test( + name = "print_model_analysis_test", + srcs = ["print_model_analysis_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":pywrap_tensorflow_print_model_analysis_lib", + "//tensorflow:tensorflow_py", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + "//tensorflow/tools/tfprof:protos_all_py", ], ) diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py new file mode 100644 index 00000000000..cc94fd65b53 --- /dev/null +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer.py @@ -0,0 +1,188 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Model Analyzer. + +Analyze model, including shape, params, time, memory, structure, etc. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl +from tensorflow.contrib.tfprof.python.tools.tfprof import tfprof_logger +from tensorflow.tools.tfprof import tfprof_options_pb2 +from tensorflow.tools.tfprof import tfprof_output_pb2 + +# pylint: disable=bad-whitespace +# pylint: disable=bad-continuation +# 2 example tfprof_options for print_model_analysis API. +# +# Show the parameter statistics of trainable variables. +TRAINABLE_VARS_PARAMS_STAT_OPTIONS = { + 'max_depth': 10000, + 'min_bytes': 0, + 'min_micros': 0, + 'min_params': 0, + 'min_float_ops': 0, + 'device_regexes': ['.*'], + 'order_by': 'name', + 'account_type_regexes': [tfprof_logger.TRAINABLE_VARIABLES], + 'start_name_regexes': ['.*'], + 'trim_name_regexes': [], + 'show_name_regexes': ['.*'], + 'hide_name_regexes': [], + 'account_displayed_op_only': True, + 'select': ['params'], + 'viz': False, + 'dump_to_file': '' +} + +# Show the number float operations. +FLOAT_OPS_OPTIONS = { + 'max_depth': 10000, + 'min_bytes': 0, + 'min_micros': 0, + 'min_params': 0, + 'min_float_ops': 1, + 'device_regexes': ['.*'], + 'order_by': 'float_ops', + 'account_type_regexes': ['.*'], + 'start_name_regexes': ['.*'], + 'trim_name_regexes': [], + 'show_name_regexes': ['.*'], + 'hide_name_regexes': [], + 'account_displayed_op_only': True, + 'select': ['float_ops'], + 'viz': False, + 'dump_to_file': '' +} + +# Show number of parameters on parameter server 0. +# It is recommended to provide`run_meta` argument +# to have complete device placement info. 
+PRINT_PARAMS_ON_DEVICE = { + 'max_depth': 1, + 'min_bytes': 0, + 'min_micros': 0, + 'min_params': 0, + 'min_float_ops': 0, + 'device_regexes': ['.*'], + 'order_by': 'name', + 'account_type_regexes': ['.*ps.*task:0.*'], + 'start_name_regexes': ['.*'], + 'trim_name_regexes': [], + 'show_name_regexes': ['.*'], + 'hide_name_regexes': [], + 'account_displayed_op_only': False, + 'select': ['device', 'params'], + 'viz': False, + 'dump_to_file': '' +} + +# Show the timing stats and memory demands. +PRINT_ALL_TIMING_MEMORY = { + 'max_depth': 10000, + 'min_bytes': 1, # Only >=1 + 'min_micros': 1, # Only >=1 + 'min_params': 0, + 'min_float_ops': 0, + 'device_regexes': ['.*'], + 'order_by': 'name', + 'account_type_regexes': ['.*'], + 'start_name_regexes': ['.*'], + 'trim_name_regexes': [], + 'show_name_regexes': ['.*'], + 'hide_name_regexes': [], + 'account_displayed_op_only': True, + 'select': ['micros', 'bytes'], + 'viz': False, + 'dump_to_file': '' +} + +# pylint: enable=bad-whitespace +# pylint: enable=bad-continuation + + +def print_model_analysis(graph, + run_meta=None, + op_log=None, + tfprof_cmd='scope', + tfprof_options=TRAINABLE_VARS_PARAMS_STAT_OPTIONS): + """Print model statistics. + + Prints the model statistics to stdout. Also returns the results + in a TFProfNode proto. See go/tfprof or run tfprof tool: + 'bazel run third_party/tensorflow/tools/tfprof help' + + Examples: + Show the parameter/shape statistics of tf.trainable_variables(). + print_model_analysis(sess.graph). + + Show number of float ops. Only ops with RegisterStatistics defined + are counted. + show_float_op_opts = model_analyzer.FLOAT_OPS_OPTIONS + print_model_analysis(sess.graph, tfprof_options=show_float_op_opts) + + Args: + graph: tf.Graph. + run_meta: tensorflow::RunMetadata proto. When provided, also shows valid + timing and memory information when 'select' option contains + 'micros' and 'bytes'. + op_log: tensorflow::tfprof::OpLog proto. 
users can use this proto to + group together ops and use a op_type to select the group. + tfprof_cmd: string. Either 'scope' or 'graph'. 'scope' view organize + ops using their name scopes. 'graph' view organize ops using + their graph inputs. + tfprof_options: See 'tfprof help' for details. + Returns: + TFProfNode proto. Side effect: a formatted output to stdout. + """ + # pylint: disable=protected-access + op_log = tfprof_logger._merge_default_with_oplog(graph, op_log, run_meta) + # pylint: enable=protected-access + opts = tfprof_options_pb2.OptionsProto() + opts.max_depth = tfprof_options['max_depth'] + opts.min_bytes = tfprof_options['min_bytes'] + opts.min_micros = tfprof_options['min_micros'] + opts.min_params = tfprof_options['min_params'] + opts.min_float_ops = tfprof_options['min_float_ops'] + for p in tfprof_options['device_regexes']: + opts.device_regexes.append(p) + opts.order_by = tfprof_options['order_by'] + for p in tfprof_options['account_type_regexes']: + opts.account_type_regexes.append(p) + for p in tfprof_options['start_name_regexes']: + opts.start_name_regexes.append(p) + for p in tfprof_options['trim_name_regexes']: + opts.trim_name_regexes.append(p) + for p in tfprof_options['show_name_regexes']: + opts.show_name_regexes.append(p) + for p in tfprof_options['hide_name_regexes']: + opts.hide_name_regexes.append(p) + opts.account_displayed_op_only = tfprof_options['account_displayed_op_only'] + for p in tfprof_options['select']: + opts.select.append(p) + opts.viz = tfprof_options['viz'] + opts.dump_to_file = tfprof_options['dump_to_file'] + + run_meta_str = run_meta.SerializeToString() if run_meta else b'' + op_log_str = op_log.SerializeToString() if op_log else b'' + + tfprof_node = tfprof_output_pb2.TFProfNode() + tfprof_node.ParseFromString( + print_mdl.PrintModelAnalysis( + graph.as_graph_def().SerializeToString(), run_meta_str, op_log_str, + tfprof_cmd.encode('utf-8'), opts.SerializeToString())) + return tfprof_node diff --git 
a/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py new file mode 100644 index 00000000000..9988392acd9 --- /dev/null +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/model_analyzer_test.py @@ -0,0 +1,84 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import tensorflow as tf + + +class PrintModelAnalysisTest(tf.test.TestCase): + + def _BuildSmallModel(self): + image = tf.zeros([2, 6, 6, 3]) + kernel = tf.get_variable( + 'DW', [3, 3, 3, 6], + tf.float32, + initializer=tf.random_normal_initializer(stddev=0.001)) + x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME') + kernel = tf.get_variable( + 'DW2', [2, 2, 6, 12], + tf.float32, + initializer=tf.random_normal_initializer(stddev=0.001)) + x = tf.nn.conv2d(x, kernel, [1, 2, 2, 1], padding='SAME') + return x + + def testDumpToFile(self): + opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS + opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump') + + with tf.Session() as sess, tf.device('/cpu:0'): + _ = self._BuildSmallModel() + tf.contrib.tfprof.model_analyzer.print_model_analysis( + sess.graph, 
tfprof_options=opts) + + with tf.gfile.Open(opts['dump_to_file'], 'r') as f: + self.assertEqual(u'_TFProfRoot (--/450 params)\n' + ' DW (3x3x3x6, 162/162 params)\n' + ' DW2 (2x2x6x12, 288/288 params)\n', + f.read().decode('utf-8')) + + def testSelectEverything(self): + opts = tf.contrib.tfprof.model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS + opts['dump_to_file'] = os.path.join(tf.test.get_temp_dir(), 'dump') + opts['account_type_regexes'] = ['.*'] + opts['select'] = [ + 'bytes', 'params', 'float_ops', 'num_hidden_ops', 'device', 'op_types' + ] + + with tf.Session() as sess, tf.device('/cpu:0'): + x = self._BuildSmallModel() + + sess.run(tf.initialize_all_variables()) + run_meta = tf.RunMetadata() + _ = sess.run(x, + options=tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE), + run_metadata=run_meta) + + tf.contrib.tfprof.model_analyzer.print_model_analysis( + sess.graph, run_meta, tfprof_options=opts) + + with tf.gfile.Open(opts['dump_to_file'], 'r') as f: + # pylint: disable=line-too-long + self.assertEqual( + '_TFProfRoot (0/450 params, 0/10.44k flops, 0B/5.28KB, _kTFScopeParent)\n Conv2D (0/0 params, 5.83k/5.83k flops, 432B/432B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n Conv2D_1 (0/0 params, 4.61k/4.61k flops, 384B/384B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Conv2D)\n DW (3x3x3x6, 162/162 params, 0/0 flops, 648B/1.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n DW/Assign (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Assign)\n DW/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Add)\n DW/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|RandomStandardNormal)\n DW/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, 
/device:CPU:0|Const)\n DW/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Mul)\n DW/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n DW/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n DW/read (0/0 params, 0/0 flops, 648B/648B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n DW2 (2x2x6x12, 288/288 params, 0/0 flops, 1.15KB/2.30KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Variable|_trainable_variables)\n DW2/Assign (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Assign)\n DW2/Initializer (0/0 params, 0/0 flops, 0B/0B, _kTFScopeParent)\n DW2/Initializer/random_normal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Add)\n DW2/Initializer/random_normal/RandomStandardNormal (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|RandomStandardNormal)\n DW2/Initializer/random_normal/mean (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n DW2/Initializer/random_normal/mul (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Mul)\n DW2/Initializer/random_normal/shape (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n DW2/Initializer/random_normal/stddev (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|Const)\n DW2/read (0/0 params, 0/0 flops, 1.15KB/1.15KB, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Identity)\n init (0/0 params, 0/0 flops, 0B/0B, /device:CPU:0, /device:CPU:0|NoOp)\n zeros (0/0 params, 0/0 flops, 864B/864B, /job:localhost/replica:0/task:0/cpu:0, /job:localhost/replica:0/task:0/cpu:0|Const)\n', + f.read().decode('utf-8')) + # pylint: enable=line-too-long + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py 
b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py new file mode 100644 index 00000000000..0354d0f631d --- /dev/null +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/print_model_analysis_test.py @@ -0,0 +1,238 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""print_model_analysis test.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf +from google.protobuf import text_format +from tensorflow.contrib.tfprof.python.tools.tfprof import pywrap_tensorflow_print_model_analysis_lib as print_mdl +from tensorflow.tools.tfprof import tfprof_options_pb2 +from tensorflow.tools.tfprof import tfprof_output_pb2 + +# pylint: disable=bad-whitespace +# pylint: disable=bad-continuation +TEST_OPTIONS = { + 'max_depth': 10000, + 'min_bytes': 0, + 'min_micros': 0, + 'min_params': 0, + 'min_float_ops': 0, + 'device_regexes': ['.*'], + 'order_by': 'name', + 'account_type_regexes': ['.*'], + 'start_name_regexes': ['.*'], + 'trim_name_regexes': [], + 'show_name_regexes': ['.*'], + 'hide_name_regexes': [], + 'account_displayed_op_only': True, + 'select': ['params'], + 'viz': False +} + +# pylint: enable=bad-whitespace +# pylint: enable=bad-continuation + + +class PrintModelAnalysisTest(tf.test.TestCase): + + def 
_BuildSmallModel(self): + image = tf.zeros([2, 6, 6, 3]) + kernel = tf.get_variable( + 'DW', [6, 6, 3, 6], + tf.float32, + initializer=tf.random_normal_initializer(stddev=0.001)) + x = tf.nn.conv2d(image, kernel, [1, 2, 2, 1], padding='SAME') + return x + + def testPrintModelAnalysis(self): + opts = tfprof_options_pb2.OptionsProto() + opts.max_depth = TEST_OPTIONS['max_depth'] + opts.min_bytes = TEST_OPTIONS['min_bytes'] + opts.min_micros = TEST_OPTIONS['min_micros'] + opts.min_params = TEST_OPTIONS['min_params'] + opts.min_float_ops = TEST_OPTIONS['min_float_ops'] + for p in TEST_OPTIONS['device_regexes']: + opts.device_regexes.append(p) + opts.order_by = TEST_OPTIONS['order_by'] + for p in TEST_OPTIONS['account_type_regexes']: + opts.account_type_regexes.append(p) + for p in TEST_OPTIONS['start_name_regexes']: + opts.start_name_regexes.append(p) + for p in TEST_OPTIONS['trim_name_regexes']: + opts.trim_name_regexes.append(p) + for p in TEST_OPTIONS['show_name_regexes']: + opts.show_name_regexes.append(p) + for p in TEST_OPTIONS['hide_name_regexes']: + opts.hide_name_regexes.append(p) + opts.account_displayed_op_only = TEST_OPTIONS['account_displayed_op_only'] + for p in TEST_OPTIONS['select']: + opts.select.append(p) + opts.viz = TEST_OPTIONS['viz'] + + with tf.Session() as sess, tf.device('/cpu:0'): + _ = self._BuildSmallModel() + tfprof_pb = tfprof_output_pb2.TFProfNode() + tfprof_pb.ParseFromString( + print_mdl.PrintModelAnalysis(sess.graph.as_graph_def( + ).SerializeToString(), b'', b'', b'scope', opts.SerializeToString())) + + expected_pb = tfprof_output_pb2.TFProfNode() + text_format.Merge(r"""name: "_TFProfRoot" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 648 + children { + name: "Conv2D" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW" + 
exec_micros: 0 + requested_bytes: 0 + parameters: 648 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 648 + device: "/device:CPU:0" + children { + name: "DW/Assign" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/Initializer" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + children { + name: "DW/Initializer/random_normal" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + children { + name: "DW/Initializer/random_normal/RandomStandardNormal" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/Initializer/random_normal/mean" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/Initializer/random_normal/mul" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/Initializer/random_normal/shape" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/Initializer/random_normal/stddev" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + float_ops: 0 + total_float_ops: 0 + } + float_ops: 0 + total_float_ops: 0 + } + children { + name: "DW/read" + 
exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + float_ops: 0 + total_float_ops: 0 + } + children { + name: "zeros" + exec_micros: 0 + requested_bytes: 0 + total_exec_micros: 0 + total_requested_bytes: 0 + total_parameters: 0 + device: "/device:CPU:0" + float_ops: 0 + total_float_ops: 0 + } + float_ops: 0 + total_float_ops: 0""", expected_pb) + self.assertEqual(expected_pb, tfprof_pb) + + +if __name__ == '__main__': + tf.test.main() diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i new file mode 100644 index 00000000000..05b734a699f --- /dev/null +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/pywrap_tensorflow_print_model_analysis.i @@ -0,0 +1,43 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +%include "tensorflow/python/lib/core/strings.i" +%include "tensorflow/python/platform/base.i" + +%{ +#include "tensorflow/tools/tfprof/internal/print_model_analysis.h" +#include "tensorflow/core/framework/types.h" +%} + +%typemap(typecheck) const string & = char *; +%typemap(in) const string& (string temp) { + if (!_PyObjAs($input, &temp)) return NULL; + $1 = &temp; +} +%typemap(out) const string& { + $result = PyString_FromStringAndSize($1->data(), $1->size()); +} +%apply const string & {string &}; +%apply const string & {string *}; + +%ignoreall + +%unignore tensorflow; +%unignore tensorflow::tfprof; +%unignore tensorflow::tfprof::PrintModelAnalysis; + +%include "tensorflow/tools/tfprof/internal/print_model_analysis.h" + +%unignoreall \ No newline at end of file diff --git a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py index 53dd2632b69..1f710bc970c 100644 --- a/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py +++ b/tensorflow/contrib/tfprof/python/tools/tfprof/tfprof_logger.py @@ -24,8 +24,8 @@ import os import sys import tensorflow as tf -from tensorflow.contrib.tfprof.tools.tfprof import tfprof_log_pb2 from tensorflow.python.framework import ops +from tensorflow.tools.tfprof import tfprof_log_pb2 TRAINABLE_VARIABLES = '_trainable_variables' REGISTERED_FLOP_STATS = 'flops' @@ -85,7 +85,7 @@ def _get_logged_ops(graph, run_meta=None): if node.name not in logged_ops: entry = tfprof_log_pb2.OpLogEntry() entry.name = node.name - entry.float_ops = stats.value + entry.float_ops = int(stats.value) logged_ops[entry.name] = entry for v in graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES): diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py index c9564fc316c..d2a6368d785 100644 --- a/tensorflow/contrib/training/__init__.py +++ 
b/tensorflow/contrib/training/__init__.py @@ -32,8 +32,9 @@ like to store state in the forward direction across segments of an example. To resample data with replacement on a per-example basis, use ['rejection_sample'](#rejection_sample) or ['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide -a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, -providing the desired rate for each example. If you wish to specify relative +a boolean Tensor describing whether to accept or reject. Resulting batch sizes +are always the same. For `resample_at_rate`, provide the desired rate for each +example. Resulting batch sizes may vary. If you wish to specify relative rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) (which also returns the actual resampling rate used for each output example). diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc index 68cb20d0b57..1f079027efb 100644 --- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc +++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_lib.cc @@ -16,8 +16,10 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.pb.h" +#include "tensorflow/core/framework/types.pb.h" #include "tensorflow/core/kernels/immutable_constant_op.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" @@ -45,13 +47,27 @@ class NodeConverter { const DataType tensor_data_type = tensor_proto.dtype(); const TensorShapeProto tensor_shape = tensor_proto.tensor_shape(); + // Check that the tensor type is POD, only these types are supported for + // memmapping. 
+ // DataType enum is explicitly converted to int to avoid errors with passing + // enum type are a parameter type to std::unordered_set. + static std::unordered_set supported_types{ +#define TYPE_FOR_SET(type) static_cast(DataTypeToEnum::value), + TF_CALL_POD_TYPES(TYPE_FOR_SET) +#undef ADD_TYPE + }; + + if (supported_types.count(static_cast(tensor_data_type)) == 0) { + return Status::OK(); + } + // Create Tensor from value and write it in memmapped format. Tensor parsed(tensor_proto.dtype()); if (!parsed.FromProto(cpu_allocator(), tensor_proto)) { return errors::InvalidArgument("Cannot parse tensor from proto: ", tensor_proto.DebugString()); } - if (parsed.TotalBytes() < min_conversion_size_bytes) { + if (parsed.TotalBytes() < static_cast(min_conversion_size_bytes)) { return Status::OK(); } diff --git a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc index d64dca7b634..cb1e7577cf2 100644 --- a/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc +++ b/tensorflow/contrib/util/convert_graphdef_memmapped_format_test.cc @@ -26,6 +26,15 @@ limitations under the License. 
namespace tensorflow { namespace { +bool GraphHasImmutableConstNodes(const GraphDef& graph_def) { + for (const auto& node : graph_def.node()) { + if (node.op() == "ImmutableConst") { + return true; + } + } + return false; +} + TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) { const string dir = testing::TmpDir(); const string filename_pb = io::JoinPath(dir, "graphdef.pb"); @@ -69,6 +78,7 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) { TF_ASSERT_OK(ReadBinaryProto( &memmapped_env, MemmappedFileSystem::kMemmappedPackageDefaultGraphDef, &loaded_graph_def)); + ASSERT_TRUE(GraphHasImmutableConstNodes(loaded_graph_def)); TF_ASSERT_OK(session->Create(loaded_graph_def)) << "Can't create test graph"; std::vector outputs; @@ -79,5 +89,48 @@ TEST(ConvertGraphdefMemmappedFormatTest, ConvertModel) { EXPECT_EQ(outputs.front().flat()(2), 2.0f * 3.0f * kTensorHeight); } +TEST(ConvertGraphdefMemmappedFormatTest, NotSupportedTypesConvert) { + // Create a graph with strings. + const string dir = testing::TmpDir(); + const string filename_pb = io::JoinPath(dir, "string_graphdef.pb"); + + constexpr int kTensorWidth = 4000; + constexpr int kTensorHeight = 100; + const TensorShape kTestTensorShape({kTensorWidth, kTensorHeight}); + Tensor test_tensor1(DT_STRING, kTestTensorShape); + test::FillFn(&test_tensor1, [](int) -> string { return "ABC"; }); + + Tensor test_tensor2(DT_STRING, kTestTensorShape); + test::FillFn(&test_tensor2, [](int) -> string { return "XYZ"; }); + auto root = Scope::NewRootScope().ExitOnError(); + ops::Output m = ops::Add(root, test_tensor1, test_tensor2); + const string result_name = m.node()->name(); + + GraphDef graph_def; + TF_ASSERT_OK(root.ToGraphDef(&graph_def)); + string graph_def_serialized; + graph_def.SerializeToString(&graph_def_serialized); + TF_ASSERT_OK( + WriteStringToFile(Env::Default(), filename_pb, graph_def_serialized)); + + const string filename_mmap = io::JoinPath(dir, "string_graphdef.mmap"); + 
TF_ASSERT_OK(ConvertConstantsToImmutable(filename_pb, filename_mmap, 1000)); + + // Create and initialize MemmappedEnv from the converted file. + MemmappedEnv memmapped_env(Env::Default()); + TF_ASSERT_OK(memmapped_env.InitializeFromFile(filename_mmap)); + + // Load the graph and run calculations. + SessionOptions session_options; + session_options.env = &memmapped_env; + std::unique_ptr session(NewSession(session_options)); + ASSERT_TRUE(session != nullptr) << "Failed to create session"; + GraphDef loaded_graph_def; + TF_ASSERT_OK(ReadBinaryProto( + &memmapped_env, MemmappedFileSystem::kMemmappedPackageDefaultGraphDef, + &loaded_graph_def)); + ASSERT_FALSE(GraphHasImmutableConstNodes(loaded_graph_def)); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index a2a998cf4dc..1c37921afc3 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -164,6 +164,8 @@ cc_library( "lib/core/threadpool.h", "lib/gtl/array_slice.h", "lib/gtl/cleanup.h", + "lib/gtl/flatmap.h", + "lib/gtl/flatset.h", "lib/gtl/inlined_vector.h", "lib/gtl/priority_queue_util.h", "lib/hash/crc32c.h", @@ -178,7 +180,6 @@ cc_library( "lib/io/table.h", "lib/io/table_builder.h", "lib/io/table_options.h", - "lib/jpeg/jpeg_mem.h", "lib/math/math_util.h", "lib/monitoring/collected_metrics.h", "lib/monitoring/collection_registry.h", @@ -220,6 +221,13 @@ cc_library( ], ) +cc_library( + name = "jpeg", + hdrs = ["lib/jpeg/jpeg_mem.h"], + visibility = ["//visibility:public"], + deps = [":jpeg_internal"], +) + # Test support library needed for all tests # This is currently public, but may be made internal in the # future. Try to avoid depending on it. 
@@ -521,6 +529,7 @@ cc_library( "//tensorflow/core/kernels:control_flow_ops", "//tensorflow/core/kernels:ctc_ops", "//tensorflow/core/kernels:data_flow", + "//tensorflow/core/kernels:fake_quant_ops", "//tensorflow/core/kernels:function_ops", "//tensorflow/core/kernels:image", "//tensorflow/core/kernels:io", @@ -970,6 +979,7 @@ cc_library( ], exclude = [ "**/*test*", + "lib/jpeg/**/*", "platform/**/cuda.h", "platform/**/stream_executor.h", "platform/load_library.cc", @@ -986,6 +996,7 @@ cc_library( ], exclude = [ "**/*test*", + "lib/jpeg/**/*", "platform/**/cuda.h", "platform/**/stream_executor.h", ], @@ -1019,7 +1030,6 @@ cc_library( "lib/io/zlib_compression_options.h", "lib/io/zlib_inputstream.h", "lib/io/zlib_outputbuffer.h", - "lib/jpeg/jpeg_handle.h", "lib/png/png_io.h", "lib/random/random.h", "lib/random/random_distributions.h", @@ -1048,6 +1058,26 @@ cc_library( ], ) +cc_library( + name = "jpeg_internal", + srcs = glob( + [ + "lib/jpeg/*h", + "lib/jpeg/*.cc", + ], + exclude = [ + "**/*test*", + ], + ), + hdrs = ["lib/jpeg/jpeg_handle.h"], + copts = tf_copts(), + linkopts = ["-ldl"], + deps = [ + ":lib", + "//tensorflow/core/platform/default/build_config:jpeg", + ], +) + proto_text_hdrs_and_srcs = tf_generate_proto_text_sources( name = "proto_text_srcs_all", srcs = tf_proto_text_protos_relative(), @@ -1149,83 +1179,6 @@ cc_header_only_library( ], ) -filegroup( - name = "framework_headers", - srcs = [ - "framework/allocator.h", - "framework/attr_value_util.h", - "framework/bfloat16.h", - "framework/cancellation.h", - "framework/control_flow.h", - "framework/device_base.h", - "framework/function.h", - "framework/kernel_def_builder.h", - "framework/node_def_util.h", - "framework/numeric_types.h", - "framework/op.h", - "framework/op_def_builder.h", - "framework/op_def_util.h", - "framework/op_kernel.h", - "framework/partial_tensor_shape.h", - "framework/register_types.h", - "framework/rendezvous.h", - "framework/selective_registration.h", - 
"framework/session_state.h", - "framework/shape_inference.h", - "framework/tensor.h", - "framework/tensor_reference.h", - "framework/tensor_shape.h", - "framework/tensor_types.h", - "framework/tracking_allocator.h", - "framework/type_traits.h", - "framework/types.h", - "framework/unique_tensor_references.h", - "lib/core/errors.h", - "lib/core/notification.h", - "lib/core/refcount.h", - "lib/core/status.h", - "lib/core/stringpiece.h", - "lib/core/threadpool.h", - "lib/gtl/array_slice.h", - "lib/gtl/array_slice_internal.h", - "lib/gtl/inlined_vector.h", - "lib/gtl/manual_constructor.h", - "lib/hash/hash.h", - "lib/strings/numbers.h", - "lib/strings/str_util.h", - "lib/strings/strcat.h", - "platform/cpu_info.h", - "platform/default/dynamic_annotations.h", - "platform/default/integral_types.h", - "platform/default/logging.h", - "platform/default/mutex.h", - "platform/default/notification.h", - "platform/default/protobuf.h", - "platform/default/thread_annotations.h", - "platform/dynamic_annotations.h", - "platform/env.h", - "platform/file_statistics.h", - "platform/file_system.h", - "platform/fingerprint.h", - "platform/logging.h", - "platform/macros.h", - "platform/mem.h", - "platform/mutex.h", - "platform/net.h", - "platform/notification.h", - "platform/platform.h", - "platform/prefetch.h", - "platform/protobuf.h", - "platform/strong_hash.h", - "platform/thread_annotations.h", - "platform/types.h", - "public/session.h", - "public/session_options.h", - "public/version.h", - "util/device_name_utils.h", - ], -) - tf_cuda_library( name = "stream_executor", srcs = tf_additional_stream_executor_srcs(), @@ -1316,7 +1269,7 @@ cc_library( "platform/regexp.h", ], visibility = [ - "//tensorflow/contrib/tfprof:__subpackages__", + "//tensorflow/tools/tfprof:__subpackages__", ], deps = [":lib_internal"], ) @@ -1326,11 +1279,13 @@ tf_cuda_library( srcs = ["common_runtime/direct_session.cc"], hdrs = ["common_runtime/direct_session.h"], copts = tf_copts(), + cuda_deps = [ + 
":gpu_tracer", + ], linkstatic = 1, deps = [ ":core_cpu_internal", ":framework", - ":gpu_tracer", ":lib", ":lib_internal", ":proto_text", @@ -1496,6 +1451,8 @@ tf_cc_tests( "lib/gtl/array_slice_test.cc", "lib/gtl/cleanup_test.cc", "lib/gtl/edit_distance_test.cc", + "lib/gtl/flatmap_test.cc", + "lib/gtl/flatset_test.cc", "lib/gtl/inlined_vector_test.cc", "lib/gtl/int_type_test.cc", "lib/gtl/iterator_range_test.cc", @@ -1582,6 +1539,8 @@ cc_test( srcs = ["lib/jpeg/jpeg_mem_unittest.cc"], data = glob(["lib/jpeg/testdata/*.jpg"]), deps = [ + ":jpeg", + ":jpeg_internal", ":lib", ":lib_internal", ":test", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 59fa09bd8db..35332dfc8cf 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -23,7 +23,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/executor.h" #include "tensorflow/core/common_runtime/function.h" -#include "tensorflow/core/common_runtime/gpu/gpu_tracer.h" #include "tensorflow/core/common_runtime/graph_optimizer.h" #include "tensorflow/core/common_runtime/memory_types.h" #include "tensorflow/core/common_runtime/simple_placer.h" @@ -57,6 +56,10 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/device_name_utils.h" +#if GOOGLE_CUDA +#include "tensorflow/core/common_runtime/gpu/gpu_tracer.h" +#endif // GOOGLE_CUDA + namespace tensorflow { namespace { @@ -453,12 +456,14 @@ Status DirectSession::Run(const RunOptions& run_options, args.stats_collector = run_state.collector.get(); } +#if GOOGLE_CUDA std::unique_ptr tracer; if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) { tracer.reset(CreateGPUTracer()); // tracer will be NULL on non-GPU platforms. 
if (tracer) tracer->Start(); } +#endif // GOOGLE_CUDA for (const auto& item : executors_and_keys->items) { item.executor->RunAsync(args, barrier->Get()); @@ -468,10 +473,12 @@ Status DirectSession::Run(const RunOptions& run_options, ? run_options.timeout_in_ms() : operation_timeout_in_ms_); +#if GOOGLE_CUDA if (tracer) { tracer->Stop(); tracer->Collect(args.stats_collector); } +#endif // GOOGLE_CUDA { mutex_lock l(run_state.mu_); @@ -840,10 +847,11 @@ Status DirectSession::GetOrCreateExecutors( std::vector tn_sorted(target_nodes.begin(), target_nodes.end()); std::sort(tn_sorted.begin(), tn_sorted.end()); - const string key = strings::StrCat(str_util::Join(inputs_sorted, ","), "->", - str_util::Join(outputs_sorted, ","), "/", - str_util::Join(tn_sorted, ","), "/", - run_state_args->is_partial_run); + const string key = strings::StrCat( + str_util::Join(inputs_sorted, ","), "->", + str_util::Join(outputs_sorted, ","), "/", str_util::Join(tn_sorted, ","), + "/", run_state_args->is_partial_run, "/", + SummarizeDebugTensorWatches(run_state_args->debug_tensor_watches)); // Set the handle. 
run_state_args->handle = @@ -938,7 +946,7 @@ Status DirectSession::GetOrCreateExecutors( partition_graph = iter->second.release(); optimizer.Optimize(lib, options_.env, device, &partition_graph); - // EXPERIMENTAL: tfdb inserts debug nodes (i.e., probes) to the graph + // EXPERIMENTAL: tfdbg inserts debug nodes (i.e., probes) to the graph if (!run_state_args->debug_tensor_watches.empty()) { TF_RETURN_IF_ERROR( DebugNodeInserter::InsertNodes(run_state_args->debug_tensor_watches, diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index a4289112534..0e7203a4d86 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -291,7 +291,7 @@ class DirectSession : public Session { TF_DISALLOW_COPY_AND_ASSIGN(DirectSession); - // EXPERIMENTAL: debugger (tfdb) related + // EXPERIMENTAL: debugger (tfdbg) related friend class DebugGateway; }; diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index c3cc11abb1b..390809b68a0 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -222,7 +222,7 @@ typedef gtl::InlinedVector AllocatorAttributeVec; class ExecutorImpl : public Executor { public: ExecutorImpl(const LocalExecutorParams& p, const Graph* g) - : params_(p), graph_(g), initial_pending_counts_(graph_->num_node_ids()) { + : params_(p), graph_(g) { CHECK(p.create_kernel != nullptr); CHECK(p.delete_kernel != nullptr); } @@ -231,6 +231,7 @@ class ExecutorImpl : public Executor { for (int i = 0; i < graph_->num_node_ids(); i++) { params_.delete_kernel(nodes_[i].kernel); } + delete[] frame_local_ids_; delete[] nodes_; delete graph_; } @@ -256,13 +257,39 @@ class ExecutorImpl : public Executor { private: friend class ExecutorState; - static void InitializePending(const Graph* graph, PendingCounts* counts); + struct ControlFlowInfo { + std::unordered_map 
frame_name_to_size; + std::vector frame_names; + }; + + struct FrameInfo { + // The total number of inputs to a frame. + int input_count; + + // The total number of input tensors of a frame. + // == sum(nodes[*].num_inputs()) where nodes are the nodes in the frame. + int total_inputs; + + // Each frame has its own PendingCounts only for the nodes in the frame. + PendingCounts* pending_counts; // Owned + + // The nodes in a frame. Used only for debugging. + std::vector* nodes; // Owned + + ~FrameInfo() { + delete pending_counts; + delete nodes; + } + }; + + static Status BuildControlFlowInfo(const Graph* graph, + ControlFlowInfo* cf_info); + void InitializePending(const Graph* graph, const ControlFlowInfo& cf_info); // Owned. LocalExecutorParams params_; const Graph* graph_; NodeItem* nodes_ = nullptr; // array of size "graph_.num_node_ids()" - int total_input_tensors_ = 0; // == sum(nodes_[*].num_inputs()) int total_output_tensors_ = 0; // == sum(nodes_[*].num_outputs()) // A cached value of params_ @@ -271,14 +298,17 @@ class ExecutorImpl : public Executor { // Root nodes (with no in edges) that should form the initial ready queue std::vector root_nodes_; - PendingCounts initial_pending_counts_; - - // The number of inputs for each frame in this graph. This is static - // information of the graph. - std::unordered_map frame_input_count_; - std::vector output_attrs_; + // Mapping from frame name to static information about the frame. + // TODO(yuanbyu): We could cache it along with the graph so to avoid + // the overhead of constructing it for each executor instance. + std::unordered_map frame_info_; + + // Mapping from a node's id to its index in the PendingCounts of the + // frame the node belongs to. 
+ int* frame_local_ids_ = nullptr; // Owned + TF_DISALLOW_COPY_AND_ASSIGN(ExecutorImpl); }; @@ -287,23 +317,31 @@ Status ExecutorImpl::Initialize() { delete[] nodes_; nodes_ = new NodeItem[num_nodes]; - Status s; - total_input_tensors_ = 0; total_output_tensors_ = 0; - InitializePending(graph_, &initial_pending_counts_); + // Build the information about frames in this subgraph. + ControlFlowInfo cf_info; + BuildControlFlowInfo(graph_, &cf_info); // Cache this value so we make this virtual function call once, rather // that O(# steps * # nodes per step) times. device_record_tensor_accesses_ = params_.device->RequiresRecordingAccessedTensors(); + for (auto& it : cf_info.frame_name_to_size) { + frame_info_[it.first].nodes = new std::vector; + } + frame_local_ids_ = new int[num_nodes]; + std::unordered_map frame_count; + // Preprocess every node in the graph to create an instance of op - // kernel for each node; + // kernel for each node. for (const Node* n : graph_->nodes()) { const int id = n->id(); + const string& frame_name = cf_info.frame_names[id]; + FrameInfo& frame_info = frame_info_[frame_name]; - // See if this node is a root node, and if so, add to root_nodes_ + // See if this node is a root node, and if so, add to root_nodes_. const int num_in_edges = n->in_edges().size(); if (num_in_edges == 0) { root_nodes_.push_back(n); @@ -321,18 +359,18 @@ Status ExecutorImpl::Initialize() { item->inlined_output_type[i] = n->output_type(i); } - item->input_start = total_input_tensors_; - total_input_tensors_ += n->num_inputs(); + item->input_start = frame_info.total_inputs; + frame_info.total_inputs += n->num_inputs(); item->output_attr_start = total_output_tensors_; total_output_tensors_ += n->num_outputs(); - s = params_.create_kernel(n->def(), &item->kernel); + Status s = params_.create_kernel(n->def(), &item->kernel); if (!s.ok()) { item->kernel = nullptr; s = AttachDef(s, n->def()); LOG(ERROR) << "Executor failed to create kernel. 
" << s; - break; + return s; } CHECK(item->kernel); item->kernel_is_expensive = item->kernel->IsExpensive(); @@ -340,14 +378,18 @@ Status ExecutorImpl::Initialize() { item->is_merge = IsMerge(n); // Initialize static information about the frames in the graph. + frame_local_ids_[id] = frame_count[frame_name]++; + frame_info.nodes->push_back(n); if (IsEnter(n)) { - string frame_name; - s = GetNodeAttr(n->def(), "frame_name", &frame_name); - if (!s.ok()) return s; - ++frame_input_count_[frame_name]; + string enter_name; + TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), "frame_name", &enter_name)); + ++frame_info_[enter_name].input_count; } } - if (!s.ok()) return s; + + // Initialize PendingCounts only after frame_local_ids_ is initialized. + InitializePending(graph_, cf_info); + return SetAllocAttrs(); } @@ -533,12 +575,13 @@ class ExecutorState { typedef gtl::InlinedVector EntryVector; struct IterationState { - explicit IterationState(const ExecutorImpl* impl) - : input_tensors(new Entry[impl->total_input_tensors_]), + explicit IterationState(const PendingCounts* pending_counts, + int total_input_tensors) + : input_tensors(new Entry[total_input_tensors]), outstanding_ops(0), outstanding_frame_count(0), - counts_(impl->graph_->num_node_ids()) { - counts_.InitializeFrom(impl->initial_pending_counts_); + counts_(pending_counts->num_nodes()) { + counts_.InitializeFrom(*pending_counts); } // The state of an iteration. @@ -668,9 +711,23 @@ class ExecutorState { // will only "execute" the dead exits of the final iteration. std::vector dead_exits GUARDED_BY(mu); + // Static information specific to this frame. + PendingCounts* pending_counts = nullptr; + int total_input_tensors = 0; + std::vector* nodes = nullptr; + // Lock ordering: ExecutorState.mu_ < mu. 
mutex mu; + void InitializeFrameInfo(const string& enter_name) { + auto it_frame_info = executor->frame_info_.find(enter_name); + DCHECK(it_frame_info != executor->frame_info_.end()); + pending_counts = it_frame_info->second.pending_counts; + total_input_tensors = it_frame_info->second.total_inputs; + num_pending_inputs = it_frame_info->second.input_count; + nodes = it_frame_info->second.nodes; + } + inline IterationState* GetIteration(int64 iter) EXCLUSIVE_LOCKS_REQUIRED(mu) { int index = iter % iterations.size(); @@ -889,13 +946,12 @@ class ExecutorState { inline void MaybeMarkCompleted(FrameState* frame, int64 iter, int64 id); // Provide debugging output about an outstanding node in the executor. - void DumpCompletedNodeState(const int node_id, const Entry* input_vector); void DumpPendingNodeState(const int node_id, const Entry* input_vector, bool show_nodes_with_no_ready_inputs); void DumpActiveNodeState(const int node_id, const Entry* input_vector); // Provide debugging output about an outstanding iteration in the executor. - void DumpIterationState(IterationState* iteration); + void DumpIterationState(const FrameState* frame, IterationState* iteration); // Provide debugging output of the state of the executor. void DumpState(); @@ -932,16 +988,16 @@ ExecutorState::ExecutorState(const Executor::Args& args, ExecutorImpl* impl) num_outstanding_ops_(0) { // We start the entire execution in iteration 0 of the root frame // so let us create the root frame and the state for iteration 0. - // Initialize the frame. + // We assume root_frame_->frame_name.empty(). root_frame_ = new FrameState(impl_, 1); - root_frame_->frame_name = "_root"; // assume to be unique root_frame_->frame_id = 0; // must be 0 - // Initialize the first iteration. 
- root_frame_->iterations.resize(root_frame_->max_parallel_iterations); - IterationState* iter_state = new IterationState(impl); - root_frame_->iterations[0] = iter_state; + root_frame_->InitializeFrameInfo(root_frame_->frame_name); + + // Initialize iteration 0. + root_frame_->iterations.resize(root_frame_->max_parallel_iterations); + root_frame_->iterations[0] = new IterationState( + root_frame_->pending_counts, root_frame_->total_input_tensors); - if (vlog_) VLOG(2) << "Create frame: " << root_frame_->frame_name; outstanding_frames_.insert({root_frame_->frame_name, root_frame_}); } @@ -949,21 +1005,88 @@ ExecutorState::~ExecutorState() { for (auto name_frame : outstanding_frames_) { delete name_frame.second; } - for (auto it : device_context_map_) { it->Unref(); } - delete slice_reader_cache_; } +Status ExecutorImpl::BuildControlFlowInfo(const Graph* g, + ControlFlowInfo* cf_info) { + const int num_nodes = g->num_node_ids(); + cf_info->frame_names.resize(num_nodes); + std::vector parent_nodes; + parent_nodes.resize(num_nodes); + std::vector visited; + visited.resize(num_nodes); + + string frame_name; + std::deque ready; + + // Initialize with the root nodes. + for (Node* n : g->nodes()) { + if (n->in_edges().empty()) { + visited[n->id()] = true; + ++cf_info->frame_name_to_size[frame_name]; + ready.push_back(n); + } + } + + while (!ready.empty()) { + Node* curr_node = ready.front(); + int curr_id = curr_node->id(); + ready.pop_front(); + + Node* parent = nullptr; + if (IsEnter(curr_node)) { + // Enter a child frame. + TF_RETURN_IF_ERROR( + GetNodeAttr(curr_node->def(), "frame_name", &frame_name)); + parent = curr_node; + } else if (IsExit(curr_node)) { + // Exit to the parent frame. 
+ parent = parent_nodes[curr_id]; + frame_name = cf_info->frame_names[parent->id()]; + parent = parent_nodes[parent->id()]; + } else { + parent = parent_nodes[curr_id]; + frame_name = cf_info->frame_names[curr_id]; + } + + for (const Edge* out_edge : curr_node->out_edges()) { + Node* out = out_edge->dst(); + int out_id = out->id(); + + // Add to ready queue if not visited. + bool is_visited = visited[out_id]; + if (!is_visited) { + ready.push_back(out); + visited[out_id] = true; + + // Process the node 'out'. + cf_info->frame_names[out_id] = frame_name; + parent_nodes[out_id] = parent; + ++cf_info->frame_name_to_size[frame_name]; + } + } + } + + return Status::OK(); +} + void ExecutorImpl::InitializePending(const Graph* graph, - PendingCounts* counts) { - for (int id = 0; id < graph->num_node_ids(); id++) { - counts->set_initial_count(id, 0, 0); // Make sure everything is initialized + const ControlFlowInfo& cf_info) { + for (auto& it : cf_info.frame_name_to_size) { + PendingCounts* counts = new PendingCounts(it.second); + frame_info_[it.first].pending_counts = counts; + // Make sure everything is initialized + for (int id = 0; id < it.second; id++) { + counts->set_initial_count(id, 0, 0); + } } for (const Node* n : graph->nodes()) { const int id = n->id(); + const int pending_id = frame_local_ids_[id]; const int num_in_edges = n->in_edges().size(); int initial_count; if (IsMerge(n)) { @@ -980,7 +1103,9 @@ void ExecutorImpl::InitializePending(const Graph* graph, } else { initial_count = num_in_edges; } - counts->set_initial_count(id, initial_count, num_in_edges); + const string& name = cf_info.frame_names[id]; + PendingCounts* counts = frame_info_[name].pending_counts; + counts->set_initial_count(pending_id, initial_count, num_in_edges); } } @@ -1104,8 +1229,9 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { // TODO(misard) Replace with a finer-grain enabling flag once we // add better optional debugging support. 
if (vlog_ && VLOG_IS_ON(1)) { + int pending_id = impl_->frame_local_ids_[id]; mutex_lock l(input_frame->mu); - input_frame->GetIteration(input_iter)->mark_started(id); + input_frame->GetIteration(input_iter)->mark_started(pending_id); } // Set the device_context for this node id, if it exists. @@ -1637,12 +1763,13 @@ void ExecutorState::ScheduleReady(const TaggedNodeSeq& ready, } inline void ExecutorState::MaybeMarkCompleted(FrameState* frame, int64 iter, - int64 id) { + int64 node_id) { // TODO(misard) Replace with a finer-grain enabling flag once we // add better optional debugging support. if (vlog_ && VLOG_IS_ON(1)) { + int pending_id = impl_->frame_local_ids_[node_id]; mutex_lock l(frame->mu); - frame->GetIteration(iter)->mark_completed(id); + frame->GetIteration(iter)->mark_completed(pending_id); } } @@ -1656,18 +1783,6 @@ const Tensor* ExecutorState::GetTensorValueForDump(const Entry& input) { } } -void ExecutorState::DumpCompletedNodeState(const int node_id, - const Entry* input_vector) { - const NodeItem& node_item = impl_->nodes_[node_id]; - const Node& node = *node_item.node; - LOG(WARNING) << " Completed Node: " << node.DebugString(); - const int input_base = node_item.input_start; - for (int i = 0; i < node.num_inputs(); ++i) { - const Entry& input = input_vector[input_base + i]; - CHECK(!GetTensorValueForDump(input)->IsInitialized()); - } -} - void ExecutorState::DumpPendingNodeState( const int node_id, const Entry* input_vector, const bool show_nodes_with_no_ready_inputs) { @@ -1723,23 +1838,30 @@ void ExecutorState::DumpActiveNodeState(const int node_id, } } -void ExecutorState::DumpIterationState(IterationState* iteration) { +void ExecutorState::DumpIterationState(const FrameState* frame, + IterationState* iteration) { + const std::vector* nodes = frame->nodes; // Dump any waiting nodes that are holding on to tensors. 
- for (int i = 0; i < impl_->graph_->num_node_ids(); ++i) { - if (iteration->node_state(i) == PendingCounts::PENDING_NOTREADY || - iteration->node_state(i) == PendingCounts::PENDING_READY) { - DumpPendingNodeState(i, iteration->input_tensors, false); + for (const Node* node : *nodes) { + int node_id = node->id(); + int pending_id = impl_->frame_local_ids_[node_id]; + if (iteration->node_state(pending_id) == PendingCounts::PENDING_NOTREADY || + iteration->node_state(pending_id) == PendingCounts::PENDING_READY) { + DumpPendingNodeState(node_id, iteration->input_tensors, false); } } // Then the active nodes. - for (int i = 0; i < impl_->graph_->num_node_ids(); ++i) { - if (iteration->node_state(i) == PendingCounts::STARTED) { - DumpActiveNodeState(i, iteration->input_tensors); + for (const Node* node : *nodes) { + int node_id = node->id(); + int pending_id = impl_->frame_local_ids_[node_id]; + if (iteration->node_state(pending_id) == PendingCounts::STARTED) { + DumpActiveNodeState(node_id, iteration->input_tensors); } } // Show all input tensors in use. + int total_input_tensors = frame->total_input_tensors; size_t total_bytes = 0; - for (int i = 0; i < impl_->total_input_tensors_; ++i) { + for (int i = 0; i < total_input_tensors; ++i) { const Entry& input = iteration->input_tensors[i]; const Tensor* tensor = GetTensorValueForDump(input); if (tensor->IsInitialized()) { @@ -1764,7 +1886,7 @@ void ExecutorState::DumpState() { mutex_lock frame_lock(frame_state->mu); for (IterationState* iteration : frame_state->iterations) { LOG(WARNING) << " Iteration:"; - DumpIterationState(iteration); + DumpIterationState(frame_state, iteration); } } dumped_on_error_ = true; @@ -1819,16 +1941,13 @@ void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, temp->frame_id = Hash64(child_name); temp->parent_frame = frame; temp->parent_iter = iter; + temp->InitializeFrameInfo(enter_name); // 'iterations' is a fixed-length circular buffer. 
temp->iterations.resize(temp->max_parallel_iterations + 1); - // Initialize the first iteration. - IterationState* iter_state = new IterationState(impl_); - temp->iterations[0] = iter_state; - - auto frame_pending = impl_->frame_input_count_.find(enter_name); - DCHECK(frame_pending != impl_->frame_input_count_.end()); - temp->num_pending_inputs = frame_pending->second; + // Initialize iteration 0. + temp->iterations[0] = + new IterationState(temp->pending_counts, temp->total_input_tensors); { mutex_lock executor_lock(mu_); @@ -1851,33 +1970,40 @@ void ExecutorState::DeleteFrame(FrameState* frame, TaggedNodeSeq* ready) { FrameState* parent_frame = frame->parent_frame; int64 parent_iter = frame->parent_iter; if (parent_frame != nullptr) { + const int* pending_ids = impl_->frame_local_ids_; mutex_lock paranet_frame_lock(parent_frame->mu); // Propagate all the dead exits to the parent frame. for (const Node* node : frame->dead_exits) { auto parent_iter_state = parent_frame->GetIteration(parent_iter); for (const Edge* e : node->out_edges()) { const Node* dst_node = e->dst(); - const int dst_id = dst_node->id(); + const int dst_pending_id = pending_ids[dst_node->id()]; + + // TODO(yuanbyu): We don't need this if we require the subgraph + // given to an executor not to contain a sink node. + if (dst_node->IsSink()) continue; bool dst_dead = true; bool dst_ready = false; // We know this is a dead input to dst. 
if (IsMerge(dst_node)) { if (e->IsControlEdge()) { - parent_iter_state->decrement_pending(dst_id, 2); - int count = parent_iter_state->pending(dst_id); - dst_dead = (parent_iter_state->dead_count(dst_id) == - dst_node->num_inputs()); + parent_iter_state->decrement_pending(dst_pending_id, 2); + int count = parent_iter_state->pending(dst_pending_id); + int dead_cnt = parent_iter_state->dead_count(dst_pending_id); + dst_dead = (dead_cnt == dst_node->num_inputs()); dst_ready = (count == 0) || ((count == 1) && dst_dead); } else { - parent_iter_state->increment_dead_count(dst_id); - const int dead_cnt = parent_iter_state->dead_count(dst_id); + parent_iter_state->increment_dead_count(dst_pending_id); + const int dead_cnt = parent_iter_state->dead_count(dst_pending_id); dst_dead = (dead_cnt == dst_node->num_inputs()); - dst_ready = (parent_iter_state->pending(dst_id) == 1) && dst_dead; + dst_ready = + (parent_iter_state->pending(dst_pending_id) == 1) && dst_dead; } } else { - parent_iter_state->increment_dead_count(dst_id); - dst_ready = (parent_iter_state->decrement_pending(dst_id, 1) == 0); + parent_iter_state->increment_dead_count(dst_pending_id); + dst_ready = + (parent_iter_state->decrement_pending(dst_pending_id, 1) == 0); } if (dst_ready) { ready->push_back( @@ -1923,12 +2049,18 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node, const EntryVector& outputs, TaggedNodeSeq* ready) { const NodeItem* nodes = executor->nodes_; + const int* pending_ids = executor->frame_local_ids_; IterationState* iter_state = GetIteration(iter); for (const Edge* e : node->out_edges()) { const Node* dst_node = e->dst(); const int dst_id = dst_node->id(); + const int dst_pending_id = pending_ids[dst_id]; const int src_slot = e->src_output(); + // TODO(yuanbyu): We don't need this if we require the subgraph + // given to an executor not to contain a sink node. 
+ if (dst_node->IsSink()) continue; + bool dst_dead = false; bool dst_ready = false; // True iff this input for dst is needed. We only set this input for @@ -1940,15 +2072,16 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node, // a) a live data input becomes available or b) all data inputs are dead. // For Merge, pending's LSB is set iff a live data input has arrived. if (e->IsControlEdge()) { - iter_state->decrement_pending(dst_id, 2); - int count = iter_state->pending(dst_id); - dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs()); + iter_state->decrement_pending(dst_pending_id, 2); + int count = iter_state->pending(dst_pending_id); + int dead_cnt = iter_state->dead_count(dst_pending_id); + dst_dead = (dead_cnt == dst_node->num_inputs()); dst_ready = (count == 0) || ((count == 1) && dst_dead); } else { if (outputs[src_slot].has_value) { // This is a live data input. - int count = iter_state->pending(dst_id); - iter_state->mark_live(dst_id); + int count = iter_state->pending(dst_pending_id); + iter_state->mark_live(dst_pending_id); // Only the first live edge sets the input and (potentially) // triggers execution. The low bit of count is set if and // only if no live input has been used yet (mark_live clears @@ -1962,10 +2095,10 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node, // a dead enter. We need this to handle properly a while loop on // the untaken branch of a conditional. // TODO(yuanbyu): This is a bit hacky, but a good solution for now. 
- iter_state->increment_dead_count(dst_id); - const int dead_cnt = iter_state->dead_count(dst_id); + iter_state->increment_dead_count(dst_pending_id); + const int dead_cnt = iter_state->dead_count(dst_pending_id); dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node); - dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead; + dst_ready = (iter_state->pending(dst_pending_id) == 1) && dst_dead; dst_need_input = false; } } @@ -1974,10 +2107,10 @@ void ExecutorState::FrameState::ActivateNodes(const Node* node, // for all inputs to come in even if we know the node is dead. This // ensures that all input tensors get cleaned up. if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) { - iter_state->increment_dead_count(dst_id); + iter_state->increment_dead_count(dst_pending_id); } - dst_dead = iter_state->dead_count(dst_id) > 0; - dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0); + dst_dead = iter_state->dead_count(dst_pending_id) > 0; + dst_ready = (iter_state->decrement_pending(dst_pending_id, 1) == 0); } if (dst_need_input) { @@ -2052,7 +2185,8 @@ void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) { int64 next_iter = iteration_count; // Initialize the next iteration. 
- IterationState* iter_state = new IterationState(executor); + IterationState* iter_state = + new IterationState(pending_counts, total_input_tensors); SetIteration(next_iter, iter_state); num_outstanding_iterations++; dead_exits.clear(); diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 89c870253f2..c868083efda 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -44,11 +44,7 @@ static const char* const kRetOp = "_Retval"; static const char* const kGradientOp = "SymbolicGradient"; static const char* const kNodeLabel = "Func"; static const char* const kFuncAttr = "f"; -// kNoinlineAttr must start with an "_" to avoid collisions with -// user-specified attrs. -static const char* const kNoinlineAttr = "_noinline"; -// Old graphs use no "_". -static const char* const kOldNoinlineAttr = "noinline"; +static const char* const kNoInlineAttr = "_noinline"; // Represents the index-th output of a node. 
struct Endpoint { @@ -168,6 +164,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { Device* device() override { return device_; } Env* env() override { return env_; } + int graph_def_version() override { return graph_def_version_; } string DebugString(Handle h) override; @@ -290,6 +287,34 @@ const FunctionBody* FunctionLibraryRuntimeImpl::GetFunctionBody(Handle h) { return func_graphs_[h]; } +namespace { + +struct CustomCreatorSingleton { + mutex mu; + CustomKernelCreator custom_creator = nullptr; + + void Set(CustomKernelCreator cb) { + mutex_lock l(mu); + custom_creator = cb; + } + + CustomKernelCreator Get() { + mutex_lock l(mu); + return custom_creator; + } +}; + +CustomCreatorSingleton* GetCustomCreatorSingleton() { + static CustomCreatorSingleton* ccs = new CustomCreatorSingleton; + return ccs; +} + +} // end namespace + +void RegisterCustomKernelCreator(CustomKernelCreator cb) { + GetCustomCreatorSingleton()->Set(cb); +} + Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef, OpKernel** kernel) { if (lib_def_->Find(ndef.op()) == nullptr) { @@ -318,8 +343,23 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef, output_memory_types.push_back(t == DT_INT32 ? HOST_MEMORY : DEVICE_MEMORY); } - // Constructs a CallOp kernel for running the instantiated function. + // If a custom kernel creator is given, try that. + CustomKernelCreator custom_creator = GetCustomCreatorSingleton()->Get(); Status s; + if (custom_creator) { + std::unique_ptr ret; + s = custom_creator(this, ndef, &ret); + if (s.ok()) { + *kernel = ret.release(); + return s; + } else { + VLOG(2) << "Custom creator error: " << s; + // Falls through. + s = Status::OK(); + } + } + + // Constructs a CallOp kernel for running the instantiated function. 
auto device_type = DeviceType(device_->attributes().device_type()); OpKernelConstruction construction( device_type, device_, device_->GetAllocator(AllocatorAttributes()), &ndef, @@ -327,7 +367,7 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(const NodeDef& ndef, fbody->ret_types, output_memory_types, graph_def_version_, &s); *kernel = new CallOp(handle, &construction); if (!s.ok()) { - delete kernel; + delete *kernel; } return s; } @@ -887,15 +927,11 @@ static void InlineFunctionBody(Graph* g, Node* caller, } // Given a node's NodeDef, returns false iff the node explicitly -// specified _noinline. This gives ExpandInlineFunctions a heuristic to -// decide whether to inline the function. -// `old` is true for GraphDef versions older than 12, when the -// `noinline` attr was renamed to `_noinline` to avoid conflicts with -// user-specified attrs. -bool ShouldInline(const NodeDef& ndef, bool old) { +// specified _noinline. This gives ExpandInlineFunctions a heuristic +// to decide whether to inline the function. +bool ShouldInline(const NodeDef& ndef) { bool noinline = false; - const char* const attr = old ? kOldNoinlineAttr : kNoinlineAttr; - if (GetNodeAttr(ndef, attr, &noinline).ok()) { + if (GetNodeAttr(ndef, kNoInlineAttr, &noinline).ok()) { // If the node specifies attribute '_noinline', returns accordingly. return !noinline; } @@ -914,7 +950,8 @@ bool ShouldInline(const NodeDef& ndef, bool old) { // continue and the runtime will error out. return false; } - s = GetNodeAttr(AttrSlice(&forward_func_attrs->attr()), attr, &noinline); + s = GetNodeAttr(AttrSlice(&forward_func_attrs->attr()), kNoInlineAttr, + &noinline); if (!s.ok()) { // The forward function doesn't specify '_noinline' attr, we should // be free to decide. 
@@ -926,11 +963,9 @@ bool ShouldInline(const NodeDef& ndef, bool old) { bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph) { std::vector> candidates; - // Identify old graphs before the 'noinline' attr was renamed '_noinline'. - const bool old_inline_attr = graph->versions().producer() < 12; for (Node* node : graph->nodes()) { VLOG(3) << "Expanding " << node->DebugString(); - if (!ShouldInline(node->def(), old_inline_attr)) { + if (!ShouldInline(node->def())) { VLOG(3) << "noinline: " << node->DebugString(); continue; } diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h index 196226214ba..73e99442388 100644 --- a/tensorflow/core/common_runtime/function.h +++ b/tensorflow/core/common_runtime/function.h @@ -123,6 +123,18 @@ void ToGraphDef(const Graph* g, GraphDef* gdef, bool pretty = false); // TODO(zhifengc): Asks math expert to say the comment again. FunctionBody* SymbolicGradient(const FunctionBody& f); +// Registers a customizable kernel creator for a function call. +// +// If 'cb()' returns a non-OK, we still fall back to an executor-based +// interpreter op kernel to execute a function. If 'cb()' returns OK, +// takes ownership of the returned OpKernel. 
+// +// TODO(zhifengc/phawkins): b/32379046 +typedef std::function<Status(FunctionLibraryRuntime*, const NodeDef&, + std::unique_ptr<OpKernel>*)> + CustomKernelCreator; +void RegisterCustomKernelCreator(CustomKernelCreator cb); + } // end namespace tensorflow #endif // TENSORFLOW_COMMON_RUNTIME_FUNCTION_H_ diff --git a/tensorflow/core/common_runtime/pending_counts.h b/tensorflow/core/common_runtime/pending_counts.h index be2dc2418ed..cfc40324710 100644 --- a/tensorflow/core/common_runtime/pending_counts.h +++ b/tensorflow/core/common_runtime/pending_counts.h @@ -71,6 +71,7 @@ class PendingCounts { } } + inline int num_nodes() const { return num_nodes_; } NodeState node_state(int id) { if (IsLarge(id)) { return NodeStateLarge(id); @@ -185,12 +186,7 @@ class PendingCounts { // use one byte to hold both the pending and dead count for a node // where these together can fit in one byte, and we use a hash table // to handle the rare node ids that need larger counts than this. - - // TODO(yuanbyu): We current use O(# of nodes in partition) space - // even for nested iterations where only a small fraction of the - // nodes are involved. This is not efficient if the subgraph for - // the frame is only a small subset of the partition. We should make - // the vector size to be only the size of the frame subgraph. + // Each frame in this subgraph has its own PendingCounts. // We use 3 bits each for dead_count and pending. static const int kMaxCountForPackedCounts = 7; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 4752be41ff1..1ddd4830761 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -27,6 +27,10 @@ limitations under the License. 
namespace tensorflow { +using shape_inference::DimensionHandle; +using shape_inference::InferenceContext; +using shape_inference::ShapeHandle; + ShapeRefiner::ShapeRefiner(const OpRegistryInterface* ops) : ops_registry_(ops) {} @@ -37,7 +41,7 @@ Status ShapeRefiner::AddNode(const Node* node) { // from 'input's InferenceContext, and store into a vector // indexed by 'node's input. std::vector input_nodes(node->num_inputs()); - std::vector input_shapes(node->num_inputs()); + std::vector input_shapes(node->num_inputs()); for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) continue; @@ -49,7 +53,7 @@ Status ShapeRefiner::AddNode(const Node* node) { node->name(), "' was not previously added to ShapeRefiner."); } - shape_inference::InferenceContext* c = it->second; + InferenceContext* c = it->second; DCHECK_GE(e->dst_input(), 0); input_nodes[e->dst_input()] = input; input_shapes[e->dst_input()] = c->output(e->src_output()); @@ -68,11 +72,13 @@ Status ShapeRefiner::AddNode(const Node* node) { std::vector input_tensors(node->num_inputs()); std::vector real_tensors(node->num_inputs()); std::vector attempted_materialization(node->num_inputs()); + std::vector attempted_tensor_as_shape_conversion(node->num_inputs()); + std::vector input_tensors_as_shapes; // Create the inference context for this node with the existing input shapes. - std::unique_ptr c( - new shape_inference::InferenceContext(&node->def(), node->op_def(), - input_shapes, input_tensors)); + std::unique_ptr c( + new InferenceContext(&node->def(), node->op_def(), input_shapes, + input_tensors, input_tensors_as_shapes)); if (!c->construction_status().ok()) { return c->construction_status(); } @@ -101,63 +107,44 @@ Status ShapeRefiner::AddNode(const Node* node) { // subgraph once. for (int i = 0; i < c->num_inputs(); ++i) { + if (!c->requested_input_tensor(i)) { + continue; + } // Check if we have not already filled in the requested input, // and if not, try to materialize the tensors. 
- if (c->requested_input_tensor(i) && !attempted_materialization[i]) { + if (!attempted_materialization[i]) { attempted_materialization[i] = true; - const Edge* input_edge; - TF_RETURN_IF_ERROR(node->input_edge(i, &input_edge)); - - bool is_constant_graph = false; - Graph subgraph(ops_registry_); - - // We identify the possibly constant subgraph to evaluate by - // recursively iterating backwards through the inputs to 'node' - // until we either 1) find an already existing input to our subgraph - // (filled in `const_inputs`), 2) Discover our graph is not constant, - // or 3) Hit a root node. - std::vector> const_inputs; - TF_RETURN_IF_ERROR(ExtractConstantSubgraph( - input_nodes[i], &subgraph, &is_constant_graph, &const_inputs)); - if (is_constant_graph) { - const string output_tensor_name = strings::StrCat( - input_nodes[i]->name(), ":", input_edge->src_output()); - std::vector outputs; - // NOTE; we should pass in a function library runtime if we want - // to support constant-expression evaluation on functions. - Status s = GraphRunner::Run(&subgraph, nullptr /* function_library */, - Env::Default(), const_inputs, - {output_tensor_name}, &outputs); - - // If all kernels in the constant graph are not registered - // in the process, GraphRunner::Run may fail, in which case - // we cannot propagate constants, so this is best-effort. - if (s.ok()) { - real_tensors[i] = outputs[0]; - input_tensors[i] = &real_tensors[i]; - - // We have more concrete information about a shape, - // so re-run shape inference. - rerun_shape_fn = true; - - // We memoize (small) constants evaluated so far, so - // ExtractConstantSubgraph can avoid extracting the full - // subgraph. As we build up large graphs, this avoids - // repeated computation of the early parts of a constant - // graph. 
- if (outputs[0].TotalBytes() <= kMaxTensorSize) { - const_tensor_map_[output_tensor_name] = outputs[0]; - } - } + Tensor result; + bool evaluated = false; + TF_RETURN_IF_ERROR( + EvaluateConstantTensorForEdge(node, i, &evaluated, &result)); + if (evaluated) { + real_tensors[i] = result; + input_tensors[i] = &real_tensors[i]; + // We have more concrete information about a shape, + // so re-run shape inference. + rerun_shape_fn = true; } } + if (c->requested_input_tensor_as_partial_shape(i) && + !attempted_tensor_as_shape_conversion[i]) { + attempted_tensor_as_shape_conversion[i] = true; + if (i >= input_tensors_as_shapes.size()) { + input_tensors_as_shapes.resize(i + 1); + } + ShapeHandle s; + TF_RETURN_IF_ERROR(ConstantPartialShape(c.get(), node, i, &s)); + input_tensors_as_shapes[i] = s; + rerun_shape_fn = true; + } } if (rerun_shape_fn) { // We have more information about the shapes on this pass, // so re-run shape inference. c->set_input_tensors(input_tensors); + c->set_input_tensors_as_shapes(input_tensors_as_shapes); TF_RETURN_IF_ERROR(op_reg_data->shape_inference_fn(c.get())); } } while (rerun_shape_fn); @@ -169,7 +156,7 @@ Status ShapeRefiner::AddNode(const Node* node) { } Status ShapeRefiner::SetShape(const Node* node, int output_port, - shape_inference::ShapeHandle shape) { + ShapeHandle shape) { auto c = GetContext(node); if (c == nullptr) { return errors::Internal("Could not find context for ", node->name()); @@ -182,7 +169,7 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port, } // Check compatibility, and merge the shapes. 
- shape_inference::ShapeHandle existing_shape = c->output(output_port); + ShapeHandle existing_shape = c->output(output_port); TF_RETURN_IF_ERROR(c->Merge(existing_shape, shape, &shape)); c->set_output(output_port, shape); @@ -196,6 +183,55 @@ Status ShapeRefiner::SetShape(const Node* node, int output_port, return Status::OK(); } +Status ShapeRefiner::EvaluateConstantTensorForEdge(const Node* node, + int dst_idx, bool* evaluated, + Tensor* result) { + *evaluated = false; + const Edge* input_edge; + TF_RETURN_IF_ERROR(node->input_edge(dst_idx, &input_edge)); + + bool is_constant_graph = false; + Graph subgraph(ops_registry_); + + // We identify the possibly constant subgraph to evaluate by + // recursively iterating backwards through the inputs to 'node' + // until we either 1) find an already existing input to our subgraph + // (filled in `const_inputs`), 2) Discover our graph is not constant, + // or 3) Hit a root node. + std::vector> const_inputs; + TF_RETURN_IF_ERROR(ExtractConstantSubgraph( + input_edge->src(), &subgraph, &is_constant_graph, &const_inputs)); + if (!is_constant_graph) { + return Status::OK(); + } + const string output_tensor_name = + strings::StrCat(input_edge->src()->name(), ":", input_edge->src_output()); + std::vector outputs; + // NOTE; we should pass in a function library runtime if we want + // to support constant-expression evaluation on functions. + Status s = GraphRunner::Run(&subgraph, nullptr /* function_library */, + Env::Default(), const_inputs, + {output_tensor_name}, &outputs); + + // If all kernels in the constant graph are not registered + // in the process, GraphRunner::Run may fail, in which case + // we cannot propagate constants, so this is best-effort. + if (s.ok()) { + *result = outputs[0]; + *evaluated = true; + + // We memoize (small) constants evaluated so far, so + // ExtractConstantSubgraph can avoid extracting the full + // subgraph. 
As we build up large graphs, this avoids + // repeated computation of the early parts of a constant + // graph. + if (outputs[0].TotalBytes() <= kMaxTensorSize) { + const_tensor_map_[output_tensor_name] = outputs[0]; + } + } + return Status::OK(); +} + Status ShapeRefiner::ExtractConstantSubgraph( Node* target_node, Graph* out_graph, bool* is_constant_graph, std::vector>* const_inputs) { @@ -308,4 +344,75 @@ Status ShapeRefiner::ExtractConstantSubgraph( return Status::OK(); } +Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context, + const Node* node, int dst_idx, + ShapeHandle* result) { + const Edge* input_edge; + TF_RETURN_IF_ERROR(node->input_edge(dst_idx, &input_edge)); + + InferenceContext* src_context = GetContext(input_edge->src()); + if (src_context == nullptr) return errors::Internal("Missing src context"); + ShapeHandle src_shape = src_context->output(input_edge->src_output()); + TF_RETURN_IF_ERROR(src_context->WithRank(src_shape, 1, &src_shape)); + + const string& src_op = input_edge->src()->type_string(); + if (src_context->Value(src_context->Dim(src_shape, 0)) == 0) { + // Source tensor is a vector of length 0, so the shape it + // represents is as scalar. + *result = target_context->Scalar(); + } else if (src_op == "Shape") { + *result = src_context->input(0); + } else if (src_op == "Pack") { + std::vector dims; + // Pack is concatenating its input scalars to form the shape tensor vector. + for (int i = 0; i < src_context->num_inputs(); ++i) { + Tensor scalar; + bool evaluated = false; + TF_RETURN_IF_ERROR(EvaluateConstantTensorForEdge(input_edge->src(), i, + &evaluated, &scalar)); + if (evaluated) { + int64 size; + if (scalar.dtype() == DT_INT32) { + size = scalar.scalar()(); + } else if (scalar.dtype() == DT_INT64) { + size = scalar.scalar()(); + } else { + return errors::InvalidArgument("Pack input must be int32 or int64"); + } + dims.push_back(size < 0 ? 
target_context->UnknownDim() + : target_context->MakeDim(size)); + } else { + dims.push_back(target_context->UnknownDim()); + } + } + *result = target_context->MakeShape(dims); + } else if (src_op == "Concat") { + *result = target_context->Scalar(); + // Concat is concatenating its input shape vectors. + // input 0 is ignored as it is the concat dim and will always be 0. + for (int i = 1; i < src_context->num_inputs(); ++i) { + ShapeHandle sub_result; + TF_RETURN_IF_ERROR(ConstantPartialShape(target_context, input_edge->src(), + i, &sub_result)); + if (!target_context->RankKnown(sub_result)) { + // Failed to evaluate. Treat the output as completely unknown. + // TODO(cwhipkey): we could rely on all inputs being the same size, so + // figure that size out and append the right number of unknown dims. + *result = target_context->UnknownShape(); + return Status::OK(); + } + TF_RETURN_IF_ERROR( + target_context->Concatenate(*result, sub_result, result)); + } + } else { + Tensor t; + bool evaluated = false; + TF_RETURN_IF_ERROR( + EvaluateConstantTensorForEdge(node, dst_idx, &evaluated, &t)); + TF_RETURN_IF_ERROR(target_context->MakeShapeFromTensor( + evaluated ? &t : nullptr, src_shape, result)); + } + return Status::OK(); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/shape_refiner.h b/tensorflow/core/common_runtime/shape_refiner.h index b72001ddd21..6ce5ddb3661 100644 --- a/tensorflow/core/common_runtime/shape_refiner.h +++ b/tensorflow/core/common_runtime/shape_refiner.h @@ -71,6 +71,34 @@ class ShapeRefiner { Node* node, Graph* out_graph, bool* is_constant_graph, std::vector>* const_inputs) TF_MUST_USE_RESULT; + Status EvaluateConstantTensorForEdge(const Node* node, int dst_idx, + bool* evaluated, Tensor* result); + + // This function tries to materialize as much information about the 'node''s + // dst_idx input as a statically computable shape, and the result may be + // partially known, depending on what is statically inferable. 
+ // + // This is called when node.input[dst_idx] is a tensor that is used to define + // the shape of some other tensor (e.g., the second argument to Reshape is a + // tensor, where each element of the shape tensor is a dimension of + // the target tensor). It returns in a shape for that input. + // + // Unlike simply resolving node.input[dst_idx] to a constant and then + // converting that to a shape, this function can return a partial shape. This + // is useful for cases where the shape tensor is only partially defined, such + // as with calls for: reshape(x, shape(y)) where shape(y) is partially + // defined. + // + // The implementation has op implementations for ops commonly called on shape + // tensors, and the implementations are specialized to shape tensors (namely, + // the output is a vector). + // + // is used when creating new DimensionHandle and ShapeHandle + // objects. + Status ConstantPartialShape(shape_inference::InferenceContext* target_context, + const Node* node, int dst_idx, + shape_inference::ShapeHandle* result); + const OpRegistryInterface* ops_registry_ = nullptr; // Stores a map from a node to its InferenceContext. diff --git a/tensorflow/core/common_runtime/shape_refiner_test.cc b/tensorflow/core/common_runtime/shape_refiner_test.cc index 164fa6afb0b..420594d98a5 100644 --- a/tensorflow/core/common_runtime/shape_refiner_test.cc +++ b/tensorflow/core/common_runtime/shape_refiner_test.cc @@ -398,5 +398,347 @@ TEST(ShapeRefinerTest, ConstantValueVisitNodeTwice) { EXPECT_EQ("[1,4,7]", ctx->DebugString(ctx->output(0))); } +namespace { + +Status TensorAsShapeShapeFn(shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle out; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0 /* input_idx */, &out)); + c->set_output(0, out); + return Status::OK(); +} + +// Register ops used by the ConstantValueAsShape* tests. 
+ +REGISTER_OP("TensorAsShapeInt32") + .Input("a: int32") + .Output("o: int32") + .SetShapeFn(TensorAsShapeShapeFn); + +REGISTER_OP("TensorAsShapeInt64") + .Input("a: int64") + .Output("o: int64") + .SetShapeFn(TensorAsShapeShapeFn); + +REGISTER_OP("NonConstScalarInt32") + .Output("o: int32") + .SetIsStateful() // prevents constant folding + .SetShapeFn(shape_inference::ScalarShape); + +REGISTER_OP("NonConstScalarInt64") + .Output("o: int64") + .SetIsStateful() // prevents constant folding + .SetShapeFn(shape_inference::ScalarShape); + +REGISTER_OP("WithEmptyVectorShape") + .Output("o: int32") + .SetIsStateful() // prevents constant folding + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Vector(0)); + return Status::OK(); + }); + +REGISTER_OP("WithPartialShape") + .Output("o: int32") + .SetIsStateful() // prevents constant folding + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output( + 0, c->MakeShape({1, shape_inference::InferenceContext::kUnknownDim, 3, + shape_inference::InferenceContext::kUnknownDim, 5})); + return Status::OK(); + }); + +REGISTER_OP("WithPartialShape2") + .Output("o: int32") + .SetIsStateful() // prevents constant folding + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output( + 0, + c->MakeShape({6, shape_inference::InferenceContext::kUnknownDim, 8})); + return Status::OK(); + }); + +REGISTER_OP("WithUnknownShape") + .Output("o: int32") + .SetIsStateful() // prevents constant folding + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->UnknownShape()); + return Status::OK(); + }); + +} // namespace + +TEST(ShapeRefinerTest, ConstantValueAsShape_EmptyVector) { + Scope root = Scope::NewRootScope(); + Node* input; + TF_ASSERT_OK( + NodeBuilder("in", "WithEmptyVectorShape").Finalize(root.graph(), &input)); + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(input) + .Finalize(root.graph(), &result)); + + ShapeRefiner 
m(OpRegistry::Global()); + TF_ASSERT_OK(m.AddNode(input)); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("[]", ctx->DebugString(ctx->output(0))); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_Shape) { + for (int pass = 0; pass < 2; ++pass) { + Scope root = Scope::NewRootScope(); + Node* input; + TF_ASSERT_OK( + NodeBuilder("in", pass == 0 ? "WithPartialShape" : "WithUnknownShape") + .Finalize(root.graph(), &input)); + auto shape = ops::Shape(root, ops::Output(input)); + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(shape.node()) + .Finalize(root.graph(), &result)); + + ShapeRefiner m(OpRegistry::Global()); + TF_ASSERT_OK(m.AddNode(input)); + TF_ASSERT_OK(m.AddNode(shape.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + if (pass == 0) { + EXPECT_EQ("[1,?,3,?,5]", ctx->DebugString(ctx->output(0))); + } else { + EXPECT_EQ("?", ctx->DebugString(ctx->output(0))); + } + } +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt32) { + Scope root = Scope::NewRootScope(); + Node* scalar_non_const; + TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32") + .Finalize(root.graph(), &scalar_non_const)); + + ops::InputList inputs{ + ops::Input(ops::Const(root, 10)), + ops::Input(ops::Const(root, 20)), + ops::Input(ops::Output(scalar_non_const)), + ops::Input(ops::Const(root, 40)), + }; + auto pack = ops::Pack(root, inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(pack.node()) + .Finalize(root.graph(), &result)); + + ShapeRefiner m(OpRegistry::Global()); + for (auto input : inputs) { + TF_ASSERT_OK(m.AddNode(input.node())); + } + TF_ASSERT_OK(m.AddNode(pack.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("[10,20,?,40]", ctx->DebugString(ctx->output(0))); +} + 
+TEST(ShapeRefinerTest, ConstantValueAsShape_PackInt64) { + Scope root = Scope::NewRootScope(); + Node* scalar_non_const; + TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt64") + .Finalize(root.graph(), &scalar_non_const)); + + ops::InputList inputs{ + ops::Input(ops::Const(root, 10LL)), + ops::Input(ops::Const(root, 20LL)), + ops::Input(ops::Output(scalar_non_const)), + ops::Input(ops::Const(root, 1LL << 40)), + }; + auto pack = ops::Pack(root, inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64") + .Input(pack.node()) + .Finalize(root.graph(), &result)); + + ShapeRefiner m(OpRegistry::Global()); + for (const auto& input : inputs) { + TF_ASSERT_OK(m.AddNode(input.node())); + } + TF_ASSERT_OK(m.AddNode(pack.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("[10,20,?,1099511627776]", ctx->DebugString(ctx->output(0))); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) { + Scope root = Scope::NewRootScope(); + + ops::InputList inputs{ + ops::Input(ops::Const(root, 10LL)), + ops::Input(ops::Const(root, -1LL)), + }; + auto pack = ops::Pack(root, inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64") + .Input(pack.node()) + .Finalize(root.graph(), &result)); + + ShapeRefiner m(OpRegistry::Global()); + for (const auto& input : inputs) { + TF_ASSERT_OK(m.AddNode(input.node())); + } + TF_ASSERT_OK(m.AddNode(pack.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("[10,?]", ctx->DebugString(ctx->output(0))); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) { + Scope root = Scope::NewRootScope(); + + // Inputs are length 2 vectors instead of scalars. 
+ ops::InputList inputs{ + ops::Input(ops::Const(root, {10LL, 20LL})), + ops::Input(ops::Const(root, {10LL, 21LL})), + }; + auto pack = ops::Pack(root, inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt64") + .Input(pack.node()) + .Finalize(root.graph(), &result)); + + ShapeRefiner m(OpRegistry::Global()); + for (const auto& input : inputs) { + TF_ASSERT_OK(m.AddNode(input.node())); + } + TF_ASSERT_OK(m.AddNode(pack.node())); + EXPECT_TRUE( + StringPiece(m.AddNode(result).error_message()).contains("but is rank 2")); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_Concat) { + Scope root = Scope::NewRootScope(); + Graph* g = root.graph(); + Node* partial_1; + Node* partial_2; + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); + auto const_input = ops::Const(root, {9, 10, 11}); + ops::OutputList concat_inputs{ + ops::Shape(root, ops::Output(partial_1)), + ops::Shape(root, ops::Output(partial_2)), const_input, + }; + auto concat_dim = ops::Const(root, 0); + auto concat = ops::Concat(root, concat_dim, concat_inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(concat.node()) + .Finalize(g, &result)); + + ShapeRefiner m(OpRegistry::Global()); + TF_ASSERT_OK(m.AddNode(partial_1)); + TF_ASSERT_OK(m.AddNode(partial_2)); + for (const auto& o : concat_inputs) { + TF_ASSERT_OK(m.AddNode(o.node())); + } + TF_ASSERT_OK(m.AddNode(concat_dim.node())); + TF_ASSERT_OK(m.AddNode(concat.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("[1,?,3,?,5,6,?,8,9,10,11]", ctx->DebugString(ctx->output(0))); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatWithUnknown) { + Scope root = Scope::NewRootScope(); + Graph* g = root.graph(); + Node* scalar_non_const; + 
TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32") + .Finalize(root.graph(), &scalar_non_const)); + + Node* partial_1; + Node* partial_2; + Node* unknown; + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); + TF_ASSERT_OK(NodeBuilder("in", "WithUnknownShape").Finalize(g, &unknown)); + ops::OutputList concat_inputs{ + ops::Shape(root, ops::Output(partial_1)), + ops::Shape(root, ops::Output(partial_2)), + ops::Shape(root, ops::Output(unknown)), + }; + auto concat_dim = ops::Const(root, 0); + auto concat = ops::Concat(root, concat_dim, concat_inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(concat.node()) + .Finalize(g, &result)); + + ShapeRefiner m(OpRegistry::Global()); + TF_ASSERT_OK(m.AddNode(partial_1)); + TF_ASSERT_OK(m.AddNode(partial_2)); + TF_ASSERT_OK(m.AddNode(unknown)); + for (const auto& o : concat_inputs) { + TF_ASSERT_OK(m.AddNode(o.node())); + } + TF_ASSERT_OK(m.AddNode(concat_dim.node())); + TF_ASSERT_OK(m.AddNode(concat.node())); + TF_ASSERT_OK(m.AddNode(result)); + + shape_inference::InferenceContext* ctx = m.GetContext(result); + EXPECT_EQ("?", ctx->DebugString(ctx->output(0))); +} + +TEST(ShapeRefinerTest, ConstantValueAsShape_ConcatInvalidDimValue) { + Scope root = Scope::NewRootScope(); + Graph* g = root.graph(); + Node* scalar_non_const; + TF_ASSERT_OK(NodeBuilder("in", "NonConstScalarInt32") + .Finalize(root.graph(), &scalar_non_const)); + + Node* partial_1; + Node* partial_2; + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape").Finalize(g, &partial_1)); + TF_ASSERT_OK(NodeBuilder("in", "WithPartialShape2").Finalize(g, &partial_2)); + auto const_input = ops::Const(root, {9, -2, 11}); + ops::OutputList concat_inputs{ + ops::Shape(root, ops::Output(partial_1)), + ops::Shape(root, ops::Output(partial_2)), // + const_input, + }; + auto concat_dim = ops::Const(root, 
0); + auto concat = ops::Concat(root, concat_dim, concat_inputs); + TF_ASSERT_OK(root.status()); + + Node* result; + TF_ASSERT_OK(NodeBuilder("test", "TensorAsShapeInt32") + .Input(concat.node()) + .Finalize(g, &result)); + + ShapeRefiner m(OpRegistry::Global()); + TF_ASSERT_OK(m.AddNode(partial_1)); + TF_ASSERT_OK(m.AddNode(partial_2)); + for (const auto& o : concat_inputs) { + TF_ASSERT_OK(m.AddNode(o.node())); + } + TF_ASSERT_OK(m.AddNode(concat_dim.node())); + TF_ASSERT_OK(m.AddNode(concat.node())); + EXPECT_EQ("Invalid value in tensor used for shape: -2", + m.AddNode(result).error_message()); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.cc b/tensorflow/core/common_runtime/simple_graph_execution_state.cc index ff00ad5cfda..82d36b51b5a 100644 --- a/tensorflow/core/common_runtime/simple_graph_execution_state.cc +++ b/tensorflow/core/common_runtime/simple_graph_execution_state.cc @@ -274,16 +274,6 @@ Status SimpleGraphExecutionState::InitBaseGraph( return Status::OK(); } -void SimpleGraphExecutionState::UpdateCostsFromStats(const StepStats& ss) { - mutex_lock l(mu_); - costs_.MergeFromStats(node_name_to_cost_id_map_, ss); -} - -void SimpleGraphExecutionState::MergeCostsFromGlobal(CostModel* costs) { - mutex_lock l(mu_); - costs->MergeFromGlobal(costs_); -} - Status SimpleGraphExecutionState::BuildGraph( const BuildGraphOptions& options, std::unique_ptr* out) { VLOG(1) << "BuildGraph"; diff --git a/tensorflow/core/common_runtime/simple_graph_execution_state.h b/tensorflow/core/common_runtime/simple_graph_execution_state.h index 2a33d9e298c..3b6ce23c754 100644 --- a/tensorflow/core/common_runtime/simple_graph_execution_state.h +++ b/tensorflow/core/common_runtime/simple_graph_execution_state.h @@ -133,22 +133,6 @@ class SimpleGraphExecutionState { Status BuildGraph(const BuildGraphOptions& options, std::unique_ptr* out); - // Sums execution statistics in "ss" into the CostModel. 
- void UpdateCostsFromStats(const StepStats& ss); - - Microseconds TimeEstimate(const Node* n) { - mutex_lock l(mu_); // could use reader lock - return costs_.TimeEstimate(n); - } - - Bytes SizeEstimate(const Node* n, int output_slot) { - mutex_lock l(mu_); // could use reader lock - return costs_.SizeEstimate(n, output_slot); - } - - // Merge the cost model maintained by this graph_execution_state to 'costs'. - void MergeCostsFromGlobal(CostModel* costs); - // The graph returned by BuildGraph may contain only the pruned // graph, whereas some clients may want access to the full graph. const Graph* full_graph() { diff --git a/tensorflow/core/debug/debug_gateway_test.cc b/tensorflow/core/debug/debug_gateway_test.cc index bba8299e6d5..1fab9a56a35 100644 --- a/tensorflow/core/debug/debug_gateway_test.cc +++ b/tensorflow/core/debug/debug_gateway_test.cc @@ -335,7 +335,9 @@ TEST_F(SessionDebugMinusAXTest, RunSimpleNetworkWithTwoDebugNodesInserted) { } TEST_F(SessionDebugMinusAXTest, - RunSimpleNetworkConcurrentlyWithDebugNodesInserted) { + RunSimpleNetworkConcurrentlyWithDifferentDebugTensorWatches) { + // Test concurrent Run() calls on a graph with different debug watches. 
+ Initialize({3, 2, -1, 0}); std::unique_ptr session(CreateSession()); ASSERT_TRUE(session != nullptr); @@ -351,33 +353,39 @@ TEST_F(SessionDebugMinusAXTest, mutex mu; DebugGateway debug_gateway(session.get()); - std::vector debug_identity_tensor_vals; + std::unordered_map debug_identity_tensor_vals; const string debug_identity = "DebugIdentity"; - const string debug_identity_node_name = DebugNodeInserter::GetDebugNodeName( + + const string a_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName( + strings::StrCat(a_, ":", 0), 0, debug_identity); + const string x_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName( + strings::StrCat(x_, ":", 0), 0, debug_identity); + const string y_debug_identity_node_name = DebugNodeInserter::GetDebugNodeName( strings::StrCat(y_, ":", 0), 0, debug_identity); Notification callbacks_done; - int comp_callback_count = 0; - int val_callback_count = 0; - debug_gateway.SetNodeCompletionCallback( - [&mu, &callbacks_done, &comp_callback_count, &debug_identity_node_name]( - const string& node_name, const bool any_output) { - mutex_lock l(mu); - if (node_name == debug_identity_node_name) { - comp_callback_count++; - } - }); + volatile int val_callback_count = 0; debug_gateway.SetNodeValueCallback( - [this, &mu, &val_callback_count, &debug_identity_node_name, + [this, &mu, &val_callback_count, &a_debug_identity_node_name, + &x_debug_identity_node_name, &y_debug_identity_node_name, &debug_identity_tensor_vals, &callbacks_done](const string& node_name, const int output_slot, const Tensor& tensor_value, const bool is_ref) { mutex_lock l(mu); - if (node_name == debug_identity_node_name && output_slot == 0) { + + if (node_name == a_debug_identity_node_name && output_slot == 0) { + debug_identity_tensor_vals["a"] = tensor_value; + val_callback_count++; + } else if (node_name == x_debug_identity_node_name && + output_slot == 0) { // output_slot == 0 carries the debug signal. 
- debug_identity_tensor_vals.push_back(tensor_value); + debug_identity_tensor_vals["x"] = tensor_value; + val_callback_count++; + } else if (node_name == y_debug_identity_node_name && + output_slot == 0) { + debug_identity_tensor_vals["y"] = tensor_value; val_callback_count++; } @@ -389,19 +397,41 @@ TEST_F(SessionDebugMinusAXTest, } }); + int run_counter = 0; + mutex run_lock; + // Function to be executed concurrently. - auto fn = [this, &session, output_names, target_nodes, &debug_identity]() { - // Create unique debug tensor watch options for each of the two concurrent + auto fn = [this, &run_lock, &run_counter, &session, output_names, + target_nodes, &debug_identity]() { + // Create unique debug tensor watch options for each of the concurrent + // run calls. RunOptions run_opts; run_opts.set_output_partition_graphs(true); + DebugTensorWatch* tensor_watch_opts = run_opts.add_debug_tensor_watch_opts(); - - tensor_watch_opts->set_node_name(y_); tensor_watch_opts->set_output_slot(0); tensor_watch_opts->add_debug_ops(debug_identity); + { + // Let the concurrent runs watch different tensors. + + mutex_lock l(run_lock); + + if (run_counter == 0) { + // Let the 1st concurrent run watch a. + tensor_watch_opts->set_node_name(a_); + } else if (run_counter == 1) { + // Let the 2nd concurrent run watch x. + tensor_watch_opts->set_node_name(x_); + } else if (run_counter == 2) { + // Let the 3rd concurrent run watch y. + tensor_watch_opts->set_node_name(y_); + } + + run_counter++; + } + // Run the graph. 
RunMetadata run_metadata; std::vector> inputs; @@ -436,15 +466,26 @@ TEST_F(SessionDebugMinusAXTest, { mutex_lock l(mu); - ASSERT_EQ(kConcurrentRuns, comp_callback_count); + ASSERT_EQ(kConcurrentRuns, val_callback_count); ASSERT_EQ(kConcurrentRuns, debug_identity_tensor_vals.size()); - for (int i = 0; i < kConcurrentRuns; ++i) { - ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals[i].shape()); - auto mat_identity = debug_identity_tensor_vals[i].matrix(); - ASSERT_EQ(5.0, mat_identity(0, 0)); - ASSERT_EQ(-1.0, mat_identity(1, 0)); - } + + ASSERT_EQ(TensorShape({2, 2}), debug_identity_tensor_vals["a"].shape()); + auto a_mat_identity = debug_identity_tensor_vals["a"].matrix(); + ASSERT_EQ(3.0, a_mat_identity(0, 0)); + ASSERT_EQ(2.0, a_mat_identity(0, 1)); + ASSERT_EQ(-1.0, a_mat_identity(1, 0)); + ASSERT_EQ(0.0, a_mat_identity(1, 1)); + + ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals["x"].shape()); + auto x_mat_identity = debug_identity_tensor_vals["x"].matrix(); + ASSERT_EQ(1.0, x_mat_identity(0, 0)); + ASSERT_EQ(1.0, x_mat_identity(1, 0)); + + ASSERT_EQ(TensorShape({2, 1}), debug_identity_tensor_vals["y"].shape()); + auto y_mat_identity = debug_identity_tensor_vals["y"].matrix(); + ASSERT_EQ(5.0, y_mat_identity(0, 0)); + ASSERT_EQ(-1.0, y_mat_identity(1, 0)); } } @@ -499,25 +540,22 @@ TEST_F(SessionDebugOutputSlotWithoutOngoingEdgeTest, Notification callbacks_done; - debug_gateway.SetNodeCompletionCallback( - [&mu, &callbacks_done](const string& node_name, const bool any_output) { - mutex_lock l(mu); - if (node_name == "_SINK" && !callbacks_done.HasBeenNotified()) { - callbacks_done.Notify(); - } - }); - std::vector debug_identity_tensor_vals; - debug_gateway.SetNodeValueCallback( - [this, &mu, &debug_identity_node_name, &debug_identity_tensor_vals]( - const string& node_name, const int output_slot, - const Tensor& tensor_value, const bool is_ref) { - mutex_lock l(mu); + debug_gateway.SetNodeValueCallback([this, &mu, &callbacks_done, + 
&debug_identity_node_name, + &debug_identity_tensor_vals]( + const string& node_name, const int output_slot, + const Tensor& tensor_value, const bool is_ref) { + mutex_lock l(mu); - if (node_name == debug_identity_node_name && output_slot == 0) { - debug_identity_tensor_vals.push_back(tensor_value); - } - }); + if (node_name == debug_identity_node_name && output_slot == 0) { + debug_identity_tensor_vals.push_back(tensor_value); + + if (!callbacks_done.HasBeenNotified()) { + callbacks_done.Notify(); + } + } + }); // Add DebugIdentity watch on c:0, which does not have an outgoing edge. RunOptions run_opts; diff --git a/tensorflow/core/debug/debug_graph_utils.cc b/tensorflow/core/debug/debug_graph_utils.cc index b4b0ca810b4..bd0625fec34 100644 --- a/tensorflow/core/debug/debug_graph_utils.cc +++ b/tensorflow/core/debug/debug_graph_utils.cc @@ -24,6 +24,30 @@ limitations under the License. namespace tensorflow { +const string SummarizeDebugTensorWatches( + const protobuf::RepeatedPtrField& watches) { + std::ostringstream oss; + + for (const DebugTensorWatch& watch : watches) { + string tensor_name = + strings::StrCat(watch.node_name(), ":", watch.output_slot()); + oss << tensor_name << "|"; + + for (const string& debug_op : watch.debug_ops()) { + oss << debug_op << ","; + } + + oss << "@"; + for (const string& debug_url : watch.debug_urls()) { + oss << debug_url << ","; + } + + oss << ";"; + } + + return oss.str(); +} + // static Status DebugNodeInserter::InsertNodes( const protobuf::RepeatedPtrField& watches, Graph* graph, diff --git a/tensorflow/core/debug/debug_graph_utils.h b/tensorflow/core/debug/debug_graph_utils.h index ea61dee4d08..e01af00bdd4 100644 --- a/tensorflow/core/debug/debug_graph_utils.h +++ b/tensorflow/core/debug/debug_graph_utils.h @@ -27,6 +27,10 @@ limitations under the License. namespace tensorflow { +// Returns a summary string for RepeatedPtrFields of DebugTensorWatches. 
+const string SummarizeDebugTensorWatches( + const protobuf::RepeatedPtrField& watches); + class DebugNodeInserter { public: // EXPERIMENTAL: Insert special debug ops (e.g., DebugIdentity) to graph for diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index f77bc0b6b7a..577f6617f79 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/graph_optimizer.h" #include "tensorflow/core/common_runtime/memory_types.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/step_stats_collector.h" #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/framework/cancellation.h" #include "tensorflow/core/framework/log_memory.h" @@ -207,6 +208,11 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, if (!s.ok()) { break; } + unit->graph = subgraph; + unit->build_cost_model = graph_options.build_cost_model(); + if (unit->build_cost_model > 0) { + skip_cost_models_ = false; + } } return s; } @@ -319,6 +325,7 @@ Status GraphMgr::RecvOutputs(const int64 step_id, NamedTensors* out) { void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, const ExecutorOpts& opts, StepStatsCollector* collector, + CostGraphDef* cost_graph, CancellationManager* cancellation_manager, const NamedTensors& in, StatusCallback done) { // Lookup an item. Holds one ref while executing. 
@@ -348,7 +355,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, return; } - StartParallelExecutors(handle, item, rendezvous, collector, + StartParallelExecutors(handle, item, rendezvous, collector, cost_graph, cancellation_manager, [this, item, rendezvous, done](const Status& s) { done(s); @@ -360,6 +367,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, void GraphMgr::StartParallelExecutors(const string& handle, Item* item, Rendezvous* rendezvous, StepStatsCollector* collector, + CostGraphDef* cost_graph, CancellationManager* cancellation_manager, StatusCallback done) { const int num_units = item->units.size(); @@ -367,7 +375,9 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item, ResourceMgr* step_resource_manager = new ResourceMgr; // NOTE: Transfer one ref of rendezvous and item. ExecutorBarrier* barrier = new ExecutorBarrier( - num_units, rendezvous, [step_resource_manager, done](const Status& s) { + num_units, rendezvous, [this, item, collector, cost_graph, + step_resource_manager, done](const Status& s) { + BuildCostModel(item, collector, cost_graph); done(s); delete step_resource_manager; }); @@ -393,4 +403,24 @@ void GraphMgr::StartParallelExecutors(const string& handle, Item* item, } } +void GraphMgr::BuildCostModel(Item* item, StepStatsCollector* collector, + CostGraphDef* cost_graph) { + if (collector && !skip_cost_models_) { + // Build the cost model + std::unordered_map device_to_graph; + for (const auto& unit : item->units) { + if (unit.build_cost_model > 0) { + device_to_graph[unit.device->name()] = unit.graph; + } + } + collector->BuildCostModel(&cost_model_manager_, device_to_graph); + + if (cost_graph != nullptr) { + for (const auto& unit : item->units) { + cost_model_manager_.AddToCostGraphDef(unit.graph, cost_graph); + } + } + } +} + } // end namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/graph_mgr.h b/tensorflow/core/distributed_runtime/graph_mgr.h 
index bb4b3f2c8c6..a8994f14834 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.h +++ b/tensorflow/core/distributed_runtime/graph_mgr.h @@ -19,9 +19,11 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/costmodel_manager.h" #include "tensorflow/core/common_runtime/executor.h" #include "tensorflow/core/distributed_runtime/worker_env.h" #include "tensorflow/core/framework/cancellation.h" +#include "tensorflow/core/framework/cost_graph.pb.h" #include "tensorflow/core/lib/core/refcount.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/macros.h" @@ -73,6 +75,7 @@ class GraphMgr { typedef std::function StatusCallback; void ExecuteAsync(const string& handle, const int64 step_id, const ExecutorOpts& opts, StepStatsCollector* collector, + CostGraphDef* cost_graph, CancellationManager* cancellation_manager, const NamedTensors& in, StatusCallback done); @@ -89,9 +92,12 @@ class GraphMgr { typedef GraphMgr ME; struct ExecutionUnit { + Graph* graph = nullptr; Device* device = nullptr; Executor* root = nullptr; FunctionLibraryRuntime* lib = nullptr; + // Build the cost model if this value is strictly positive. + int64 build_cost_model = 0; }; struct Item : public core::RefCounted { @@ -117,6 +123,8 @@ class GraphMgr { // Not owned. const WorkerEnv* worker_env_; + CostModelManager cost_model_manager_; + // Owned. mutex mu_; int64 next_id_ GUARDED_BY(mu_) = 0; @@ -131,9 +139,17 @@ class GraphMgr { void StartParallelExecutors(const string& handle, Item* item, Rendezvous* rendezvous, StepStatsCollector* collector, + CostGraphDef* cost_graph, CancellationManager* cancellation_manager, StatusCallback done); + // Don't attempt to process cost models unless explicitly requested for at + least one of the items. 
+ bool skip_cost_models_ = true; + + void BuildCostModel(Item* item, StepStatsCollector* collector, + CostGraphDef* cost_graph); + Status SendInputsToRendezvous(Rendezvous* rendezvous, const NamedTensors& in); Status RecvOutputsFromRendezvous(Rendezvous* rendezvous, NamedTensors* out); diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 5537e3f2eff..6f3b7841785 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -25,6 +25,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/scheduler.h" #include "tensorflow/core/distributed_runtime/worker_cache.h" #include "tensorflow/core/distributed_runtime/worker_interface.h" +#include "tensorflow/core/framework/cost_graph.pb.h" #include "tensorflow/core/framework/function.pb.h" #include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/tensor.h" @@ -58,6 +59,7 @@ struct PerStepState { Microseconds end_micros = Microseconds(0); std::vector step_stats; // per partition StepStats rpc_stats; // for RPC layer + CostGraphDef cost_graph; }; // MasterSession wraps SimpleClientGraph in a reference counted object. @@ -178,7 +180,8 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { // Post-processing of any runtime statistics gathered during execution. 
void ProcessStats(const MasterEnv* env, int64 step_id, PerStepState* pss, SimpleGraphExecutionState* execution_state, - ProfileHandler* ph, RunStepResponse* resp); + ProfileHandler* ph, const RunStepRequest& req, + RunStepResponse* resp); void ProcessDeviceStats(ProfileHandler* ph, const SimpleGraphExecutionState* execution_state, const DeviceStepStats& ds, bool is_rpc); @@ -480,17 +483,6 @@ class RunManyGraphs { TF_DISALLOW_COPY_AND_ASSIGN(RunManyGraphs); }; -int64 CostFrequency(int64 x) { - if (x < 10) { - return 1; // 100% - } else if (x < 100) { - return 10; // 10% - } else if (x < 1000) { - return 100; // 1% - } else { - return 1000; // 0.1% - } -} Status MasterSession::ReffedClientGraph::RunPartitions( const MasterEnv* env, int64 step_id, int64 execution_count, @@ -604,6 +596,12 @@ Status MasterSession::ReffedClientGraph::RunPartitions( if (pss->collect_timeline && calls.get(i)->resp.has_step_stats()) { pss->step_stats[i].Swap(calls.get(i)->resp.mutable_step_stats()); } + if (pss->collect_costs && calls.get(i)->resp.has_cost_graph()) { + for (int j = 0; j < calls.get(i)->resp.cost_graph().node_size(); ++j) { + resp->mutable_metadata()->mutable_cost_graph()->add_node()->Swap( + calls.get(i)->resp.mutable_cost_graph()->mutable_node(j)); + } + } } } return status; @@ -679,7 +677,7 @@ void MasterSession::ReffedClientGraph::CleanupPartitionsAsync( void MasterSession::ReffedClientGraph::ProcessStats( const MasterEnv* env, int64 step_id, PerStepState* pss, SimpleGraphExecutionState* execution_state, ProfileHandler* ph, - RunStepResponse* resp) { + const RunStepRequest& req, RunStepResponse* resp) { if (!pss->collect_costs && !pss->collect_timeline) return; // Out-of-band logging data is collected now, during post-processing. 
@@ -689,9 +687,6 @@ void MasterSession::ReffedClientGraph::ProcessStats( } for (size_t i = 0; i < partitions_.size(); ++i) { const StepStats& ss = pss->step_stats[i]; - if (pss->collect_costs) { - execution_state->UpdateCostsFromStats(ss); - } if (ph) { for (const auto& ds : ss.dev_stats()) { ProcessDeviceStats(ph, execution_state, ds, false /*is_rpc*/); @@ -717,7 +712,7 @@ void MasterSession::ReffedClientGraph::ProcessStats( stats_publisher_->PublishStatsProto(step_stats_proto); // Copy the stats back, but only for on-demand profiling to avoid slowing // down calls that trigger the automatic profiling. - if (session_opts_.config.graph_options().timeline_step() <= 0) { + if (req.options().trace_level() == RunOptions::FULL_TRACE) { resp->mutable_metadata()->mutable_step_stats()->Swap(&step_stats_proto); } } @@ -1063,7 +1058,17 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts, std::unique_ptr ph; pss.collect_timeline = req->options().trace_level() == RunOptions::FULL_TRACE; - pss.collect_costs = (0 == (count % CostFrequency(count))); + + // Build the cost model every 'build_cost_model_every' steps after skipping an + // initial 'build_cost_model_after' steps. + const int64 build_cost_model_after = + session_opts_.config.graph_options().build_cost_model_after(); + const int64 build_cost_model_every = + session_opts_.config.graph_options().build_cost_model(); + pss.collect_costs = + build_cost_model_every > 0 && + ((count + 1 - build_cost_model_after) % build_cost_model_every == 0); + ph = rcg->GetProfileHandler(step_id, count, req->options()); if (ph) { pss.collect_timeline = true; @@ -1078,7 +1083,7 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts, // Schedule post-processing and cleanup to be done asynchronously. 
rcg->Ref(); - rcg->ProcessStats(env_, step_id, &pss, execution_state_.get(), ph.get(), + rcg->ProcessStats(env_, step_id, &pss, execution_state_.get(), ph.get(), *req, resp); rcg->CleanupPartitionsAsync(step_id, [rcg](const Status& s) { if (!s.ok()) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index 2ae5dcebe6b..ec8c06abb49 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -329,7 +329,8 @@ class GrpcWorkerService : public AsyncServiceInterface { return; } StepStatsCollector* collector = nullptr; - if (call->request.exec_opts().record_timeline()) { + if (call->request.exec_opts().record_timeline() || + call->request.exec_opts().record_costs()) { collector = new StepStatsCollector(call->response.mutable_step_stats()); // TODO(mrry,pbar): GPU tracing for distributed steps. } @@ -345,9 +346,10 @@ class GrpcWorkerService : public AsyncServiceInterface { cancellation_manager_->RegisterCallback(token, [cm]() { cm->StartCancel(); }); } + CostGraphDef* cost_graph = call->response.mutable_cost_graph(); env_->graph_mgr->ExecuteAsync( call->request.graph_handle(), step_id, call->request.exec_opts(), - collector, cm, in, + collector, cost_graph, cm, in, [this, step_id, call, cm, out, token, collector](Status s) { if (s.ok()) { env_->graph_mgr->RecvOutputs(step_id, out); diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc index a4efc04467c..7196bc83042 100644 --- a/tensorflow/core/framework/common_shape_fns_test.cc +++ b/tensorflow/core/framework/common_shape_fns_test.cc @@ -56,7 +56,7 @@ TEST(CommonShapeFnsTest, NoOutputShapeTest) { .Input({{"data", 0, DT_FLOAT}}) .Finalize(&def)); - InferenceContext c(&def, op_def, {S({}), S({10})}, {}); + InferenceContext c(&def, op_def, {S({}), S({10})}, {}, {}); 
TF_EXPECT_OK(NoOutputs(&c)); EXPECT_EQ(0, c.num_outputs()); } @@ -74,14 +74,14 @@ TEST(CommonShapeFnsTest, ScalarShapeTest) { NodeDefBuilder("test", "L2Loss").Input("t", 0, DT_FLOAT).Finalize(&def)); { - InferenceContext c(&def, op_def, {S({})}, {}); + InferenceContext c(&def, op_def, {S({})}, {}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); } { - InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {}); + InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); @@ -108,7 +108,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -117,7 +117,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown inner dimension for one - InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {}); + InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -126,7 +126,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Invalid rank. 
- InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {}); + InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -136,7 +136,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown outer dimension - InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -145,7 +145,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {}); + InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -156,7 +156,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {}); + InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -174,7 +174,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {}); + InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -191,7 +191,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -215,7 +215,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {}); + InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {}, 
{}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -224,7 +224,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Unknown ranks. - InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {}); + InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_FALSE(c.RankKnown(output)); @@ -232,7 +232,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {}); + InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[4,3,4,2,15]", c.DebugString(output)); @@ -245,7 +245,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[2,3,4,5]", c.DebugString(output)); @@ -258,7 +258,8 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {}, + {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[8,6,4,2,3,4,5]", c.DebugString(output)); @@ -271,7 +272,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[10,11,12]", c.DebugString(output)); @@ -279,7 +280,7 @@ 
TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {S({3}), S({3})}, {}); + InferenceContext c(&def, op_def, {S({3}), S({3})}, {}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } @@ -291,7 +292,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } } @@ -310,7 +311,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {S({2, 10})}, {}); + InferenceContext c(&def, op_def, {S({2, 10})}, {}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -318,7 +319,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {}); + InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -330,7 +331,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -342,7 +343,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -354,7 +355,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { 
.Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {S({10, 11, 12})}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12})}, {}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -362,7 +363,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {S({3})}, {}); + InferenceContext c(&def, op_def, {S({3})}, {}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } @@ -373,7 +374,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {S({2, 3})}, {}); + InferenceContext c(&def, op_def, {S({2, 3})}, {}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } } diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index 109df5d4f56..67c71be46c3 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -400,6 +400,9 @@ class FunctionLibraryRuntime { // Returns a debug string showing the definition of the function of // 'handle'. virtual string DebugString(Handle handle) = 0; + + // Returns the graph version number. 
+ virtual int graph_def_version() = 0; }; // To register a gradient function for a builtin op, one should use diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 77a433ddcb5..da88b6a7ca6 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -30,9 +30,10 @@ constexpr int64 InferenceContext::kUnknownDim; InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, const std::vector& input_shapes, - const std::vector& input_tensors) + const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes) : node_def_(*CHECK_NOTNULL(node_def)) { - PreInputInit(op_def, input_tensors); + PreInputInit(op_def, input_tensors, input_tensors_as_shapes); if (!construction_status_.ok()) return; for (const TensorShapeProto& p : input_shapes) { ShapeHandle shape; @@ -48,9 +49,10 @@ InferenceContext::InferenceContext( InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, const std::vector& input_shapes, - const std::vector& input_tensors) + const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes) : node_def_(*CHECK_NOTNULL(node_def)) { - PreInputInit(op_def, input_tensors); + PreInputInit(op_def, input_tensors, input_tensors_as_shapes); if (!construction_status_.ok()) return; inputs_ = input_shapes; PostInputInit(); @@ -106,8 +108,10 @@ Status InferenceContext::output(StringPiece output_name, } void InferenceContext::PreInputInit( - const OpDef& op_def, const std::vector& input_tensors) { + const OpDef& op_def, const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes) { input_tensors_ = input_tensors; + input_tensors_as_shapes_ = input_tensors_as_shapes; construction_status_ = NameRangesForNode(node_def_, op_def, &input_name_map_, &output_name_map_); @@ -139,6 +143,7 @@ void InferenceContext::PostInputInit() { CHECK_LE(input_tensors_.size(), inputs_.size()); 
input_tensors_.resize(inputs_.size()); requested_input_tensor_.resize(inputs_.size()); + requested_input_tensor_as_partial_shape_.resize(inputs_.size()); } bool InferenceContext::FullyDefined(ShapeHandle s) { @@ -470,11 +475,24 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, ShapeHandle input_shape; TF_RETURN_IF_ERROR(WithRank(input(input_idx), 1, &input_shape)); - const Tensor* t = input_tensor(input_idx); + if (input_idx < input_tensors_as_shapes_.size() && + input_tensors_as_shapes_[input_idx].IsSet() && + RankKnown(input_tensors_as_shapes_[input_idx])) { + *out = input_tensors_as_shapes_[input_idx]; + return Status::OK(); + } + requested_input_tensor_as_partial_shape_[input_idx] = true; + + return MakeShapeFromTensor(input_tensor(input_idx), input_shape, out); +} + +Status InferenceContext::MakeShapeFromTensor(const Tensor* t, + ShapeHandle tensor_shape, + ShapeHandle* out) { if (t == nullptr) { // Shape tensor is not known, but if the shape of the shape tensor is then // the right number of unknown dims can be created. - DimensionHandle shape_dim = Dim(input_shape, 0); + DimensionHandle shape_dim = Dim(tensor_shape, 0); if (!ValueKnown(shape_dim)) { return ReturnUnknownShape(out); } @@ -493,12 +511,24 @@ Status InferenceContext::MakeShapeFromShapeTensor(int input_idx, if (t->dtype() == DataType::DT_INT32) { auto flat_t = t->flat(); for (int i = 0; i < flat_t.size(); ++i) { - dims.push_back(MakeDim(flat_t(i))); + const int32 val = flat_t(i); + if (val < -1) { + return errors::InvalidArgument( + "Invalid value in tensor used for shape: ", val); + } + // -1 will become an unknown dim. + dims.push_back(MakeDim(val)); } } else if (t->dtype() == DataType::DT_INT64) { auto flat_t = t->flat(); for (int i = 0; i < flat_t.size(); ++i) { - dims.push_back(MakeDim(flat_t(i))); + const int64 val = flat_t(i); + if (val < -1) { + return errors::InvalidArgument( + "Invalid value in tensor used for shape: ", val); + } + // -1 will become an unknown dim. 
+ dims.push_back(MakeDim(val)); } } else { *out = nullptr; @@ -558,24 +588,27 @@ Status InferenceContext::MakeDimForScalarInput(int idx, DimensionHandle* out) { return Status::OK(); } -Status InferenceContext::Divide(DimensionHandle dividend, int64 divisor, +Status InferenceContext::Divide(DimensionHandle dividend, + DimensionOrConstant divisor, bool evenly_divisible, DimensionHandle* out) { - if (divisor == 1) { + const int64 divisor_value = Value(divisor); + if (divisor_value == 1) { *out = dividend; - } else if (!ValueKnown(dividend)) { + } else if (!ValueKnown(dividend) || + (divisor.dim.IsSet() && !ValueKnown(divisor.dim))) { *out = UnknownDim(); } else { const int64 v = Value(dividend); - if (divisor <= 0) { + if (divisor_value <= 0) { return errors::InvalidArgument("Divisor must be positive but is ", - divisor); + divisor_value); } - if (evenly_divisible && (v % divisor) != 0) { + if (evenly_divisible && (v % divisor_value) != 0) { return errors::InvalidArgument( - "Dimension size must be evenly divisible by ", divisor, " but is ", - v); + "Dimension size must be evenly divisible by ", divisor_value, + " but is ", v); } - *out = MakeDim(v / divisor); + *out = MakeDim(v / divisor_value); } return Status::OK(); } diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 1dfb9af0a47..f5befc15a11 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -136,17 +136,33 @@ class InferenceContext { // is NULL-padded to be the same size as . // + // Elements of are used for when a shape function + // makes a call to MakeShapeFromShapeTensor; in particular, when the + // input_tensors[i] is nullptr but the shape represented by it is partially + // known from analysis of the graph. + // can have fewer elements than . + // Values of do not need to outlive the context. + // // REQUIRES: is not NULL, and must outlive the InferenceContext. 
InferenceContext(const NodeDef* node_def, const OpDef& op_def, const std::vector& input_shapes, - const std::vector& input_tensors); + const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes); // is NULL-padded to be the same size as . // + // Elements of are used for when a shape function + // makes a call to MakeShapeFromShapeTensor; in particular, when the + // input_tensors[i] is nullptr but the shape represented by it is partially + // known from analysis of the graph. + // can have fewer elements than . + // Values of do not need to outlive the context. + // // REQUIRES: is not NULL, and must outlive the InferenceContext. InferenceContext(const NodeDef* node_def, const OpDef& op_def, const std::vector& input_shapes, - const std::vector& input_tensors); + const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes); ~InferenceContext(); @@ -180,10 +196,21 @@ class InferenceContext { return requested_input_tensor_[idx]; } + // Returns true if MakeShapeFromInputTensor was called but the constant + // input_tensor was not present. + bool requested_input_tensor_as_partial_shape(int idx) const { + return requested_input_tensor_as_partial_shape_[idx]; + } + void set_input_tensors(const std::vector& input_tensors) { input_tensors_ = input_tensors; } + void set_input_tensors_as_shapes( + const std::vector& input_tensors_as_shapes) { + input_tensors_as_shapes_ = input_tensors_as_shapes; + } + void set_output(int idx, ShapeHandle shape) { outputs_[idx] = shape; } Status set_output(StringPiece output_name, const std::vector& shapes); @@ -336,8 +363,8 @@ class InferenceContext { // Returns in the result of dividing by . // Returns an error if is not positive or if // and does not evenly divide . 
- Status Divide(DimensionHandle dividend, int64 divisor, bool evenly_divisible, - DimensionHandle* out); + Status Divide(DimensionHandle dividend, DimensionOrConstant divisor, + bool evenly_divisible, DimensionHandle* out); // Returns in the sum of and . Status Add(DimensionHandle first, DimensionOrConstant second, @@ -408,6 +435,15 @@ class InferenceContext { return Status::OK(); } + // Note that shape functions should usually call MakeShapeFromShapeTensor, + // as it does more analysis to provide partial shapes. + // + // Returns in a new shape whose dimension sizes come from tensor . + // The tensor must be a 1-dimensional int32 or int64 tensor. If is NULL, + // then an unknown shape is returned. + Status MakeShapeFromTensor(const Tensor* t, ShapeHandle tensor_shape, + ShapeHandle* out); + private: // Creates and stores shapes for use in InferenceContext. class ShapeManager { @@ -443,7 +479,8 @@ class InferenceContext { // Shared initialization across the two constructors. Remove // once we get rid of one of them. void PreInputInit(const OpDef& op_def, - const std::vector& input_tensors); + const std::vector& input_tensors, + const std::vector& input_tensors_as_shapes); void PostInputInit(); DimensionHandle GetDimension(const DimensionOrConstant& d); @@ -463,11 +500,15 @@ class InferenceContext { ShapeManager shape_manager_; - // inputs_ and outputs_ refer to values from `shape_manager_`. + // inputs_, outputs_, and input_tensors_as_shapes_ refer to values from + // `shape_manager_`. std::vector inputs_; std::vector input_tensors_; std::vector requested_input_tensor_; std::vector outputs_; + // Can have fewer elements than inputs_. 
+ std::vector input_tensors_as_shapes_; + std::vector requested_input_tensor_as_partial_shape_; const NodeDef& node_def_; NameRangeMap input_name_map_; diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc index 76a485c678f..06096bfdcc7 100644 --- a/tensorflow/core/framework/shape_inference_test.cc +++ b/tensorflow/core/framework/shape_inference_test.cc @@ -71,7 +71,7 @@ TEST_F(ShapeInferenceTest, InputOutputByName) { .Attr("N", 3) .Input(FakeInput(DT_FLOAT)) .Finalize(&def); - InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {}); + InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {}, {}); EXPECT_EQ("5", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("10", c.DebugString(c.NumElements(c.input(1)))); @@ -107,7 +107,7 @@ static OpDef MakeOpDef(int num_inputs, int num_outputs) { TEST_F(ShapeInferenceTest, DimensionOrConstant) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {}); + InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {}, {}); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(InferenceContext::kUnknownDim)); EXPECT_EQ(1, c.Value(1)); @@ -122,7 +122,7 @@ TEST_F(ShapeInferenceTest, Run) { NodeDef def; def.set_name("foo"); def.set_op("foo_op"); - InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {}, {}); { auto fn = [](InferenceContext* c) { @@ -154,7 +154,7 @@ TEST_F(ShapeInferenceTest, Run) { TEST_F(ShapeInferenceTest, RankAndDimInspection) { NodeDef def; InferenceContext c(&def, MakeOpDef(3, 2), {Unknown(), S({1, -1, 3}), S({})}, - {}); + {}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(2, c.num_outputs()); @@ -195,7 +195,7 @@ TEST_F(ShapeInferenceTest, RankAndDimInspection) { TEST_F(ShapeInferenceTest, NumElements) { NodeDef def; InferenceContext c(&def, MakeOpDef(3, 2), - {Unknown(), S({1, -1, 3}), S({5, 4, 3, 2})}, {}); + {Unknown(), S({1, -1, 3}), S({5, 4, 3, 
2})}, {}, {}); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(1)))); @@ -208,7 +208,7 @@ TEST_F(ShapeInferenceTest, NumElements) { TEST_F(ShapeInferenceTest, WithRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -246,7 +246,7 @@ TEST_F(ShapeInferenceTest, WithRank) { TEST_F(ShapeInferenceTest, WithRankAtMost) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -284,7 +284,7 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) { TEST_F(ShapeInferenceTest, WithRankAtLeast) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -322,7 +322,7 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) { TEST_F(ShapeInferenceTest, WithValue) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {}, {}); auto d0 = c.Dim(c.input(0), 0); auto d1 = c.Dim(c.input(0), 1); @@ -363,7 +363,7 @@ TEST_F(ShapeInferenceTest, WithValue) { TEST_F(ShapeInferenceTest, MergeDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {}, {}); auto d2 = c.Dim(c.input(0), 0); auto d_unknown = c.Dim(c.input(0), 1); @@ -412,7 +412,7 @@ TEST_F(ShapeInferenceTest, MergeShape) { InferenceContext c(&def, MakeOpDef(7, 2), {Unknown(), S({1, 2}), S({-1, 2}), S({1, -1}), S({1, 3}), Unknown(), S({1})}, - {}); + {}, {}); auto s_unknown = c.input(0); auto s_1_2 = c.input(1); @@ 
-483,7 +483,7 @@ TEST_F(ShapeInferenceTest, MergePrefix) { { Unknown(), S({-1, 2}), S({1, -1, 3}), S({2, 4}), }, - {}); + {}, {}); auto s_unknown = c.input(0); auto s_u_2 = c.input(1); @@ -536,7 +536,7 @@ TEST_F(ShapeInferenceTest, MergePrefix) { TEST_F(ShapeInferenceTest, Subshape) { NodeDef def; InferenceContext c(&def, MakeOpDef(2, 2), {S({1, 2, 3, -1, 5}), Unknown()}, - {}); + {}, {}); ShapeHandle unknown = c.input(1); ShapeHandle out; @@ -611,7 +611,7 @@ TEST_F(ShapeInferenceTest, Subshape) { TEST_F(ShapeInferenceTest, Concatenate) { NodeDef def; InferenceContext c(&def, MakeOpDef(3, 2), - {S({1, -1, 3}), S({4, 5}), Unknown()}, {}); + {S({1, -1, 3}), S({4, 5}), Unknown()}, {}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -637,7 +637,7 @@ TEST_F(ShapeInferenceTest, Concatenate) { TEST_F(ShapeInferenceTest, ReplaceDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {}); + InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {}, {}); auto in = c.input(0); auto unknown = c.input(1); @@ -668,7 +668,7 @@ TEST_F(ShapeInferenceTest, ReplaceDim) { TEST_F(ShapeInferenceTest, MakeShape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {}, {}); std::vector dims; auto in0 = c.input(0); @@ -693,7 +693,7 @@ TEST_F(ShapeInferenceTest, MakeShape) { TEST_F(ShapeInferenceTest, UnknownShape) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto u0 = c.UnknownShape(); auto u1 = c.UnknownShape(); @@ -705,7 +705,7 @@ TEST_F(ShapeInferenceTest, UnknownShape) { TEST_F(ShapeInferenceTest, Scalar) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto s0 = c.Scalar(); EXPECT_EQ("[]", c.DebugString(s0)); @@ -716,7 +716,7 @@ 
TEST_F(ShapeInferenceTest, Scalar) { TEST_F(ShapeInferenceTest, Vector) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto s0 = c.Vector(1); EXPECT_EQ("[1]", c.DebugString(s0)); @@ -732,7 +732,7 @@ TEST_F(ShapeInferenceTest, Vector) { TEST_F(ShapeInferenceTest, Matrix) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto s0 = c.Matrix(1, 2); EXPECT_EQ("[1,2]", c.DebugString(s0)); @@ -754,7 +754,7 @@ TEST_F(ShapeInferenceTest, Matrix) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { auto create = [&](Tensor* t) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t}); + InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t}, {}); ShapeHandle out; Status s = c.MakeShapeFromShapeTensor(0, &out); if (s.ok()) { @@ -774,6 +774,9 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { t = ::tensorflow::test::AsTensor({3, 2, 1}); EXPECT_EQ("[3,2,1]", create(&t)); + t = ::tensorflow::test::AsTensor({3, -1, 1}); + EXPECT_EQ("[3,?,1]", create(&t)); + t = ::tensorflow::test::AsTensor({}); EXPECT_EQ("[]", create(&t)); @@ -790,10 +793,20 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { EXPECT_TRUE(StringPiece(create(&t)) .contains("Input tensor must be rank 1, but was rank 2")); + // Test negative values for the dims. + t = ::tensorflow::test::AsTensor({3, -2, 1}); + EXPECT_TRUE(StringPiece(create(&t)) + .contains("Invalid value in tensor used for shape: -2")); + + // Test negative values for the dims. + t = ::tensorflow::test::AsTensor({3, -2, 1}); + EXPECT_TRUE(StringPiece(create(&t)) + .contains("Invalid value in tensor used for shape: -2")); + // Test when the input shape is wrong. 
{ NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr}); + InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr}, {}); ShapeHandle out; EXPECT_EQ("Shape must be rank 1 but is rank 2", c.MakeShapeFromShapeTensor(0, &out).error_message()); @@ -803,7 +816,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); TensorShapeProto proto; // With a set unknown rank. @@ -839,7 +852,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { TEST_F(ShapeInferenceTest, MakeDim) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto d0 = c.MakeDim(1); auto d1 = c.MakeDim(1); @@ -853,7 +866,7 @@ TEST_F(ShapeInferenceTest, MakeDim) { TEST_F(ShapeInferenceTest, UnknownDim) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto d0 = c.UnknownDim(); auto d1 = c.UnknownDim(); @@ -865,7 +878,7 @@ TEST_F(ShapeInferenceTest, UnknownDim) { TEST_F(ShapeInferenceTest, UnknownShapeOfRank) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); auto unknown_shape_of_rank_3 = c.UnknownShapeOfRank(3); EXPECT_EQ("[?,?,?]", c.DebugString(unknown_shape_of_rank_3)); @@ -879,7 +892,7 @@ TEST_F(ShapeInferenceTest, InputTensors) { const Tensor t2 = tensorflow::test::AsTensor({20, 30}); NodeDef def; InferenceContext c(&def, MakeOpDef(3, 2), {S({1}), S({2}), S({3})}, - {&t1, &t2}); + {&t1, &t2}, {}); EXPECT_TRUE(c.input_tensor(0) == &t1); EXPECT_TRUE(c.input_tensor(1) == &t2); @@ -890,7 +903,7 @@ TEST_F(ShapeInferenceTest, MakeDimForScalarInput) { Tensor t1 = 
tensorflow::test::AsScalar(20); Tensor t2 = tensorflow::test::AsScalar(-1); NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2}); + InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2}, {}); DimensionHandle d; EXPECT_TRUE(c.MakeDimForScalarInput(0, &d).ok()); @@ -921,7 +934,7 @@ TEST_F(ShapeInferenceTest, GetAttr) { .ok()); std::vector empty; - InferenceContext c(&def, op_reg_data.op_def, empty, {}); + InferenceContext c(&def, op_reg_data.op_def, empty, {}, {}); string value; EXPECT_TRUE(c.GetAttr("foo", &value).ok()); EXPECT_EQ("bar", value); @@ -929,11 +942,14 @@ TEST_F(ShapeInferenceTest, GetAttr) { TEST_F(ShapeInferenceTest, Divide) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 1, 2, 0})}, {}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); auto d_unknown = c.Dim(s, 1); + auto d_1 = c.Dim(s, 2); + auto d_2 = c.Dim(s, 3); + auto d_0 = c.Dim(s, 4); bool evenly_divisible = true; // Dividing unknown by non-1 gives new unknown. 
@@ -947,9 +963,15 @@ TEST_F(ShapeInferenceTest, Divide) { EXPECT_TRUE(SameHandle(out, d_unknown)); EXPECT_TRUE(c.Divide(d_6, 1, evenly_divisible, &out).ok()); EXPECT_TRUE(SameHandle(out, d_6)); + EXPECT_TRUE(c.Divide(d_unknown, d_1, evenly_divisible, &out).ok()); + EXPECT_TRUE(SameHandle(out, d_unknown)); + EXPECT_TRUE(c.Divide(d_6, d_1, evenly_divisible, &out).ok()); + EXPECT_TRUE(SameHandle(out, d_6)); EXPECT_TRUE(c.Divide(d_6, 2, evenly_divisible, &out).ok()); EXPECT_EQ("3", c.DebugString(out)); + EXPECT_TRUE(c.Divide(d_6, d_2, evenly_divisible, &out).ok()); + EXPECT_EQ("3", c.DebugString(out)); EXPECT_TRUE( StringPiece(c.Divide(d_6, 5, evenly_divisible, &out).error_message()) @@ -958,6 +980,9 @@ TEST_F(ShapeInferenceTest, Divide) { EXPECT_TRUE( StringPiece(c.Divide(d_6, 0, evenly_divisible, &out).error_message()) .contains("Divisor must be positive but is 0")); + EXPECT_TRUE( + StringPiece(c.Divide(d_6, d_0, evenly_divisible, &out).error_message()) + .contains("Divisor must be positive but is 0")); EXPECT_TRUE( StringPiece(c.Divide(d_6, -1, evenly_divisible, &out).error_message()) @@ -979,7 +1004,7 @@ TEST_F(ShapeInferenceTest, Divide) { TEST_F(ShapeInferenceTest, Add) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1030,7 +1055,7 @@ TEST_F(ShapeInferenceTest, Add) { TEST_F(ShapeInferenceTest, Subtract) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1079,7 +1104,7 @@ TEST_F(ShapeInferenceTest, Subtract) { TEST_F(ShapeInferenceTest, Multiply) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1132,7 
+1157,7 @@ TEST_F(ShapeInferenceTest, Multiply) { TEST_F(ShapeInferenceTest, FullyDefined) { NodeDef def; std::vector empty; - InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}, {}); // No rank or missing dimension information should return false. EXPECT_FALSE(c.FullyDefined(c.UnknownShape())); @@ -1145,7 +1170,7 @@ TEST_F(ShapeInferenceTest, FullyDefined) { TEST_F(ShapeInferenceTest, Min) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1193,7 +1218,7 @@ TEST_F(ShapeInferenceTest, Min) { TEST_F(ShapeInferenceTest, Max) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1231,7 +1256,7 @@ TEST_F(ShapeInferenceTest, Max) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { NodeDef def; InferenceContext c(&def, MakeOpDef(3, 1), {Unknown(), Unknown(), Unknown()}, - {}); + {}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1243,7 +1268,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})}, + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})}, {}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1256,7 +1281,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, 
c.num_outputs()); @@ -1269,7 +1295,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1282,7 +1309,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1295,7 +1323,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1307,7 +1336,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1319,7 +1349,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, 
c.num_outputs()); @@ -1331,7 +1362,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1343,7 +1375,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc index 6cad1f8efaa..ed1d3ec5201 100644 --- a/tensorflow/core/framework/shape_inference_testutil.cc +++ b/tensorflow/core/framework/shape_inference_testutil.cc @@ -44,7 +44,8 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op, } shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def, - in_shapes, op.input_tensors); + in_shapes, op.input_tensors, + {} /* input_tensors_as_shapes */); TF_RETURN_IF_ERROR(c.construction_status()); if (op_reg_data->shape_inference_fn == nullptr) { return errors::InvalidArgument( diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 023014671c1..f6429806fe8 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -243,6 +243,11 @@ void CostModel::RecordMaxMemorySize(const Node* node, int output_slot, if (id < 0) return; Ensure(id); auto& current_max = max_mem_usage_[id].output_port_mem[output_slot]; + // If the memory allocator doesn't track memory usage, let's infer a lower + // bound from the tensor shape and its data type. 
+ if (bytes.value() < 0) { + bytes = MinTensorMemoryUsage(tensor_shape, dtype); + } if (bytes.value() > current_max.value()) { current_max = bytes.value(); max_mem_usage_[id].output_port_shape[output_slot] = tensor_shape; @@ -476,4 +481,18 @@ void CostModel::WriteSummaryToLog() const { } } +Bytes CostModel::MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, + const DataType& dtype) { + if (tensor_shape.unknown_rank()) { + return Bytes(-1); + } + + size_t num_coefficients = 1; + for (const TensorShapeProto::Dim& dim : tensor_shape.dim()) { + // If the dimension is unknown, it has to be at least 1 + num_coefficients *= std::max(dim.size(), 1); + } + return Bytes(num_coefficients * DataTypeSize(dtype)); +} + } // namespace tensorflow diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index 95bd0b9da17..0d942338b08 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -159,6 +159,9 @@ class CostModel { void WriteSummaryToLog() const; private: + static Bytes MinTensorMemoryUsage(const TensorShapeProto& tensor_shape, + const DataType& dtype); + const bool is_global_; // Resizes vectors so that they are large enough for "id". diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 7acdfaa70a2..92d35977f9f 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -355,33 +355,19 @@ Status GraphConstructor::ValidateShape(Node* node) { // functions that are not critical to correct execution but // would cause graphs to fail if imported after correcting. // - // This can be removed after 2017/03/08. 
const string& op = node->def().op(); - const std::vector whitelist = {"RandomShuffleQueue", - "PaddingFIFOQueue", - "FIFOQueue", - "PriorityQueue", - "QueueSize", - "Stack", - "Barrier", - "BarrierReadySize", - "BarrierIncompleteSize", - "HashTable", - "MutableHashTable", - "MutableHashTableOfTensors", - "Mutex", - "CuckooTable", - "IndexTable", - "WholeFileReader", - "TextLineReader", - "FixedLengthRecordReader", - "TFRecordReader", - "IdentityReader", - "RefSwitch", - "RefEnter", - "RefNextIteration", - "RefMerge", - "RefIdentity"}; + const std::vector whitelist = { + // To be removed after 2017/03/08. + "RandomShuffleQueue", "PaddingFIFOQueue", "FIFOQueue", + "PriorityQueue", "QueueSize", "Stack", "Barrier", "BarrierReadySize", + "BarrierIncompleteSize", "HashTable", "MutableHashTable", + "MutableHashTableOfTensors", "Mutex", "CuckooTable", "IndexTable", + "WholeFileReader", "TextLineReader", "FixedLengthRecordReader", + "TFRecordReader", "IdentityReader", "RefSwitch", "RefEnter", + "RefNextIteration", "RefMerge", "RefIdentity", + // To be removed after 2017/04/24. 
+ "ConditionalAccumulator", "SparseConditionalAccumulator", "Table", + }; if (std::find(whitelist.begin(), whitelist.end(), op) == whitelist.end()) { return errors::InvalidArgument( diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index 454cb2aa615..3275cde762c 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -77,7 +77,6 @@ struct ControlFlowInfo { const Node* frame = nullptr; // frame of a node const Node* parent_frame = nullptr; // parent frame of a node string frame_name; // frame name of a node - int iter_level = -1; // level of a node }; struct PairIntHash { @@ -365,11 +364,13 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { info->clear(); info->resize(g->num_node_ids()); + std::vector parent_nodes; + parent_nodes.resize(g->num_node_ids()); + Node* src_node = g->source_node(); ControlFlowInfo& src_info = (*info)[src_node->id()]; src_info.frame = src_node; src_info.parent_frame = src_node; - src_info.iter_level = 0; string frame_name; std::deque ready; @@ -381,7 +382,6 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { const Node* frame = curr_info.frame; const Node* parent = curr_info.parent_frame; frame_name = curr_info.frame_name; - int iter_level = curr_info.iter_level; if (IsExit(curr_node)) { // Exit to the parent frame. @@ -389,7 +389,6 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { frame = parent_info.frame; parent = parent_info.parent_frame; frame_name = parent_info.frame_name; - iter_level = parent_info.iter_level; } // Optimize colocation for control flow nodes. @@ -400,23 +399,29 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { int out_id = out->id(); ControlFlowInfo* out_info = &(*info)[out_id]; const Node* out_parent = out_info->parent_frame; - bool is_visited = (out_info->iter_level != -1); + bool is_visited = (parent_nodes[out_id] != nullptr); // Skip Sink/Source nodes. 
if (!out->IsOp()) continue; // Add to ready queue if not seen. if (!is_visited) { + parent_nodes[out->id()] = curr_node; ready.push_back(out); } // Process the node 'out'. if (IsEnter(out)) { if (is_visited) { - const string& parent_name = (*info)[out_parent->id()].frame_name; - if (parent_name != frame_name || iter_level != out_info->iter_level) { - return errors::InvalidArgument("All inputs to node ", out->name(), - " must be from the same frame."); + const string& parent_frame = (*info)[out_parent->id()].frame_name; + if (parent_frame != frame_name) { + return errors::InvalidArgument( + "The node '", out->name(), + "' has inputs from different " + "frames. The input '", + curr_node->name(), "' is in frame '", frame_name, + "'. The input '", parent_nodes[out->id()]->name(), + "' is in frame '", parent_frame, "'."); } } else { out_info->frame = out; @@ -427,36 +432,26 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { return errors::InvalidArgument("The Enter node ", out->name(), " must have a frame name."); } - out_info->iter_level = 0; - } - } else if (IsNextIteration(out)) { - if (is_visited) { - if (out_info->frame_name != frame_name) { - return errors::InvalidArgument("All inputs to node ", out->name(), - " must be from the same frame."); - } - } else { - out_info->frame = frame; - out_info->parent_frame = parent; - out_info->frame_name = frame_name; - out_info->iter_level = iter_level + 1; } } else { if (is_visited) { if (out_info->frame_name != frame_name) { - return errors::InvalidArgument("All inputs to node ", out->name(), - " must be from the same frame."); + return errors::InvalidArgument( + "The node '", out->name(), + "' has inputs from different " + "frames. The input '", + curr_node->name(), "' is in frame '", frame_name, + "'. 
The input '", parent_nodes[out->id()]->name(), + "' is in frame '", out_info->frame_name, "'."); } } else { out_info->frame = frame; out_info->parent_frame = parent; out_info->frame_name = frame_name; - out_info->iter_level = iter_level; } } } } - return Status::OK(); } @@ -559,7 +554,6 @@ void AddControlFlowInfo(const Node* node, const Node* src, info->frame = src_info.frame; info->parent_frame = src_info.parent_frame; info->frame_name = src_info.frame_name; - info->iter_level = src_info.iter_level; } // Constructs a control loop. Returns a struct containing the newly created diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index 27d89295958..46e54c9eabe 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -129,7 +129,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const { void NodeBuilder::AddIndexError(Node* node, int i) { if (node == nullptr) { errors_.emplace_back( - strings::StrCat("Attempt to add nullptr Node to node with type", + strings::StrCat("Attempt to add nullptr Node to node with type ", def_builder_.op_def().name())); } else { errors_.emplace_back( diff --git a/tensorflow/core/graph/types.h b/tensorflow/core/graph/types.h index accd2cd888b..c7078099277 100644 --- a/tensorflow/core/graph/types.h +++ b/tensorflow/core/graph/types.h @@ -24,6 +24,9 @@ namespace tensorflow { // We model running time in microseconds. TF_LIB_GTL_DEFINE_INT_TYPE(Microseconds, int64); +// We can also model running time in nanoseconds for more accuracy. +TF_LIB_GTL_DEFINE_INT_TYPE(Nanoseconds, int64); + // We model size in bytes. 
TF_LIB_GTL_DEFINE_INT_TYPE(Bytes, int64); diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 94e685731c8..34954f00664 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -57,6 +57,7 @@ tf_kernel_library( name = "strided_slice_op", srcs = [ "strided_slice_op.cc", + "strided_slice_op_inst_0.cc", "strided_slice_op_inst_1.cc", "strided_slice_op_inst_2.cc", "strided_slice_op_inst_3.cc", @@ -404,7 +405,6 @@ ARRAY_DEPS = [ "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", - "//tensorflow/core:gpu_runtime", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:proto_text", @@ -419,7 +419,9 @@ tf_kernel_libraries( "debug_ops", "immutable_constant_op", ], - deps = ARRAY_DEPS, + deps = ARRAY_DEPS + [ + "//tensorflow/core:gpu_runtime", + ], ) tf_kernel_libraries( @@ -563,6 +565,24 @@ tf_cc_test( ], ) +tf_cc_test( + name = "fake_quant_ops_test", + size = "small", + srcs = ["fake_quant_ops_test.cc"], + deps = [ + ":fake_quant_ops", + ":ops_testutil", + ":ops_util", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + tf_cc_test( name = "fused_batch_norm_op_test", size = "small", @@ -1058,6 +1078,7 @@ tf_kernel_libraries( ":image_resizer_state", "//tensorflow/core:framework", "//tensorflow/core:image_ops_op_lib", + "//tensorflow/core:jpeg", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", @@ -1710,6 +1731,22 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "fake_quant_ops", + srcs = ["fake_quant_ops.cc"], + hdrs = ["fake_quant_ops_functor.h"], + gpu_srcs = [ + "fake_quant_ops_gpu.cu.cc", + "fake_quant_ops_functor.h", + ], + deps = [ + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//third_party/eigen3", + ], + 
alwayslink = 1, +) + tf_kernel_library( name = "fused_batch_norm_util", gpu_srcs = [ @@ -2226,6 +2263,7 @@ filegroup( "strided_slice_op.cc", "strided_slice_op.h", "strided_slice_op_impl.h", + "strided_slice_op_inst_0.cc", "strided_slice_op_inst_1.cc", "strided_slice_op_inst_2.cc", "strided_slice_op_inst_3.cc", @@ -2393,6 +2431,8 @@ filegroup( name = "android_quantized_ops", srcs = [ "dequantize_op.cc", + "meta_support.cc", + "meta_support.h", "quantization_utils.cc", "quantization_utils.h", "quantize_down_and_shrink_range.cc", @@ -2406,6 +2446,7 @@ filegroup( "quantized_pooling_ops.cc", "quantized_reshape_op.cc", "reference_gemm.h", + "requantization_range_op.cc", "requantize.cc", "reshape_op.h", ], @@ -2493,6 +2534,7 @@ tf_kernel_library( name = "quantized_ops", srcs = [ "dequantize_op.cc", + "meta_support.cc", "quantization_utils.cc", "quantize_down_and_shrink_range.cc", "quantize_op.cc", @@ -2504,10 +2546,12 @@ tf_kernel_library( "quantized_matmul_op.cc", "quantized_pooling_ops.cc", "quantized_reshape_op.cc", + "requantization_range_op.cc", "requantize.cc", "reshape_op.h", ], hdrs = [ + "meta_support.h", "quantization_utils.h", "reference_gemm.h", ], @@ -2528,6 +2572,22 @@ tf_kernel_library( ], ) +tf_cc_test( + name = "requantization_range_op_test", + size = "small", + srcs = ["requantization_range_op_test.cc"], + deps = [ + ":quantized_ops", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/kernels:ops_testutil", + "//tensorflow/core/kernels:ops_util", + ], +) + tf_cc_test( name = "quantize_down_and_shrink_range_op_test", size = "small", @@ -2621,6 +2681,7 @@ tf_cc_test( name = "quantized_conv_ops_test", size = "small", srcs = ["quantized_conv_ops_test.cc"], + tags = ["nomsan"], # http://b/32242946 deps = [ ":quantized_ops", "//tensorflow/core:array_ops_op_lib", @@ -2659,6 +2720,7 @@ tf_cc_test( name = "quantized_matmul_op_test", 
size = "small", srcs = ["quantized_matmul_op_test.cc"], + tags = ["nomsan"], # http://b/32242946 deps = [ ":quantized_ops", "//tensorflow/core:array_ops_op_lib", diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index a743be66124..dba37ca396d 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -209,6 +209,7 @@ TF_CALL_ALL_TYPES(REGISTER_CPU); #undef REGISTER_CPU #if GOOGLE_CUDA +REGISTER_KERNEL(bool, GPU); REGISTER_KERNEL(Eigen::half, GPU); REGISTER_KERNEL(float, GPU); REGISTER_KERNEL(double, GPU); diff --git a/tensorflow/core/kernels/constant_op_gpu.cu.cc b/tensorflow/core/kernels/constant_op_gpu.cu.cc index 29f39a72f39..f12cf3fe7fd 100644 --- a/tensorflow/core/kernels/constant_op_gpu.cu.cc +++ b/tensorflow/core/kernels/constant_op_gpu.cu.cc @@ -89,6 +89,7 @@ struct SetZeroFunctor { }; #define DEFINE_SETZERO_GPU(T) template struct SetZeroFunctor +DEFINE_SETZERO_GPU(bool); DEFINE_SETZERO_GPU(Eigen::half); DEFINE_SETZERO_GPU(float); DEFINE_SETZERO_GPU(double); diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index e7d4c4778ea..572a729b34b 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -237,7 +237,7 @@ struct functor_traits> { }; // TODO(b/32239616): This kernel should be moved into Eigen and vectorized. 
-template +template struct google_floor_div { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& x, const T& y) const { @@ -251,6 +251,15 @@ struct google_floor_div { } }; +template +struct google_floor_div< + T, typename std::enable_if::value>::type> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const T operator()(const T& x, + const T& y) const { + return x / y; + } +}; + template struct functor_traits> { enum { diff --git a/tensorflow/core/kernels/dequantize_op.cc b/tensorflow/core/kernels/dequantize_op.cc index 375287000eb..c28909e03ba 100644 --- a/tensorflow/core/kernels/dequantize_op.cc +++ b/tensorflow/core/kernels/dequantize_op.cc @@ -17,11 +17,12 @@ limitations under the License. #define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace { @@ -75,9 +76,15 @@ class DequantizeOp : public OpKernel { scale_factor) + min_range; } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) { - QuantizedTensorToFloatInPlaceUsingEigen( - ctx->template eigen_device(), input, min_range, max_range, - output); + if (meta::IsSupportedAndEnabled() && std::is_same()) { + auto input_ui8_array = input.flat(); + meta::Dequantize(ctx, input_ui8_array.data(), input_ui8_array.size(), + min_range, max_range, output->flat().data()); + } else { + QuantizedTensorToFloatInPlaceUsingEigen( + ctx->template eigen_device(), input, min_range, max_range, + output); + } } } diff --git a/tensorflow/core/kernels/example_parsing_ops_test.cc b/tensorflow/core/kernels/example_parsing_ops_test.cc index 187d72685ec..67ac4777130 100644 --- a/tensorflow/core/kernels/example_parsing_ops_test.cc +++ 
b/tensorflow/core/kernels/example_parsing_ops_test.cc @@ -33,66 +33,83 @@ limitations under the License. namespace tensorflow { -typedef std::map, Tensor> ExampleTensorMap; +typedef std::map, Tensor> ExampleTensorMap; // Fillers to fill the underlying repeated array in protobuf. class BytesFiller { public: - BytesFiller() : dense_default(DT_STRING, TensorShape()) {} - void operator()(Feature* f) const { - f->mutable_bytes_list()->add_value("abcd1234abcd1234abcd1234abcd1234!"); + BytesFiller() {} + void operator()(Feature* f, int feature_size) const { + for (int i = 0; i < feature_size; ++i) { + f->mutable_bytes_list()->add_value("abcd1234abcd1234abcd1234abcd1234!"); + } + } + Tensor make_dense_default(int feature_size) { + return Tensor(dtype, TensorShape({feature_size})); } - Tensor dense_default; DataType dtype = DT_STRING; }; class Int64Filler { public: - Int64Filler() : dense_default(DT_INT64, TensorShape()) {} - void operator()(Feature* f) const { - f->mutable_int64_list()->add_value(1729); + Int64Filler() {} + void operator()(Feature* f, int feature_size) const { + for (int i = 0; i < feature_size; ++i) { + f->mutable_int64_list()->add_value(1729); + } + } + Tensor make_dense_default(int feature_size) { + return Tensor(dtype, TensorShape({feature_size})); } - Tensor dense_default; DataType dtype = DT_INT64; }; class FloatFiller { public: - FloatFiller() : dense_default(DT_FLOAT, TensorShape()) {} - void operator()(Feature* f) const { - f->mutable_float_list()->add_value(1.729); + FloatFiller() {} + void operator()(Feature* f, int feature_size) const { + for (int i = 0; i < feature_size; ++i) { + f->mutable_float_list()->add_value(1.729); + } + } + Tensor make_dense_default(int feature_size) { + return Tensor(dtype, TensorShape({feature_size})); } - Tensor dense_default; DataType dtype = DT_FLOAT; }; template struct ExampleStore { typedef T Filler; - static ExampleTensorMap GetSerializedExamples() { - ExampleTensorMap examples; - int keys[] = {10, 100, 1000}; 
- int batch_sizes[] = {128, 512}; + static void AddExample(ExampleTensorMap* examples, int num_keys, + int batch_size, int feature_size) { Example example; Filler fill; - for (int num_keys : keys) { - for (int batch_size : batch_sizes) { - Tensor record_string(DT_STRING, TensorShape({batch_size})); - auto string_t = record_string.vec(); - example.Clear(); - for (int b = 0; b < batch_size; ++b) { - for (int k = 0; k < num_keys; ++k) { - string k_str = strings::Printf("feature_%d", k); - Feature f; - fill(&f); - Features* features = example.mutable_features(); - (*features->mutable_feature())[k_str] = f; - } - CHECK(example.SerializeToString(&string_t(b))); - } - examples[std::make_pair(batch_size, num_keys)] = record_string; + Tensor record_string(DT_STRING, TensorShape({batch_size})); + auto string_t = record_string.vec(); + example.Clear(); + for (int b = 0; b < batch_size; ++b) { + for (int k = 0; k < num_keys; ++k) { + string k_str = strings::Printf("feature_%d", k); + Feature f; + fill(&f, feature_size); + Features* features = example.mutable_features(); + (*features->mutable_feature())[k_str] = f; } + CHECK(example.SerializeToString(&string_t(b))); } + (*examples)[std::make_tuple(batch_size, num_keys, feature_size)] = + record_string; + } + static ExampleTensorMap GetSerializedExamples() { + ExampleTensorMap examples; + AddExample(&examples, 10, 128, 1); + AddExample(&examples, 100, 128, 1); + AddExample(&examples, 1000, 128, 1); + AddExample(&examples, 10, 512, 1); + AddExample(&examples, 100, 512, 1); + AddExample(&examples, 1000, 512, 1); + AddExample(&examples, 1, 1, 1000000); return examples; } static ExampleTensorMap serialized_example; @@ -118,10 +135,10 @@ struct BenchmarkOptions { }; template -static Graph* ParseExample(int batch_size, int num_keys) { +static Graph* ParseExample(int batch_size, int num_keys, int feature_size) { Graph* g = new Graph(OpRegistry::Global()); - Tensor& serialized = - 
Options::Store::serialized_example[std::make_pair(batch_size, num_keys)]; + Tensor& serialized = Options::Store::serialized_example[std::make_tuple( + batch_size, num_keys, feature_size)]; Tensor names(DT_STRING, TensorShape({batch_size})); std::vector sparse_keys; @@ -135,9 +152,9 @@ static Graph* ParseExample(int batch_size, int num_keys) { key.scalar()() = strings::Printf("feature_%d", i); if (opt.benchmark_dense) { dense_keys.emplace_back(test::graph::Constant(g, key)); - dense_defaults.emplace_back( - test::graph::Constant(g, opt.filler.dense_default)); - dense_shapes.push_back(TensorShape()); + dense_defaults.emplace_back(test::graph::Constant( + g, opt.filler.make_dense_default(feature_size))); + dense_shapes.push_back(TensorShape({feature_size})); } else { sparse_keys.emplace_back(test::graph::Constant(g, key)); sparse_types.push_back(opt.filler.dtype); @@ -166,23 +183,25 @@ typedef BenchmarkOptions, true> DenseInt64; typedef BenchmarkOptions, false> SparseFloat; typedef BenchmarkOptions, true> DenseFloat; -// B == batch_size, K == num_keys. K must be one of 10, 100, 1000 -#define BM_ParseExample(TYPE, B, K) \ - static void BM_ParseExample##_##TYPE##_##B##_##K(int iters) { \ - int64 items_per_iter = static_cast(B) * K; \ +// B == batch_size, K == num_keys. F == feature_size. 
+// K must be one of 10, 100, 1000 +#define BM_ParseExample(TYPE, B, K, F) \ + static void BM_ParseExample##_##TYPE##_##B##_##K##_##F(int iters) { \ + int64 items_per_iter = static_cast(B) * K * F; \ testing::UseRealTime(); \ testing::ItemsProcessed(static_cast(iters) * items_per_iter); \ - test::Benchmark("cpu", ParseExample(B, K)).Run(iters); \ + test::Benchmark("cpu", ParseExample(B, K, F)).Run(iters); \ } \ - BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K); + BENCHMARK(BM_ParseExample##_##TYPE##_##B##_##K##_##F); -#define BM_AllParseExample(Type) \ - BM_ParseExample(Type, 128, 10); \ - BM_ParseExample(Type, 512, 10); \ - BM_ParseExample(Type, 128, 100); \ - BM_ParseExample(Type, 512, 100); \ - BM_ParseExample(Type, 128, 1000); \ - BM_ParseExample(Type, 512, 1000); +#define BM_AllParseExample(Type) \ + BM_ParseExample(Type, 128, 10, 1); \ + BM_ParseExample(Type, 512, 10, 1); \ + BM_ParseExample(Type, 128, 100, 1); \ + BM_ParseExample(Type, 512, 100, 1); \ + BM_ParseExample(Type, 128, 1000, 1); \ + BM_ParseExample(Type, 512, 1000, 1); \ + BM_ParseExample(Type, 1, 1, 1000000); BM_AllParseExample(SparseString); BM_AllParseExample(DenseString); diff --git a/tensorflow/core/kernels/fake_quant_ops.cc b/tensorflow/core/kernels/fake_quant_ops.cc new file mode 100644 index 00000000000..41f9c218437 --- /dev/null +++ b/tensorflow/core/kernels/fake_quant_ops.cc @@ -0,0 +1,580 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#define EIGEN_USE_THREADS + +#ifdef GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif // GOOGLE_CUDA + +#define FAKE_QUANT_NO_DEBUG + +#include "tensorflow/core/kernels/fake_quant_ops_functor.h" + +#include "tensorflow/core/framework/numeric_op.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/protobuf.h" + +using tensorflow::BinaryElementWiseOp; +using tensorflow::DEVICE_CPU; +#if GOOGLE_CUDA +using tensorflow::DEVICE_GPU; +#endif +using tensorflow::DT_BOOL; +using tensorflow::OpKernel; +using tensorflow::OpKernelConstruction; +using tensorflow::OpKernelContext; +using tensorflow::PersistentTensor; +using tensorflow::Tensor; +using tensorflow::TensorShape; +using tensorflow::TTypes; // NOLINT This is needed in CUDA mode, do not remove. +using tensorflow::UnaryElementWiseOp; +using tensorflow::errors::InvalidArgument; + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +// ----------------------------------------------------------------------------- +// Implementation of FakeQuantWithMinMaxArgsOp, see its documentation in +// core/ops/array_ops.cc. 
+template +class FakeQuantWithMinMaxArgsOp + : public UnaryElementWiseOp> { + public: + typedef UnaryElementWiseOp> Base; + explicit FakeQuantWithMinMaxArgsOp(OpKernelConstruction* context) + : Base::UnaryElementWiseOp(context) { + OP_REQUIRES_OK(context, context->GetAttr("min", &min_)); + OP_REQUIRES_OK(context, context->GetAttr("max", &max_)); + OP_REQUIRES(context, min_ < max_, + InvalidArgument("min has to be smaller than max, was: ", min_, + " >= ", max_)); + } + + void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) { + FakeQuantWithMinMaxArgsFunctor functor; + functor(context->eigen_device(), input.flat(), min_, max_, + output->flat()); + } + private: + float min_; + float max_; +}; + +// Implementation of FakeQuantWithMinMaxArgsGradientOp, see its documentation in +// core/ops/array_ops.cc. +template +class FakeQuantWithMinMaxArgsGradientOp + : public BinaryElementWiseOp> { + public: + typedef BinaryElementWiseOp> + Base; + explicit FakeQuantWithMinMaxArgsGradientOp(OpKernelConstruction* context) + : Base::BinaryElementWiseOp(context) { + OP_REQUIRES_OK(context, context->GetAttr("min", &min_)); + OP_REQUIRES_OK(context, context->GetAttr("max", &max_)); + OP_REQUIRES(context, min_ < max_, + InvalidArgument("min has to be smaller than max, was: ", min_, + " >= ", max_)); + } + + template + void Operate(OpKernelContext* context, const Tensor& gradient, + const Tensor& input, Tensor* output) { + OperateNoTemplate(context, gradient, input, output); + } + + void OperateNoTemplate(OpKernelContext* context, const Tensor& gradient, + const Tensor& input, Tensor* output) { + OP_REQUIRES(context, input.IsSameSize(gradient), + InvalidArgument("gradient and input must be the same size")); + FakeQuantWithMinMaxArgsGradientFunctor functor; + functor(context->eigen_device(), gradient.flat(), + input.flat(), min_, max_, output->flat()); + } + private: + float min_; + float max_; +}; + 
+REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_CPU), + FakeQuantWithMinMaxArgsOp); +REGISTER_KERNEL_BUILDER( + Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_CPU), + FakeQuantWithMinMaxArgsGradientOp); + +#if GOOGLE_CUDA +typedef Eigen::GpuDevice GPUDevice; + +// Forward declarations for functor specializations for GPU. +template <> +void FakeQuantWithMinMaxArgsFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstFlat inputs, + const float min, const float max, + typename TTypes::Flat outputs); +extern template struct FakeQuantWithMinMaxArgsFunctor; +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxArgs").Device(DEVICE_GPU), + FakeQuantWithMinMaxArgsOp); + +template <> +void FakeQuantWithMinMaxArgsGradientFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstFlat gradients, + typename TTypes::ConstFlat inputs, + const float min, const float max, + typename TTypes::Flat backprops); +REGISTER_KERNEL_BUILDER( + Name("FakeQuantWithMinMaxArgsGradient").Device(DEVICE_GPU), + FakeQuantWithMinMaxArgsGradientOp); +#endif // GOOGLE_CUDA + +// ----------------------------------------------------------------------------- +// Implementation of FakeQuantWithMinMaxVarsOp, see its documentation in +// core/ops/array_ops.cc. 
+template +class FakeQuantWithMinMaxVarsOp : public OpKernel { + public: + explicit FakeQuantWithMinMaxVarsOp(OpKernelConstruction* context) + : OpKernel::OpKernel(context) { +#ifndef FAKE_QUANT_NO_DEBUG + OP_REQUIRES_OK(context, + context->allocate_persistent(DT_BOOL, {}, + &check_min_max_handle_, + nullptr)); +#endif + } + + void Compute(OpKernelContext* context) override { + CHECK_EQ(3, context->num_inputs()); + const Tensor& input = context->input(0); + const Tensor& min = context->input(1); + const Tensor& max = context->input(2); +#ifndef FAKE_QUANT_NO_DEBUG + Tensor* check_min_max = check_min_max_handle_.AccessTensor(context); +#endif + + Tensor* output; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + + FakeQuantWithMinMaxVarsFunctor functor; + functor(context->eigen_device(), input.flat(), + min.scalar(), max.scalar(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + output->flat()); + } + + private: +#ifndef FAKE_QUANT_NO_DEBUG + PersistentTensor check_min_max_handle_; +#endif +}; + +// Implementation of FakeQuantWithMinMaxVarsGradientOp, see its documentation in +// core/ops/array_ops.cc. 
+template +class FakeQuantWithMinMaxVarsGradientOp : public OpKernel { + public: + explicit FakeQuantWithMinMaxVarsGradientOp(OpKernelConstruction* context) + : OpKernel::OpKernel(context) { +#ifndef FAKE_QUANT_NO_DEBUG + OP_REQUIRES_OK(context, + context->allocate_persistent(DT_BOOL, {}, + &check_min_max_handle_, + nullptr)); +#endif + } + + void Compute(OpKernelContext* context) override { + CHECK_EQ(4, context->num_inputs()); + const Tensor& gradient = context->input(0); + const Tensor& input = context->input(1); + OP_REQUIRES(context, input.IsSameSize(gradient), + InvalidArgument("gradient and input must be the same size")); + const Tensor& min = context->input(2); + const Tensor& max = context->input(3); +#ifndef FAKE_QUANT_NO_DEBUG + Tensor* check_min_max = check_min_max_handle_.AccessTensor(context); +#endif + + Tensor* grad_wrt_input; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &grad_wrt_input)); + + TensorShape scalar_shape; + Tensor* grad_wrt_min; + OP_REQUIRES_OK(context, + context->allocate_output(1, scalar_shape, &grad_wrt_min)); + + Tensor* grad_wrt_max; + OP_REQUIRES_OK(context, + context->allocate_output(2, scalar_shape, &grad_wrt_max)); + + FakeQuantWithMinMaxVarsGradientFunctor functor; + functor(context->eigen_device(), gradient.flat(), + input.flat(), min.scalar(), max.scalar(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + grad_wrt_input->flat(), grad_wrt_min->scalar(), + grad_wrt_max->scalar()); + } + + private: +#ifndef FAKE_QUANT_NO_DEBUG + PersistentTensor check_min_max_handle_; +#endif +}; + +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVars").Device(DEVICE_CPU), + FakeQuantWithMinMaxVarsOp); +REGISTER_KERNEL_BUILDER( + Name("FakeQuantWithMinMaxVarsGradient").Device(DEVICE_CPU), + FakeQuantWithMinMaxVarsGradientOp); + +#if GOOGLE_CUDA +template <> +void FakeQuantWithMinMaxVarsFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstScalar min, 
+ typename TTypes::ConstScalar max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat output); +extern template struct FakeQuantWithMinMaxVarsFunctor; +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVars") + .Device(DEVICE_GPU) + .HostMemory("min") + .HostMemory("max"), + FakeQuantWithMinMaxVarsOp); + +template <> +void FakeQuantWithMinMaxVarsGradientFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstFlat gradients, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstScalar min, + typename TTypes::ConstScalar max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat backprops_wrt_input, + typename TTypes::Scalar backprop_wrt_min, + typename TTypes::Scalar backprop_wrt_max); +extern template struct FakeQuantWithMinMaxVarsGradientFunctor; +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsGradient") + .Device(DEVICE_GPU) + .HostMemory("min") + .HostMemory("max"), + FakeQuantWithMinMaxVarsGradientOp); +#endif // GOOGLE_CUDA + +// ----------------------------------------------------------------------------- +// Implementation of FakeQuantWithMinMaxVarsPerChannelOp, see its documentation +// in core/ops/array_ops.cc. +template +class FakeQuantWithMinMaxVarsPerChannelOp : public OpKernel { + public: + explicit FakeQuantWithMinMaxVarsPerChannelOp(OpKernelConstruction* context) + : OpKernel::OpKernel(context) { +#ifndef FAKE_QUANT_NO_DEBUG + OP_REQUIRES_OK(context, + context->allocate_persistent(DT_BOOL, {}, + &check_min_max_handle_, + nullptr)); +#endif + } + + void Compute(OpKernelContext* context) override { + CHECK_EQ(3, context->num_inputs()); + const Tensor& input = context->input(0); + const int depth = input.dim_size(input.dims() - 1); // last dimension size. 
+ const Tensor& min = context->input(1); + OP_REQUIRES(context, min.dim_size(0) == depth, + InvalidArgument("min has incorrect size, expected ", depth, + " was ", min.dim_size(0))); + const Tensor& max = context->input(2); + OP_REQUIRES(context, max.dim_size(0) == depth, + InvalidArgument("max has incorrect size, expected ", depth, + " was ", max.dim_size(0))); +#ifndef FAKE_QUANT_NO_DEBUG + Tensor* check_min_max = check_min_max_handle_.AccessTensor(context); +#endif + + Tensor* output; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &output)); + + switch (input.dims()) { + case 4: { + FakeQuant4WithMinMaxVarsPerChannelFunctor functor; + functor(context->eigen_device(), input.dim_size(0), + input.dim_size(1), input.dim_size(2), input.dim_size(3), + input.flat(), min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + output->flat()); + break; + } + case 2: { + FakeQuant2WithMinMaxVarsPerChannelFunctor functor; + functor(context->eigen_device(), + input.dim_size(0), input.dim_size(1), + input.flat(), min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + output->flat()); + break; + } + case 1: { + FakeQuant1WithMinMaxVarsPerChannelFunctor functor; + functor(context->eigen_device(), + input.vec(), min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + output->vec()); + break; + } + default: + context->SetStatus(InvalidArgument("Only inputs of dimensions 1, 2 or " + "4 supported, was: ", input.dims())); + break; + } + } + + private: +#ifndef FAKE_QUANT_NO_DEBUG + PersistentTensor check_min_max_handle_; +#endif +}; + +// Implementation of FakeQuantWithMinMaxVarsPerChannelGradientOp, see its +// documentation in core/ops/array_ops.cc. 
+template +class FakeQuantWithMinMaxVarsPerChannelGradientOp : public OpKernel { + public: + explicit FakeQuantWithMinMaxVarsPerChannelGradientOp( + OpKernelConstruction* context) : OpKernel::OpKernel(context) { +#ifndef FAKE_QUANT_NO_DEBUG + OP_REQUIRES_OK(context, + context->allocate_persistent(DT_BOOL, {}, + &check_min_max_handle_, + nullptr)); +#endif + } + + void Compute(OpKernelContext* context) override { + CHECK_EQ(4, context->num_inputs()); + const Tensor& gradient = context->input(0); + const Tensor& input = context->input(1); + OP_REQUIRES(context, input.IsSameSize(gradient), + InvalidArgument("gradient and input must be the same size")); + const int depth = input.dim_size(input.dims() - 1); // last dimension size. + const Tensor& min = context->input(2); + OP_REQUIRES(context, min.dim_size(0) == depth, + InvalidArgument("min has incorrect size, expected ", depth, + " was ", min.dim_size(0))); + const Tensor& max = context->input(3); + OP_REQUIRES(context, max.dim_size(0) == depth, + InvalidArgument("max has incorrect size, expected ", depth, + " was ", max.dim_size(0))); +#ifndef FAKE_QUANT_NO_DEBUG + Tensor* check_min_max = check_min_max_handle_.AccessTensor(context); +#endif + + Tensor* grad_wrt_input; + OP_REQUIRES_OK(context, + context->allocate_output(0, input.shape(), &grad_wrt_input)); + + TensorShape min_max_shape({input.dim_size(input.dims() - 1)}); + Tensor* grad_wrt_min; + OP_REQUIRES_OK(context, + context->allocate_output(1, min_max_shape, &grad_wrt_min)); + + Tensor* grad_wrt_max; + OP_REQUIRES_OK(context, + context->allocate_output(2, min_max_shape, &grad_wrt_max)); + + switch (input.dims()) { + case 4: { + FakeQuant4WithMinMaxVarsPerChannelGradientFunctor functor; + functor(context->eigen_device(), input.dim_size(0), + input.dim_size(1), input.dim_size(2), input.dim_size(3), + gradient.flat(), input.flat(), + min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + grad_wrt_input->flat(), + 
grad_wrt_min->vec(), grad_wrt_max->vec()); + break; + } + case 2: { + FakeQuant2WithMinMaxVarsPerChannelGradientFunctor functor; + functor(context->eigen_device(), + input.dim_size(0), input.dim_size(1), + gradient.flat(), input.flat(), + min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + grad_wrt_input->flat(), + grad_wrt_min->vec(), grad_wrt_max->vec()); + break; + } + case 1: { + FakeQuant1WithMinMaxVarsPerChannelGradientFunctor functor; + functor(context->eigen_device(), + gradient.vec(), input.vec(), + min.vec(), max.vec(), +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max->scalar(), +#endif + grad_wrt_input->vec(), + grad_wrt_min->vec(), grad_wrt_max->vec()); + break; + } + default: + context->SetStatus(InvalidArgument("Only inputs of dimensions 1, 2 or " + "4 supported, was: ", input.dims())); + break; + } + } + + private: +#ifndef FAKE_QUANT_NO_DEBUG + PersistentTensor check_min_max_handle_; +#endif +}; + +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannel") + .Device(DEVICE_CPU), + FakeQuantWithMinMaxVarsPerChannelOp); +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannelGradient") + .Device(DEVICE_CPU), + FakeQuantWithMinMaxVarsPerChannelGradientOp); + +#if GOOGLE_CUDA +template <> +void FakeQuant1WithMinMaxVarsPerChannelFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstVec inputs, + typename TTypes::ConstVec min, + typename TTypes::ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Vec outputs); +extern template struct FakeQuant1WithMinMaxVarsPerChannelFunctor; + +template <> +void FakeQuant2WithMinMaxVarsPerChannelFunctor::operator()( + const GPUDevice& d, const Index batch_size, const Index depth, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstFlat min, + typename TTypes::ConstFlat max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat outputs); +extern 
template struct FakeQuant2WithMinMaxVarsPerChannelFunctor; + +template <> +void FakeQuant4WithMinMaxVarsPerChannelFunctor::operator()( + const GPUDevice& d, const Index batch_size, const Index height, + const Index width, const Index depth, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstFlat min, + typename TTypes::ConstFlat max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat outputs); +extern template struct FakeQuant4WithMinMaxVarsPerChannelFunctor; + +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannel") + .Device(DEVICE_GPU) + .HostMemory("min") + .HostMemory("max"), + FakeQuantWithMinMaxVarsPerChannelOp); + +template <> +void FakeQuant1WithMinMaxVarsPerChannelGradientFunctor::operator()( + const GPUDevice& d, + typename TTypes::ConstVec gradients, + typename TTypes::ConstVec inputs, + typename TTypes::ConstVec min, + typename TTypes::ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Vec backprops_wrt_input, + typename TTypes::Vec backprop_wrt_min, + typename TTypes::Vec backprop_wrt_max); +extern template struct + FakeQuant1WithMinMaxVarsPerChannelGradientFunctor; + +template <> +void FakeQuant2WithMinMaxVarsPerChannelGradientFunctor::operator()( + const GPUDevice& d, const Index batch_size, const Index depth, + typename TTypes::ConstFlat gradients, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstVec min, + typename TTypes::ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat backprops_wrt_input, + typename TTypes::Vec backprop_wrt_min, + typename TTypes::Vec backprop_wrt_max); +extern template struct + FakeQuant2WithMinMaxVarsPerChannelGradientFunctor; + +template <> +void FakeQuant4WithMinMaxVarsPerChannelGradientFunctor::operator()( + const GPUDevice& d, const Index batch_size, const Index height, + const Index width, const Index depth, + 
typename TTypes::ConstFlat gradients, + typename TTypes::ConstFlat inputs, + typename TTypes::ConstVec min, + typename TTypes::ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + typename TTypes::Scalar check_min_max, +#endif + typename TTypes::Flat backprops_wrt_input, + typename TTypes::Vec backprop_wrt_min, + typename TTypes::Vec backprop_wrt_max); +extern template struct + FakeQuant4WithMinMaxVarsPerChannelGradientFunctor; + +REGISTER_KERNEL_BUILDER(Name("FakeQuantWithMinMaxVarsPerChannelGradient") + .Device(DEVICE_GPU) + .HostMemory("min") + .HostMemory("max"), + FakeQuantWithMinMaxVarsPerChannelGradientOp); +#endif // GOOGLE_CUDA + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/fake_quant_ops_functor.h b/tensorflow/core/kernels/fake_quant_ops_functor.h new file mode 100644 index 00000000000..d3f600cd824 --- /dev/null +++ b/tensorflow/core/kernels/fake_quant_ops_functor.h @@ -0,0 +1,434 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_ + +#include + +#define EIGEN_STACK_ALLOCATION_LIMIT 0 +#define EIGEN_USE_THREADS +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +static constexpr int kSteps = 255; +static constexpr float kStepsFloat = static_cast(kSteps); + +// Gymnastics with nudged zero point is to ensure that real zero maps to +// an integer, which is required for e.g. zero-padding in convolutional layers. +// Returns (nudged_min, nudged_max, nudged_scale). +template +std::tuple Nudge(const float min, const float max) { + const float scale = (max - min) / (kStepsFloat - 0.0f); + const float zero_point_from_min = 0.0f - min / scale; + const uint8 nudged_zero_point = [zero_point_from_min] { + if (zero_point_from_min < 0.0f) { + return static_cast(0); + } else if (zero_point_from_min > kStepsFloat) { + return static_cast(kSteps); + } else { + return static_cast(std::round(zero_point_from_min)); + } + }(); + + const float nudged_min = (0.0f - nudged_zero_point) * scale; + const float nudged_max = (kStepsFloat - nudged_zero_point) * scale; + return std::make_tuple(nudged_min, nudged_max, scale); +} + +template using ConstScalar = + typename tensorflow::TTypes::ConstScalar; +template using Scalar = typename tensorflow::TTypes::Scalar; +template using ConstVec = typename tensorflow::TTypes::ConstVec; +template using Vec = typename tensorflow::TTypes::Vec; +template using ConstFlat = + typename tensorflow::TTypes::ConstFlat; +template using Flat = typename tensorflow::TTypes::Flat; + +// Functor called by FakeQuantWithMinMaxArgsOp to do the work. Compiles both +// for CPU and GPU. 
+template +struct FakeQuantWithMinMaxArgsFunctor { + void operator()(const Device& d, ConstFlat inputs, + const float min, const float max, Flat outputs) { + eigen_assert(min <= 0.0f && "min should be <= 0.0"); + eigen_assert(max >= 0.0f && "max should be >= 0.0"); + eigen_assert(min < max && "min should be < max"); + + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = Nudge(min, max); + const float inv_nudged_scale = 1.0f / nudged_scale; + + auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); + auto clamped_shifted = clamped - nudged_min; + outputs.device(d) = (clamped_shifted * inv_nudged_scale + 0.5f).floor() * + nudged_scale + nudged_min; + } +}; + +// Functor called by FakeQuantWithMinMaxArgsGradientOp to do the work. Compiles +// both for CPU and GPU. +template +struct FakeQuantWithMinMaxArgsGradientFunctor { + void operator()(const Device& d, ConstFlat gradients, + ConstFlat inputs, const float min, const float max, + Flat backprops) { + eigen_assert(min <= 0.0f && "min should be <= 0.0"); + eigen_assert(max >= 0.0f && "max should be >= 0.0"); + eigen_assert(min < max && "min should be < max"); + + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = Nudge(min, max); + + auto between_nudged_min_max = (inputs >= nudged_min && inputs <= nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); + backprops.device(d) = gradients * between_nudged_min_max; + } +}; + +// Functor called by FakeQuantWithMinMaxVarsOp to do the work. Compiles both +// for CPU and GPU. 
+template +struct FakeQuantWithMinMaxVarsFunctor { + void operator()(const Device& d, ConstFlat inputs, + ConstScalar min, ConstScalar max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat outputs) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(), max()); + const auto nudged_scale_repl = inputs.constant(nudged_scale); + + const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min); + const auto clamped_shifted = clamped - nudged_min; + outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() * + nudged_scale_repl + nudged_min; + } +}; + +// Functor called by FakeQuantWithMinMaxVarsGradientOp to do the work. Compiles +// both for CPU and GPU. 
+template +struct FakeQuantWithMinMaxVarsGradientFunctor { + void operator()(const Device& d, + ConstFlat gradients, ConstFlat inputs, + ConstScalar min, ConstScalar max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat backprops_wrt_input, + Scalar backprop_wrt_min, + Scalar backprop_wrt_max) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(), max()); + + const auto between_min_max = (inputs >= nudged_min && inputs <= nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); + backprops_wrt_input.device(d) = gradients * between_min_max; + + const auto below_min = (inputs < nudged_min) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); + backprop_wrt_min.device(d) = (gradients * below_min).sum(); + + const auto above_max = (inputs > nudged_max) + .select(inputs.constant(1.0f), inputs.constant(0.0f)); + backprop_wrt_max.device(d) = (gradients * above_max).sum(); + } +}; + +using Index = typename tensorflow::TTypes::ConstTensor::Index; + +// Functor called by FakeQuantWithMinMaxVarsPerChannelOp to do the work. +// Compiles both for CPU and GPU. +// +// Already verified: inputs, outputs, min, max are of shape [d]. 
+template +struct FakeQuant1WithMinMaxVarsPerChannelFunctor { + void operator()(const Device& d, ConstVec inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Vec outputs) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + const float clamped = + std::max(std::min(inputs(i), nudged_max), nudged_min); + const float clamped_shifted = clamped - nudged_min; + + outputs(i) = std::round(clamped_shifted / nudged_scale) * nudged_scale + + nudged_min; + } + } +}; + +// Already verified: inputs, outputs are of shape [b, d], min, max are of shape +// [d]. 
+template +struct FakeQuant2WithMinMaxVarsPerChannelFunctor { + void operator()(const Device& d, const Index batch_size, const Index depth, + ConstFlat inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat outputs) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + Eigen::DSizes restored(batch_size, depth); + const auto inputs_restored = inputs.reshape(restored); + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + const auto clamped = inputs_restored.chip<1>(i) + .cwiseMin(nudged_max).cwiseMax(nudged_min); + const auto clamped_shifted = clamped - nudged_min; + + outputs.reshape(restored).chip<1>(i).device(d) = + (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale + + nudged_min; + } + } +}; + +// Already verified: inputs, outputs are of shape [b, h, w, d], min, max are +// of shape [d]. 
+template +struct FakeQuant4WithMinMaxVarsPerChannelFunctor { + void operator()(const Device& d, const Index batch_size, const Index height, + const Index width, const Index depth, + ConstFlat inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat outputs) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + Eigen::DSizes restored(batch_size, height, width, depth); + const auto inputs_restored = inputs.reshape(restored); + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + const auto clamped = inputs_restored.chip<3>(i) + .cwiseMin(nudged_max).cwiseMax(nudged_min); + const auto clamped_shifted = clamped - nudged_min; + + outputs.reshape(restored).chip<3>(i).device(d) = + (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale + + nudged_min; + } + } +}; + +// Functor called by FakeQuantWithMinMaxVarsPerChannelGradientOp to do the work. +// Compiles both for CPU and GPU. +// +// Already verified: gradients, inputs, outputs, min, max, backprops_wrt_input, +// backprop_wrt_min, backprop_wrt_max are of shape [d]. 
+template +struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor { + void operator()(const Device& d, + ConstVec gradients, ConstVec inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Vec backprops_wrt_input, Vec backprop_wrt_min, + Vec backprop_wrt_max) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + + const bool between_min_max = + inputs(i) >= nudged_min && inputs(i) <= nudged_max; + backprops_wrt_input(i) = between_min_max ? gradients(i) : 0.0f; + + const bool below_min = inputs(i) < nudged_min; + backprop_wrt_min(i) = below_min ? gradients(i) : 0.0f; + + const bool above_max = inputs(i) > nudged_max; + backprop_wrt_max(i) = above_max ? gradients(i) : 0.0f; + } + } +}; + +// Already verified: gradients, inputs, backprops_wrt_input are of shape [b, d], +// min, max, backprop_wrt_min, backprop_wrt_max are of shape [d]. 
+template +struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor { + void operator()(const Device& d, const Index batch_size, const Index depth, + ConstFlat gradients, ConstFlat inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat backprops_wrt_input, + Vec backprop_wrt_min, Vec backprop_wrt_max) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + Eigen::DSizes restored(batch_size, depth); + const auto gradients_restored = gradients.reshape(restored); + const auto inputs_restored = inputs.reshape(restored); + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + const auto gradients_chip = gradients_restored.chip<1>(i); + const auto inputs_chip = inputs_restored.chip<1>(i); + + const auto between_min_max = + (inputs_chip >= nudged_min && inputs_chip <= nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + backprops_wrt_input.reshape(restored).chip<1>(i).device(d) = + gradients_chip * between_min_max; + + const auto below_min = (inputs_chip < nudged_min) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + Eigen::DSizes reduce(0); + backprop_wrt_min.chip<0>(i).device(d) = + (gradients_chip * below_min).sum(reduce); + + const auto above_max = (inputs_chip > nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + backprop_wrt_max.chip<0>(i).device(d) = + (gradients_chip * above_max).sum(reduce); + } + } +}; + +// Already verified: gradients, inputs, backprops_wrt_input are of shape +// [b, 
h, w, d], min, max, backprop_wrt_min, backprop_wrt_max are of shape [d]. +template +struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor { + void operator()(const Device& d, const Index batch_size, const Index height, + const Index width, const Index depth, + ConstFlat gradients, ConstFlat inputs, + ConstVec min, ConstVec max, +#ifndef FAKE_QUANT_NO_DEBUG + Scalar check_min_max, +#endif + Flat backprops_wrt_input, + Vec backprop_wrt_min, Vec backprop_wrt_max) { +#ifndef FAKE_QUANT_NO_DEBUG + check_min_max.device(d) = (min <= 0.0f).all(); + eigen_assert(check_min_max() && "min should be <= 0.0 coeff-wise"); + check_min_max.device(d) = (max >= 0.0f).all(); + eigen_assert(check_min_max() >= 0.0f && "max should be >= 0.0 coeff-wise"); + check_min_max.device(d) = (min < max).all(); + eigen_assert(check_min_max() && "min should be < max coeff-wise"); +#endif + + Eigen::DSizes restored(batch_size, height, width, depth); + const auto gradients_restored = gradients.reshape(restored); + const auto inputs_restored = inputs.reshape(restored); + for (Index i = 0; i < min.size(); ++i) { + float nudged_min, nudged_max, nudged_scale; + std::tie(nudged_min, nudged_max, nudged_scale) = + Nudge(min(i), max(i)); + const auto gradients_chip = gradients_restored.chip<3>(i); + const auto inputs_chip = inputs_restored.chip<3>(i); + + const auto between_min_max = + (inputs_chip >= nudged_min && inputs_chip <= nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + backprops_wrt_input.reshape(restored).chip<3>(i).device(d) = + gradients_chip * between_min_max; + + const auto below_min = (inputs_chip < nudged_min) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + Eigen::DSizes reduce(0, 1, 2); + backprop_wrt_min.chip<0>(i).device(d) = + (gradients_chip * below_min).sum(reduce); + + const auto above_max = (inputs_chip > nudged_max) + .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f)); + backprop_wrt_max.chip<0>(i).device(d) = + 
(gradients_chip * above_max).sum(reduce); + } + } +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_FAKE_QUANT_FUNCTOR_H_ diff --git a/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc new file mode 100644 index 00000000000..ad327937877 --- /dev/null +++ b/tensorflow/core/kernels/fake_quant_ops_gpu.cu.cc @@ -0,0 +1,41 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define FAKE_QUANT_NO_DEBUG + +#define EIGEN_USE_GPU +#include "tensorflow/core/kernels/fake_quant_ops_functor.h" + +namespace tensorflow { + +typedef Eigen::GpuDevice GPUDevice; + +// Just instantiate GPU functor implementations. 
+template struct FakeQuantWithMinMaxArgsFunctor; +template struct FakeQuantWithMinMaxArgsGradientFunctor; +template struct FakeQuantWithMinMaxVarsFunctor; +template struct FakeQuantWithMinMaxVarsGradientFunctor; +template struct FakeQuant1WithMinMaxVarsPerChannelFunctor; +template struct FakeQuant2WithMinMaxVarsPerChannelFunctor; +template struct FakeQuant4WithMinMaxVarsPerChannelFunctor; +template struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor; +template struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor; +template struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor; + +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/fake_quant_ops_test.cc b/tensorflow/core/kernels/fake_quant_ops_test.cc new file mode 100644 index 00000000000..38ad345f0d3 --- /dev/null +++ b/tensorflow/core/kernels/fake_quant_ops_test.cc @@ -0,0 +1,821 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/kernels/ops_testutil.h" + +namespace tensorflow { + +using tensorflow::AllocatorAttributes; +using tensorflow::DT_FLOAT; +using tensorflow::NodeDefBuilder; +using tensorflow::OpsTestBase; +using tensorflow::Tensor; +using tensorflow::TensorShape; +using tensorflow::test::ExpectClose; +using tensorflow::test::FillValues; + +class QuantOpsTest : public OpsTestBase { + protected: + void AddRandomInput(const TensorShape& shape) { + CHECK_GT(input_types_.size(), inputs_.size()) + << "Adding more inputs than types; perhaps you need to call MakeOp"; + Tensor* input = new Tensor(device_->GetAllocator(AllocatorAttributes()), + DT_FLOAT, shape); + input->flat().setRandom(); + tensors_.push_back(input); + bool is_ref = IsRefType(input_types_[inputs_.size()]); + if (is_ref) { + CHECK_EQ(RemoveRefType(input_types_[inputs_.size()]), DT_FLOAT); + inputs_.push_back({&lock_for_refs_, input}); + } else { + CHECK_EQ(input_types_[inputs_.size()], DT_FLOAT); + inputs_.push_back({nullptr, input}); + } + } +}; + +TEST_F(QuantOpsTest, WithArgsNoNudging) { + // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4. + // Original zero point: 40, no nudging necessary. + // Expected quantized values: -10.0, -10.25, ..., 53.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs") + .Input(FakeInput(DT_FLOAT)) // inputs + .Attr("min", -10.0f) + .Attr("max", 53.75f) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, + {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithArgsNudgedZeroIs0) { + // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged range: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs") + .Input(FakeInput(DT_FLOAT)) // inputs + .Attr("min", -0.1f) + .Attr("max", 63.65f) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, {0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithArgsNudgedZeroIs1) { + // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged range: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs") + .Input(FakeInput(DT_FLOAT)) // inputs + .Attr("min", -0.125f) + .Attr("max", 63.625f) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithArgsNudgedZeroIs255) { + // Original quantization range: [0.4 / 4 - 255 / 4, 0.4 / 4 + 0 / 4]. + // Scale: 1/4, original zero point: 254.6, nudged to 255. + // Nudged range: [-63.75; 0.0]. + // Expected quantized values: -63.75, -63.5, -63.25, ..., 0.0. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgs") + .Input(FakeInput(DT_FLOAT)) // inputs + .Attr("min", -63.65f) + .Attr("max", 0.1f) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-63.8f, -63.75f, -63.7f, -63.5f, 0.0f, 0.1f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, {-63.75f, -63.75f, -63.75f, -63.5f, 0.0f, 0.0f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithArgsGradient) { + // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged range: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxArgsGradient") + .Input(FakeInput(DT_FLOAT)) // gradient + .Input(FakeInput(DT_FLOAT)) // inputs + .Attr("min", -0.125f) + .Attr("max", 63.625f) + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({2, 3})); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + auto input_flat = GetInput(0).flat(); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, + {0.0f, input_flat(1), input_flat(2), + input_flat(3), input_flat(4), 0.0f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsNoNudging) { + // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4. + // Original zero point: 40, no nudging necessary. + // Expected quantized values: -10.0, -10.25, ..., 53.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-10.1f, -10.0f, -9.9f, -9.75f, 53.75f, 53.8f}); + // Min. + AddInputFromArray(TensorShape({}), {-10.0f}); + // Max. + AddInputFromArray(TensorShape({}), {53.75f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, + {-10.0f, -10.0f, -10.0f, -9.75f, 53.75f, 53.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsNudgedZeroIs0) { + // Original quantization range: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged range: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.1f, 0.0f, 0.1f, 0.25f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({}), {-0.1f}); + // Max. 
+ AddInputFromArray(TensorShape({}), {63.65f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, + {0.0f, 0.0f, 0.0f, 0.25f, 63.75f, 63.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsNudgedZeroIs1) { + // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged range: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVars") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({}), {-0.125f}); + // Max. + AddInputFromArray(TensorShape({}), {63.625f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, + {-0.25f, -0.25f, -0.25f, 0.0f, 63.5f, 63.5f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsGradient) { + // Original quantization range: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged range: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({2, 3})); + // Downstream inputs. 
+ AddInputFromArray(TensorShape({2, 3}), + {-0.26f, -0.25f, -0.24f, 0.0f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({}), {-0.125f}); + // Max. + AddInputFromArray(TensorShape({}), {63.625f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3})); + auto in_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, in_flat(1), + in_flat(2), in_flat(3), + in_flat(4), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({})); + expected_bprop_wrt_min.flat()(0) = in_flat(0); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({})); + expected_bprop_wrt_max.flat()(0) = in_flat(5); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({4}), {-0.1f, 0.0f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}); + // Max. + AddInputFromArray(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected, {0.0f, 0.0f, 63.75f, 63.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({4}), {-0.26f, -0.25f, -0.24f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({4}), + {-0.125f, -0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({4}), + {63.625f, 63.625f, 63.625f, 63.625f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected, {-0.25f, -0.25f, -0.25f, 63.5f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.1f, 0.0f, 0.1f, + 0.25f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({3}), {-0.1f, -0.1f, -0.1f}); + // Max. 
+ AddInputFromArray(TensorShape({3}), {63.65f, 63.65f, 63.65f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, {0.0f, 0.0f, 0.0f, + 0.25f, 63.75f, 63.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim2NudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.26f, -0.25f, -0.24f, + 0.0f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({3}), {-0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({3}), {63.625f, 63.625f, 63.625f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3})); + FillValues(&expected, {-0.25f, -0.25f, -0.25f, + 0.0f, 63.5f, 63.5f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. 
+ AddInputFromArray(TensorShape({1, 2, 3, 4}), + {-0.1f, 0.0f, 0.1f, 0.25f, + 0.5f, 0.75f, 1.0f, 1.25f, + 1.5f, 1.75f, 2.0f, 2.25f, + + 63.0f, 63.25f, 63.5f, 63.7f, + 63.75f, 63.8f, 63.9f, 100.0f, + 100.0f, 100.0f, 100.0f, 1000.0f}); + // Min. + AddInputFromArray(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}); + // Max. + AddInputFromArray(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 3, 4})); + FillValues(&expected, + {0.0f, 0.0f, 0.0f, 0.25f, + 0.5f, 0.75f, 1.0f, 1.25f, + 1.5f, 1.75f, 2.0f, 2.25f, + + 63.0f, 63.25f, 63.5f, 63.75f, + 63.75f, 63.75f, 63.75f, 63.75f, + 63.75f, 63.75f, 63.75f, 63.75f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim4NudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannel") + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Downstream inputs. + AddInputFromArray(TensorShape({1, 2, 3, 4}), + {-0.3f, -0.25f, -0.2f, 0.0f, + 0.25f, 0.5f, 0.75f, 1.0f, + 1.25f, 1.5f, 1.75f, 2.0f, + + 63.0f, 63.25f, 63.4f, 63.5f, + 63.6f, 63.7f, 100.0f, 100.0f, + 100.0f, 100.0f, 100.0f, 1000.0f}); + // Min. + AddInputFromArray(TensorShape({4}), + {-0.125f, -0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({4}), + {63.625f, 63.625f, 63.625f, 63.625f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output = GetOutput(0); + Tensor expected(allocator(), DT_FLOAT, TensorShape({1, 2, 3, 4})); + FillValues(&expected, + {-0.25f, -0.25f, -0.25f, 0.0f, + 0.25f, 0.5f, 0.75f, 1.0f, + 1.25f, 1.5f, 1.75f, 2.0f, + + 63.0f, 63.25f, 63.5f, 63.5f, + 63.5f, 63.5f, 63.5f, 63.5f, + 63.5f, 63.5f, 63.5f, 63.5f}); + ExpectClose(expected, *output); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({4})); + // Downstream inputs. + AddInputFromArray(TensorShape({4}), {-0.1f, 0.0f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}); + // Max. + AddInputFromArray(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4})); + auto grad_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0), 0.0f, 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, 0.0f, grad_flat(3)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim1GradientNudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({4})); + // Downstream inputs. + AddInputFromArray(TensorShape({4}), {-0.3f, -0.25f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({4}), + {-0.125f, -0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({4}), + {63.625f, 63.625f, 63.625f, 63.625f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({4})); + auto grad_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0), 0.0f, 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, 0.0f, grad_flat(3)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({2, 3})); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.1f, 0.0f, 0.1f, + 0.25f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({3}), {-0.1f, -0.1f, -0.1f}); + // Max. + AddInputFromArray(TensorShape({3}), {63.65f, 63.65f, 63.65f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3})); + auto grad_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), + grad_flat(3), grad_flat(4), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0), 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, grad_flat(5)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim2GradientNudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({2, 3})); + // Downstream inputs. + AddInputFromArray(TensorShape({2, 3}), + {-0.3f, -0.25f, -0.2f, + 0.0f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({3}), {-0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({3}), {63.625f, 63.625f, 63.625f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, TensorShape({2, 3})); + auto grad_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), + grad_flat(3), grad_flat(4), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({3})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0), 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({3})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, grad_flat(5)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedZeroIs0) { + // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.4, nudged to 0. + // Nudged ranges: [0.0; 63.75]. + // Expected quantized values: 0.0, 0.25, 0.5, ..., 63.75. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({1, 2, 3, 4})); + // Downstream inputs. + AddInputFromArray(TensorShape({1, 2, 3, 4}), + {-0.1f, 0.0f, 63.75f, 63.8f, + -0.1f, 0.0f, 63.75f, 63.8f, + -0.1f, 0.0f, 63.75f, 63.8f, + + -0.1f, 0.0f, 63.75f, 63.8f, + -0.1f, 0.0f, 63.75f, 63.8f, + -0.1f, 0.0f, 63.75f, 63.8f}); + // Min. + AddInputFromArray(TensorShape({4}), {-0.1f, -0.1f, -0.1f, -0.1f}); + // Max. + AddInputFromArray(TensorShape({4}), {63.65f, 63.65f, 63.65f, 63.65f}); + + // Tested code. 
+ TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, + TensorShape({1, 2, 3, 4})); + auto grad_flat = GetInput(0).flat(); + FillValues( + &expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), 0.0f, + 0.0f, grad_flat(5), grad_flat(6), 0.0f, + 0.0f, grad_flat(9), grad_flat(10), 0.0f, + + 0.0f, grad_flat(13), grad_flat(14), 0.0f, + 0.0f, grad_flat(17), grad_flat(18), 0.0f, + 0.0f, grad_flat(21), grad_flat(22), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0) + grad_flat(4) + grad_flat(8) + + grad_flat(12) + grad_flat(16) + grad_flat(20), + 0.0f, 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, 0.0f, + grad_flat(3) + grad_flat(7) + grad_flat(11) + + grad_flat(15) + grad_flat(19) + grad_flat(23)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +TEST_F(QuantOpsTest, WithVarsPerChannelDim4GradientNudgedZeroIs1) { + // Original quantization ranges: [-0.5 / 4 + 0 / 4, -0.5 / 4 + 255 / 4]. + // Scale: 1/4, original zero point: 0.5, nudged to 1. + // Nudged ranges: [-0.25; 63.5]. + // Expected quantized values: -0.25, 0.0, 0.25, ..., 63.5. + TF_EXPECT_OK(NodeDefBuilder("op", "FakeQuantWithMinMaxVarsPerChannelGradient") + .Input(FakeInput(DT_FLOAT)) // gradients + .Input(FakeInput(DT_FLOAT)) // inputs + .Input(FakeInput(DT_FLOAT)) // min + .Input(FakeInput(DT_FLOAT)) // max + .Finalize(node_def())); + TF_EXPECT_OK(InitOp()); + // Upstream gradients. + AddRandomInput(TensorShape({1, 2, 3, 4})); + // Downstream inputs. 
+ AddInputFromArray(TensorShape({1, 2, 3, 4}), + {-0.3f, -0.25f, 63.5f, 63.6f, + -0.3f, -0.25f, 63.5f, 63.6f, + -0.3f, -0.25f, 63.5f, 63.6f, + + -0.3f, -0.25f, 63.5f, 63.6f, + -0.3f, -0.25f, 63.5f, 63.6f, + -0.3f, -0.25f, 63.5f, 63.6f}); + // Min. + AddInputFromArray(TensorShape({4}), + {-0.125f, -0.125f, -0.125f, -0.125f}); + // Max. + AddInputFromArray(TensorShape({4}), + {63.625f, 63.625f, 63.625f, 63.625f}); + + // Tested code. + TF_ASSERT_OK(RunOpKernel()); + + Tensor* output_bprop_wrt_input = GetOutput(0); + Tensor expected_bprop_wrt_input(allocator(), DT_FLOAT, + TensorShape({1, 2, 3, 4})); + auto grad_flat = GetInput(0).flat(); + FillValues(&expected_bprop_wrt_input, + {0.0f, grad_flat(1), grad_flat(2), 0.0f, + 0.0f, grad_flat(5), grad_flat(6), 0.0f, + 0.0f, grad_flat(9), grad_flat(10), 0.0f, + + 0.0f, grad_flat(13), grad_flat(14), 0.0f, + 0.0f, grad_flat(17), grad_flat(18), 0.0f, + 0.0f, grad_flat(21), grad_flat(22), 0.0f}); + ExpectClose(expected_bprop_wrt_input, *output_bprop_wrt_input); + + Tensor* output_bprop_wrt_min = GetOutput(1); + Tensor expected_bprop_wrt_min(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_min, + {grad_flat(0) + grad_flat(4) + grad_flat(8) + + grad_flat(12) + grad_flat(16) + grad_flat(20), + 0.0f, 0.0f, 0.0f}); + ExpectClose(expected_bprop_wrt_min, *output_bprop_wrt_min); + + Tensor* output_bprop_wrt_max = GetOutput(2); + Tensor expected_bprop_wrt_max(allocator(), DT_FLOAT, TensorShape({4})); + FillValues(&expected_bprop_wrt_max, + {0.0f, 0.0f, 0.0f, + grad_flat(3) + grad_flat(7) + grad_flat(11) + + grad_flat(15) + grad_flat(19) + grad_flat(23)}); + ExpectClose(expected_bprop_wrt_max, *output_bprop_wrt_max); +} + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 56253eb64a7..4a08f98b33b 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -23,6 +23,7 @@ limitations under the 
License. #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/graph/algorithm.h" #include "tensorflow/core/graph/gradients.h" #include "tensorflow/core/graph/graph_constructor.h" @@ -86,26 +87,27 @@ class RetvalOp : public OpKernel { REGISTER_KERNEL_BUILDER(Name("_Arg").Device(DEVICE_CPU), ArgOp); REGISTER_KERNEL_BUILDER(Name("_Retval").Device(DEVICE_CPU), RetvalOp); -#define REGISTER_GPU_KERNELS(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("_Arg").Device(DEVICE_GPU).TypeConstraint("T"), ArgOp); \ - REGISTER_KERNEL_BUILDER( \ - Name("_Retval").Device(DEVICE_GPU).TypeConstraint("T"), RetvalOp); -REGISTER_GPU_KERNELS(Eigen::half); -REGISTER_GPU_KERNELS(float); -REGISTER_GPU_KERNELS(double); -#undef REGISTER_GPU_KERNELS +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_Arg").Device(DEVICE_GPU).TypeConstraint("T"), ArgOp); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Arg") + .Device(DEVICE_GPU) + .HostMemory("output") + .TypeConstraint("T"), + ArgOp); +#undef REGISTER -REGISTER_KERNEL_BUILDER(Name("_Arg") - .Device(DEVICE_GPU) - .HostMemory("output") - .TypeConstraint("T"), - ArgOp); -REGISTER_KERNEL_BUILDER(Name("_Retval") - .Device(DEVICE_GPU) - .HostMemory("input") - .TypeConstraint("T"), - RetvalOp); +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("_Retval").Device(DEVICE_GPU).TypeConstraint("T"), RetvalOp); +TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval") + .Device(DEVICE_GPU) + .HostMemory("input") + .TypeConstraint("T"), + RetvalOp); +#undef REGISTER class PassOn : public OpKernel { public: diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD index 0454289b991..72b603463b3 100644 --- a/tensorflow/core/kernels/hexagon/BUILD +++ 
b/tensorflow/core/kernels/hexagon/BUILD @@ -30,6 +30,7 @@ tf_cc_test( name = "quantized_matmul_op_for_hexagon_test", size = "small", srcs = ["quantized_matmul_op_for_hexagon_test.cc"], + tags = ["nomsan"], # http://b/32242946 deps = [ "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/kernels/meta_support.cc b/tensorflow/core/kernels/meta_support.cc new file mode 100644 index 00000000000..4ef56d1987b --- /dev/null +++ b/tensorflow/core/kernels/meta_support.cc @@ -0,0 +1,373 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "tensorflow/core/kernels/meta_support.h" + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/mutex.h" + +#if (defined(GEMMLOWP_NEON_32) || defined(GEMMLOWP_NEON_64)) && \ + !defined(TENSORFLOW_DISABLE_META) && !defined(__APPLE__) +#define TENSORFLOW_USE_META (1) +#endif + +namespace tensorflow { +namespace meta { + +namespace { + +int g_num_threads = 0; +bool g_enabled = true; +bool g_use_local_context = false; + +#ifdef TENSORFLOW_USE_META + +uint8_t* GetScratch() { + static uint8_t* scratch = new uint8_t[2048 * 1024]; + return scratch; +} + +gemmlowp::WorkersPool* GetWorkersPool() { + static gemmlowp::WorkersPool* pool = new gemmlowp::WorkersPool(); + return pool; +} + +mutex& GetMutex() { + static mutex mu; + return mu; +} + +int GetWorkersCount(OpKernelContext* tf_context) { + if (g_num_threads == 0) { + return tf_context->device()->tensorflow_cpu_worker_threads()->num_threads; + } + return g_num_threads; +} + +typedef gemmlowp::meta::SimpleContext LocalContext; + +template +void MultiThreadGemm(Context* context, const Params& params) { + if (params.m <= 4) { + gemmlowp::meta::Gemm, + Params, 1, 8, 8>(params); + } else { + if (params.m >= params.n) { + gemmlowp::meta::MultiThreadGemm< + Context, gemmlowp::meta::GemmExecutorPackRHSCacheFriendly<>, Params, + 2, 4, 8>(context, params); + } else { + gemmlowp::meta::MultiThreadGemm< + Context, gemmlowp::meta::GemmExecutorPackLHSCacheFriendly<>, Params, + 2, 4, 8>(context, params); + } + } +} + +template +void QuantizedGemmImpl(OpKernelContext* tf_context, const quint8* a_data, + const quint8* b_data, qint32* c_data, int m, int n, + int k, int offset_a, int offset_b, int lda, int ldb, + int ldc) { + typedef gemmlowp::meta::GemmParams< + uint8_t, int32_t, 
LeftStream, RightStream, + gemmlowp::meta::QuantizedStaticPreprocessedAsInt32, + gemmlowp::meta::RowMajor> + Params; + Params params; + + params.m = m; + params.n = n; + params.k = k; + + params.lhs = reinterpret_cast(&(a_data->value)); + params.rhs = reinterpret_cast(&(b_data->value)); + params.result = reinterpret_cast(&(c_data->value)); + params.scratch = GetScratch(); + + params.left_stream.count = k; + params.left_stream.stride = lda; + params.left_stream.multiplicative_sum_offset = offset_b; + params.left_stream.additive_sum_offset = k * offset_a * offset_b; + + params.right_stream.count = k; + params.right_stream.stride = ldb; + params.right_stream.multiplicative_sum_offset = offset_a; + params.right_stream.additive_sum_offset = 0; + + params.fused_kernel.kernel.count = k; + params.fused_kernel.output_stream.stride = ldc * sizeof(int32_t); + + if (g_use_local_context) { + LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool()); + MultiThreadGemm(&local_context, params); + } else { + auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads()); + TensorflowGemmContext context(workers.num_threads, workers.workers); + MultiThreadGemm(&context, params); + } +} + +template +void MultiThreadTransform1D(OpKernelContext* tf_context, const Params& params) { + if (g_use_local_context) { + LocalContext local_context(GetWorkersCount(tf_context), GetWorkersPool()); + gemmlowp::meta::MultiThreadTransform1D( + &local_context, params); + } else { + auto& workers = *(tf_context->device()->tensorflow_cpu_worker_threads()); + TensorflowGemmContext context(workers.num_threads, workers.workers); + gemmlowp::meta::MultiThreadTransform1D(&context, params); + } +} + +template +double CalculateRangeScale(float min, float max) { + const int bits = sizeof(QuantizedType) * 8; + return static_cast(max - min) / + ((static_cast(1) << bits) - 1); +} + +template +double CalculateOneOverRangeScale(float min, float max) { + if (min == max) { + return 0.0; + } + 
const int bits = sizeof(QuantizedType) * 8; + return static_cast((static_cast(1) << bits) - 1) / + (max - min); +} + +#endif // TENSORFLOW_USE_META + +} // namespace + +void SetNumThreads(int num_threads) { g_num_threads = num_threads; } + +int GetNumThreads() { return g_num_threads; } + +void SetUseLocalContext(bool use_local_context) { + g_use_local_context = use_local_context; +} + +bool GetUseLocalContext() { return g_use_local_context; } + +bool IsSupported() { +#if defined(TENSORFLOW_USE_META) + return true; +#else + return false; +#endif +} + +bool IsEnabled() { return g_enabled; } + +void SetEnabled(bool enabled) { g_enabled = enabled; } + +bool IsSupportedAndEnabled() { return IsSupported() && IsEnabled(); } + +void QuantizedGemm(OpKernelContext* tf_context, bool transpose_a, + bool transpose_b, const quint8* a_data, const quint8* b_data, + qint32* c_data, int m, int n, int k, int offset_a, + int offset_b, int lda, int ldb, int ldc) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + if (transpose_a) { + if (transpose_b) { + QuantizedGemmImpl( + tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } else { + QuantizedGemmImpl( + tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } + } else { + if (transpose_b) { + QuantizedGemmImpl( + tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } else { + QuantizedGemmImpl( + tf_context, a_data, b_data, c_data, m, n, k, offset_a, offset_b, lda, + ldb, ldc); + } + } +#else + LOG(FATAL) << "QuantizedGemm: Meta fastpath not supported."; +#endif +} + +void Requantize(OpKernelContext* tf_context, const qint32* input, int count, + float input_min, float input_max, float output_min, + float output_max, quint8* output) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + typedef gemmlowp::meta::Transform1DParams + Params; + + Params params; + params.input = reinterpret_cast(input); + 
params.output = reinterpret_cast(output); + params.kernel.count = count; + params.kernel.input_range_min = input_min; + params.kernel.output_range_min = output_min; + params.kernel.input_range_scale = + CalculateRangeScale(input_min, input_max); + params.kernel.one_over_output_range_scale = + CalculateOneOverRangeScale(output_min, output_max); + params.kernel.input_range_offset = + static_cast(std::numeric_limits::lowest()); + + // After adding the output_range_offset the value is cast from float to uint. + // The float to int/uint cast in NEON uses round toward 0. To keep the + // rounding consistent with Eigen, which uses round toward closest, we can + // add 0.5f and exploit the fact that we only operate on non negative values. + // TODO(maciekc): fix the actual kernel in gemmlowp/meta + params.kernel.output_range_offset = + static_cast(std::numeric_limits::lowest()) + 0.5f; + + MultiThreadTransform1D(tf_context, params); +#else + LOG(FATAL) << "Requantize: Meta fastpath not supported."; +#endif +} + +void Dequantize(OpKernelContext* tf_context, const quint8* input, int count, + float range_min, float range_max, float* output) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + typedef gemmlowp::meta::Transform1DParams + Params; + + Params params; + params.input = reinterpret_cast(input); + params.output = reinterpret_cast(output); + params.kernel.count = count; + params.kernel.range_min = range_min; + params.kernel.range_scale = + CalculateRangeScale(range_min, range_max); + params.kernel.range_offset = + static_cast(std::numeric_limits::lowest()); + + MultiThreadTransform1D(tf_context, params); +#else + LOG(FATAL) << "Dequantize: Meta fastpath not supported."; +#endif +} + +void Quantize(OpKernelContext* tf_context, const float* input, int count, + float range_min, float range_max, quint8* output) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + typedef gemmlowp::meta::Transform1DParams + Params; + + Params params; + 
+ params.input = reinterpret_cast(input); + params.output = reinterpret_cast(output); + params.kernel.count = count; + params.kernel.range_min = range_min; + params.kernel.range_scale = + CalculateOneOverRangeScale(range_min, range_max); + + // After adding the range_offset the value is cast from float to uint. + // The float to int/uint cast in NEON uses round toward 0. To keep the + // rounding consistent with Eigen, which uses round toward closest, we can + // add 0.5f and exploit the fact that we only operate on non negative values. + // TODO(maciekc): fix the actual kernel in gemmlowp/meta + params.kernel.range_offset = + static_cast(std::numeric_limits::lowest()) + 0.5f; + + MultiThreadTransform1D(tf_context, params); +#else + LOG(FATAL) << "Quantize: Meta fastpath not supported."; +#endif +} + +void QuantizedBiasAdd(OpKernelContext* tf_context, const quint8* input, + int input_count, const quint8* bias, int bias_count, + float input_min, float input_max, float bias_min, + float bias_max, float output_min, float output_max, + qint32* output) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + typedef gemmlowp::meta::Transform1DParams> + Params; + + Params params; + params.input = reinterpret_cast(input); + params.output = reinterpret_cast(output); + params.kernel.bias = reinterpret_cast(bias); + params.kernel.count = bias_count; + params.kernel.rows = input_count / bias_count; + params.kernel.input_range_min = input_min; + params.kernel.bias_range_min = bias_min; + params.kernel.input_range_scale = + CalculateRangeScale(input_min, input_max); + params.kernel.bias_range_scale = + CalculateRangeScale(bias_min, bias_max); + params.kernel.input_range_offset = 0; + params.kernel.bias_range_offset = 0; + params.kernel.output_range_min = output_min; + params.kernel.one_over_output_range_scale = + CalculateOneOverRangeScale(output_min, output_max); + params.kernel.output_range_offset = + static_cast(std::numeric_limits::lowest()); + + // 
TODO(maciekc): add multithreading to bias add. + // Right now this kernel does not support multi threaded execution. + gemmlowp::meta::Transform1D(params); +#else + LOG(FATAL) << "QuantizedBiasAdd: Meta fastpath not supported."; +#endif +} + +void Clamp(OpKernelContext* tf_context, const quint8* input, int count, + quint8 clamp_min, quint8 clamp_max, quint8* output) { +#ifdef TENSORFLOW_USE_META + mutex_lock library_lock(GetMutex()); + typedef gemmlowp::meta::Transform1DParams> + Params; + + Params params; + params.input = reinterpret_cast(input); + params.output = reinterpret_cast(output); + params.kernel.count = count; + params.kernel.min = clamp_min; + params.kernel.max = clamp_max; + + MultiThreadTransform1D(tf_context, params); +#else + LOG(FATAL) << "Clamp: Meta fastpath not supported."; +#endif +} + +} // namespace meta +} // namespace tensorflow diff --git a/tensorflow/core/kernels/meta_support.h b/tensorflow/core/kernels/meta_support.h new file mode 100644 index 00000000000..0d87baf0344 --- /dev/null +++ b/tensorflow/core/kernels/meta_support.h @@ -0,0 +1,112 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_ + +#include "meta/multi_thread_gemm.h" +#include "meta/multi_thread_transform.h" +#include "meta/quantized_mul_kernels.h" +#include "meta/streams.h" +#include "meta/transform_kernels.h" + +#include "tensorflow/core/framework/numeric_types.h" + +namespace tensorflow { + +class OpKernelContext; + +namespace meta { + +// Gemmlowp/meta is a small library of optimized Arm32/64 kernels for quantized +// matrix multiplication and other quantized computations. + +// Set the maximum number of threads of computation that the internal workers +// pool can use. If num_threads is 0, then use intra_op_parallelism_threads. +void SetNumThreads(int num_threads); + +int GetNumThreads(); + +// Toggle the internal workers pool. If set to false, the computations will +// use the worker pool passed each time in the OpKernelContext. If set to true +// then the OpKernelContext will be ignored, and the internal optimized workers +// pool will be used. +// +// The internal workers pool is disabled by default (false). +void SetUseLocalContext(bool use_local_context); + +bool GetUseLocalContext(); + +// Toggles the codepath. Enabled by default (true) on supported platforms. +void SetEnabled(bool enabled); + +// Returns true if the codepath is supported and is enabled. Use this call +// before calling the compute functions. If the codepath is not supported, and +// any of the compute function is called, the library will log a FATAL error. +bool IsSupportedAndEnabled(); + +// Calculate the quantized matrix multiplication: +// +// for (i, j) in [0, m) x [0, n) do +// c_data[i, j] := +// sum((a_data[i, l] + offset_a) * (b_data[l, j] + offset_b)) : l in [0, k) +// +// If transpose_a is false the lhs operand has row major layout, otherwise +// column major. 
Similarly transpose_b describes the layout of the rhs operand. +// lda, ldb, and ldc are the strides of the lhs operand, rhs operand and the +// result arrays. +void QuantizedGemm(OpKernelContext* context, bool transpose_a, bool transpose_b, + const quint8* a_data, const quint8* b_data, qint32* c_data, + int m, int n, int k, int offset_a, int offset_b, int lda, + int ldb, int ldc); + +// Take an array of numbers from the range [input_min, input_max] quantized +// uniformly to int32 values, recover their float values, and then quantize +// them back uniformly to the range [output_min, output_max] as uint8. +// Saturate the uint8 values. +void Requantize(OpKernelContext* context, const qint32* input, int count, + float input_min, float input_max, float output_min, + float output_max, quint8* output); + +// Take an array of numbers from the range [range_min, range_max] quantized +// uniformly to uint8 values and recover their float values. +void Dequantize(OpKernelContext* context, const quint8* input, int count, + float range_min, float range_max, float* output); + +// Take an array of float values and quantize them uniformly to the range +// [range_min, range_max] expressed as uint8. Saturate the uint8 values. +void Quantize(OpKernelContext*, const float* input, int count, float range_min, + float range_max, quint8* output); + +// Take two arrays: the inputs and the bias quantized uniformly in the ranges +// [input_min, input_max], and [bias_min, bias_max] accordingly, as uint8 +// values. Recover their float values. Add the values. Quantize them back +// uniformly to the range [output_min, output_max] as int32. Saturate the +// int32 values. 
+void QuantizedBiasAdd(OpKernelContext* context, const quint8* input, + int input_count, const quint8* bias, int bias_count, + float input_min, float input_max, float bias_min, + float bias_max, float output_min, float output_max, + qint32* output); + +// Take an array of uint8 values and clamp them to the range [clamp_min, +// clamp_max]. +void Clamp(OpKernelContext* context, const quint8* input, int input_count, + quint8 clamp_min, quint8 clamp_max, quint8* output); + +} // namespace meta +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_META_SUPPORT_H_ diff --git a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc index aef5f0b6a35..9893a855877 100644 --- a/tensorflow/core/kernels/quantize_down_and_shrink_range.cc +++ b/tensorflow/core/kernels/quantize_down_and_shrink_range.cc @@ -20,11 +20,12 @@ limitations under the License. #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { @@ -48,6 +49,7 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { Tensor* output_max = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(2, TensorShape({}), &output_max)); + // See QuantizationRangeOp as well, which has a copy of this logic. 
auto input_array = input.flat(); const int32 input_lowest_quantized = static_cast(Eigen::NumTraits::lowest()); @@ -78,9 +80,17 @@ class QuantizeDownAndShrinkRangeOp : public OpKernel { #endif if (input_array.size() > 0) { - RequantizeManyInNewRangeUsingEigen( - ctx->eigen_device(), input, input_min_float, - input_max_float, actual_min_float, actual_max_float, output); + if (meta::IsSupportedAndEnabled() && std::is_same() && + std::is_same()) { + auto input_i32_array = input.flat(); + meta::Requantize(ctx, input_i32_array.data(), input_i32_array.size(), + input_min_float, input_max_float, actual_min_float, + actual_max_float, output->flat().data()); + } else { + RequantizeManyInNewRangeUsingEigen( + ctx->eigen_device(), input, input_min_float, + input_max_float, actual_min_float, actual_max_float, output); + } } output_min->flat().setConstant(actual_min_float); diff --git a/tensorflow/core/kernels/quantize_op.cc b/tensorflow/core/kernels/quantize_op.cc index 003654c1b0f..b8f0dd86425 100644 --- a/tensorflow/core/kernels/quantize_op.cc +++ b/tensorflow/core/kernels/quantize_op.cc @@ -17,11 +17,12 @@ limitations under the License. 
#define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/type_traits.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace { @@ -124,9 +125,15 @@ class QuantizeV2Op : public OpKernel { .template cast(); } } else if (mode_ == QUANTIZE_MODE_MIN_FIRST) { - FloatTensorToQuantizedInPlaceUsingEigen( - ctx->template eigen_device(), input, min_range, max_range, - output); + if (meta::IsSupportedAndEnabled() && std::is_same()) { + auto input_array = input.flat(); + meta::Quantize(ctx, input_array.data(), input_array.size(), min_range, + max_range, output->flat().data()); + } else { + FloatTensorToQuantizedInPlaceUsingEigen( + ctx->template eigen_device(), input, min_range, max_range, + output); + } } Tensor* output_min_tensor = nullptr; diff --git a/tensorflow/core/kernels/quantized_activation_ops.cc b/tensorflow/core/kernels/quantized_activation_ops.cc index ea1cf15f7bb..2896c3d45a7 100644 --- a/tensorflow/core/kernels/quantized_activation_ops.cc +++ b/tensorflow/core/kernels/quantized_activation_ops.cc @@ -16,10 +16,11 @@ limitations under the License. // Implements a quantized version of the Relu6 operation. 
#define EIGEN_USE_THREADS -#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { @@ -37,8 +38,16 @@ class QuantizedReluOp : public OpKernel { OP_REQUIRES_OK(context, context->allocate_output(0, input.shape(), &output)); const T min_as_quantized = FloatToQuantized(0.0f, min_input, max_input); - output->flat().device(context->eigen_cpu_device()) = - input.flat().cwiseMax(min_as_quantized).template cast(); + + if (meta::IsSupportedAndEnabled() && std::is_same()) { + auto input_ui8_array = input.flat(); + meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(), + min_as_quantized, 255, output->flat().data()); + } else { + output->flat().device(context->eigen_cpu_device()) = + input.flat().cwiseMax(min_as_quantized).template cast(); + } + Tensor* output_min = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); output_min->flat()(0) = min_input; @@ -63,11 +72,20 @@ class QuantizedRelu6Op : public OpKernel { context->allocate_output(0, input.shape(), &output)); const T min_as_quantized = FloatToQuantized(0.0f, min_input, max_input); const T max_as_quantized = FloatToQuantized(6.0f, min_input, max_input); - output->flat().device(context->eigen_cpu_device()) = - input.flat() - .cwiseMax(min_as_quantized) - .cwiseMin(max_as_quantized) - .template cast(); + + if (meta::IsSupportedAndEnabled() && std::is_same()) { + auto input_ui8_array = input.flat(); + meta::Clamp(context, input_ui8_array.data(), input_ui8_array.size(), + min_as_quantized, max_as_quantized, + output->flat().data()); + } else { + output->flat().device(context->eigen_cpu_device()) = + input.flat() + .cwiseMax(min_as_quantized) + 
.cwiseMin(max_as_quantized) + .template cast(); + } + Tensor* output_min = nullptr; OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); output_min->flat()(0) = min_input; diff --git a/tensorflow/core/kernels/quantized_bias_add_op.cc b/tensorflow/core/kernels/quantized_bias_add_op.cc index 0b34bfcad83..5457d290c25 100644 --- a/tensorflow/core/kernels/quantized_bias_add_op.cc +++ b/tensorflow/core/kernels/quantized_bias_add_op.cc @@ -15,11 +15,14 @@ limitations under the License. // Implements a quantized eight-bit version of the bias addition operation. -#include "tensorflow/core/kernels/quantization_utils.h" +#define EIGEN_USE_THREADS + #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/quantization_utils.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { @@ -60,9 +63,23 @@ class QuantizedBiasAddOp : public OpKernel { float total_min; float total_max; - QuantizedAddUsingEigen( - context->template eigen_device(), input, input_min, - input_max, bias, bias_min, bias_max, output, &total_min, &total_max); + + if (meta::IsSupportedAndEnabled() && std::is_same() && + std::is_same() && std::is_same()) { + auto input_ui8_array = input.flat(); + auto bias_ui8_array = bias.flat(); + GetOutputMinAndMaxForQuantizedAdd(input_min, input_max, bias_min, + bias_max, &total_min, &total_max); + meta::QuantizedBiasAdd(context, input_ui8_array.data(), + input_ui8_array.size(), bias_ui8_array.data(), + bias_ui8_array.size(), input_min, input_max, + bias_min, bias_max, total_min, total_max, + output->flat().data()); + } else { + QuantizedAddUsingEigen( + context->template eigen_device(), input, input_min, + input_max, bias, bias_min, bias_max, output, &total_min, &total_max); + } Tensor* output_min = nullptr; 
OP_REQUIRES_OK(context, context->allocate_output(1, {}, &output_min)); diff --git a/tensorflow/core/kernels/quantized_conv_ops.cc b/tensorflow/core/kernels/quantized_conv_ops.cc index fb69d770c0b..2405c55c5b1 100644 --- a/tensorflow/core/kernels/quantized_conv_ops.cc +++ b/tensorflow/core/kernels/quantized_conv_ops.cc @@ -18,12 +18,15 @@ limitations under the License. #include #include +#define EIGEN_USE_THREADS + #include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/meta_support.h" #include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/util/padding.h" @@ -338,12 +341,20 @@ class Im2ColConvFunctor { const int lda = filter_value_count; const int ldb = filter_count; const int ldc = filter_count; - // The gemmlowp optimized library only works for a particular set of data - // types, so check if we meet those requirements and - // fall back to a slower reference implementation if not. 
- if (std::is_same() && std::is_same() && - std::is_same() && (output_offset == 0) && - (output_mult == 1) && (output_shift == 0)) { + + if (meta::IsSupportedAndEnabled() && std::is_same() && + std::is_same() && std::is_same() && + (output_offset == 0) && (output_mult == 1) && (output_shift == 0) && + (transpose_c == false)) { + meta::QuantizedGemm(op_context, transpose_a, transpose_b, + im2col_buffer.get(), filter_data, output_data, m, n, + k, -input_offset, -filter_offset, lda, ldb, ldc); + } else if (std::is_same() && std::is_same() && + std::is_same() && (output_offset == 0) && + (output_mult == 1) && (output_shift == 0)) { + // The gemmlowp optimized library only works for a particular set of data + // types, so check if we meet those requirements and + // fall back to a slower reference implementation if not. const uint8* im2col_data_as_uint8 = &(im2col_buffer.get()->value); const uint8* filter_data_as_uint8 = &(filter_data->value); int32* output_data_as_int32 = &(output_data->value); diff --git a/tensorflow/core/kernels/quantized_matmul_op.cc b/tensorflow/core/kernels/quantized_matmul_op.cc index 0ce9e376423..4abcae0d357 100644 --- a/tensorflow/core/kernels/quantized_matmul_op.cc +++ b/tensorflow/core/kernels/quantized_matmul_op.cc @@ -15,11 +15,14 @@ limitations under the License. // Implements a quantized eight-bit version of the matmul operation. 
+#define EIGEN_USE_THREADS + #include "public/gemmlowp.h" -#include "tensorflow/core/kernels/quantization_utils.h" -#include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/kernels/meta_support.h" +#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/core/kernels/reference_gemm.h" #include "tensorflow/core/lib/core/errors.h" namespace tensorflow { @@ -125,12 +128,20 @@ class QuantizedMatMulOp : public OpKernel { const size_t ldb = b.dim_size(1); const size_t ldc = n; - // The gemmlowp optimized library only works for a particular set of data - // types, so check if we meet those requirements and - // fall back to a slower reference implementation if not. - if (std::is_same() && std::is_same() && - std::is_same() && (offset_c == 0) && (mult_c == 1) && - (shift_c == 0) && (transpose_c == false)) { + if (meta::IsSupportedAndEnabled() && std::is_same() && + std::is_same() && std::is_same() && + (offset_c == 0) && (mult_c == 1) && (shift_c == 0) && + (transpose_c == false)) { + // Gemmlowp/meta code path works on 32 & 64 bit Arm with NEON Simd and + // allows optimized quantized 8bit to 32bit gemm. + meta::QuantizedGemm(context, transpose_a_, transpose_b_, a_data, b_data, + c_data, m, n, k, offset_a, offset_b, lda, ldb, ldc); + } else if (std::is_same() && std::is_same() && + std::is_same() && (offset_c == 0) && + (mult_c == 1) && (shift_c == 0) && (transpose_c == false)) { + // The gemmlowp optimized library only works for a particular set of data + // types, so check if we meet those requirements and fall back to a slower + // reference implementation if not. 
if (transpose_a_) { if (transpose_b_) { GemmlowpMultiply(context, a_data, b_data, c_data, diff --git a/tensorflow/core/kernels/requantization_range_op.cc b/tensorflow/core/kernels/requantization_range_op.cc new file mode 100644 index 00000000000..1aad48763bb --- /dev/null +++ b/tensorflow/core/kernels/requantization_range_op.cc @@ -0,0 +1,80 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// See docs in ../ops/array_ops.cc. 
+ +#define EIGEN_USE_THREADS + +#include + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/quantization_utils.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/type_traits.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/lib/core/errors.h" + +namespace tensorflow { + +typedef Eigen::ThreadPoolDevice CPUDevice; + +template +class RequantizationRangeOp : public OpKernel { + public: + explicit RequantizationRangeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext* ctx) override { + const Tensor& input = ctx->input(0); + const float input_min_float = ctx->input(1).flat()(0); + const float input_max_float = ctx->input(2).flat()(0); + Tensor* output_min = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output_min)); + Tensor* output_max = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_max)); + + // See the deprecated QuantizeDownAndShrinkRangeOp as well, which has a copy + // of this logic. + auto input_array = input.flat(); + const int32 input_lowest_quantized = + static_cast(Eigen::NumTraits::lowest()); + const int32 input_highest_quantized = + static_cast(Eigen::NumTraits::highest()); + T1 actual_min_quantized = input_highest_quantized; + T1 actual_max_quantized = input_lowest_quantized; + for (int i = 0; i < input_array.size(); ++i) { + const T1 value = input_array(i); + actual_min_quantized = std::min(actual_min_quantized, value); + actual_max_quantized = std::max(actual_max_quantized, value); + } + // We want to make sure that the minimum is no larger than zero, so that the + // convolution operation can run efficiently. 
+ const float actual_min_float = + std::min(0.0f, QuantizedToFloat(actual_min_quantized, input_min_float, + input_max_float)); + const float actual_max_float = QuantizedToFloat( + actual_max_quantized, input_min_float, input_max_float); + + output_min->flat().setConstant(actual_min_float); + output_max->flat().setConstant(actual_max_float); + } +}; + +REGISTER_KERNEL_BUILDER(Name("RequantizationRange") + .Device(DEVICE_CPU) + .TypeConstraint("Tinput"), + RequantizationRangeOp); + +} // namespace tensorflow diff --git a/tensorflow/core/kernels/requantization_range_op_test.cc b/tensorflow/core/kernels/requantization_range_op_test.cc new file mode 100644 index 00000000000..38dc3af7cca --- /dev/null +++ b/tensorflow/core/kernels/requantization_range_op_test.cc @@ -0,0 +1,66 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/fake_input.h" +#include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/node_def_builder.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/kernels/ops_testutil.h" +#include "tensorflow/core/kernels/ops_util.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { + +class RequantizationRangeTest : public OpsTestBase { + protected: +}; + +// Runs a manually generated array through the operator, and makes sure that the +// results match the expected hand-calculated values. +TEST_F(RequantizationRangeTest, HandCrafted) { + TF_ASSERT_OK(NodeDefBuilder("requantization_range", "RequantizationRange") + .Input(FakeInput(DT_QINT32)) + .Input(FakeInput(DT_FLOAT)) + .Input(FakeInput(DT_FLOAT)) + .Attr("Tinput", DataTypeToEnum::v()) + .Finalize(node_def())); + TF_ASSERT_OK(InitOp()); + + // For this test we have an input that has the theoretical range of -256.0f to + // +256.0f, but the actual values present only span -1.0f to 1.0f. We expect + // the operator to take advantage of this, and rescale the output to fill up + // the available range in the lower bit depth, and update to the true min and + // max ranges. 
+ const int value_count = 3; + AddInputFromArray(TensorShape({value_count}), + {-(1 << 23), 0, (1 << 23)}); + AddInputFromArray(TensorShape({1}), {-256.0f}); + AddInputFromArray(TensorShape({1}), {256.0f}); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected_min(allocator(), DT_FLOAT, TensorShape({})); + test::FillValues(&expected_min, {-1.0f}); + test::ExpectTensorEqual(expected_min, *GetOutput(0)); + Tensor expected_max(allocator(), DT_FLOAT, TensorShape({})); + test::FillValues(&expected_max, {1.0f}); + test::ExpectTensorEqual(expected_max, *GetOutput(1)); +} + +} // end namespace tensorflow diff --git a/tensorflow/core/kernels/requantize.cc b/tensorflow/core/kernels/requantize.cc index 865970a99e0..fc8af1799d5 100644 --- a/tensorflow/core/kernels/requantize.cc +++ b/tensorflow/core/kernels/requantize.cc @@ -55,9 +55,10 @@ class RequantizeOp : public OpKernel { errors::InvalidArgument("requested_output_min must be <= 0, but got ", requested_output_min_float)); OP_REQUIRES( - ctx, requested_output_max_float >= 0.0f, - errors::InvalidArgument("requested_output_max must be <= 0, but got ", - requested_output_max_float)); + ctx, requested_output_max_float >= requested_output_min_float, + errors::InvalidArgument( + "requested_output_max must be >= requested_output_min, but got ", + requested_output_max_float, " and ", requested_output_min_float)); auto input_array = input.flat(); diff --git a/tensorflow/core/kernels/requantize_op_test.cc b/tensorflow/core/kernels/requantize_op_test.cc index e7674eb2946..44cacf890b6 100644 --- a/tensorflow/core/kernels/requantize_op_test.cc +++ b/tensorflow/core/kernels/requantize_op_test.cc @@ -88,10 +88,12 @@ TEST_F(RequantizeTest, InvalidOutputMax) { {-(1 << 23), 0, (1 << 23)}); AddInputFromArray(TensorShape({1}), {-256.0f}); AddInputFromArray(TensorShape({1}), {256.0f}); - AddInputFromArray(TensorShape({1}), {-1.0f}); - AddInputFromArray(TensorShape({1}), {-0.001f}); - EXPECT_EQ("requested_output_max must be <= 0, but got -0.001", - 
RunOpKernel().error_message()); + AddInputFromArray(TensorShape({1}), {-10.0f}); + AddInputFromArray(TensorShape({1}), {-11.0f}); + EXPECT_EQ( + "requested_output_max must be >= requested_output_min, but got -11 and " + "-10", + RunOpKernel().error_message()); } } // end namespace tensorflow diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc index 63e705df438..d30e7486f51 100644 --- a/tensorflow/core/kernels/sdca_ops.cc +++ b/tensorflow/core/kernels/sdca_ops.cc @@ -167,7 +167,7 @@ class Example { // A dense vector which is a row-slice of the underlying matrix. struct DenseVector { // Returns a row slice from the matrix. - Eigen::TensorMap> row() + Eigen::TensorMap> Row() const { return Eigen::TensorMap>( data_matrix.data() + row_index * data_matrix.dimension(1), @@ -176,7 +176,7 @@ class Example { // Returns a row slice as a 1 * F matrix, where F is the number of features. Eigen::TensorMap> - row_as_matrix() const { + RowAsMatrix() const { return Eigen::TensorMap>( data_matrix.data() + row_index * data_matrix.dimension(1), 1, data_matrix.dimension(1)); @@ -228,18 +228,26 @@ class FeatureWeightsDenseStorage { const Eigen::ThreadPoolDevice& device, const Example::DenseVector& dense_vector, const std::vector& normalized_bounded_dual_delta) { - // Transform the dual vector into a column matrix. - const Eigen::TensorMap> - dual_matrix(normalized_bounded_dual_delta.data(), - normalized_bounded_dual_delta.size(), 1); - const Eigen::array, 1> product_dims = { - Eigen::IndexPair(1, 0)}; - // This essentially computes delta_w += delta_vector / \lamdba * N. 
- deltas_.device(device) = - (deltas_.cast() + - dual_matrix.contract(dense_vector.row_as_matrix().cast(), - product_dims)) - .cast(); + const size_t num_weight_vectors = normalized_bounded_dual_delta.size(); + if (num_weight_vectors == 1) { + deltas_.device(device) = + deltas_ + + dense_vector.RowAsMatrix() * + deltas_.constant(normalized_bounded_dual_delta[0]); + } else { + // Transform the dual vector into a column matrix. + const Eigen::TensorMap> + dual_matrix(normalized_bounded_dual_delta.data(), num_weight_vectors, + 1); + const Eigen::array, 1> product_dims = { + Eigen::IndexPair(1, 0)}; + // This essentially computes delta_w += delta_vector / \lamdba * N. + deltas_.device(device) = + (deltas_.cast() + + dual_matrix.contract(dense_vector.RowAsMatrix().cast(), + product_dims)) + .cast(); + } } private: @@ -456,19 +464,37 @@ const ExampleStatistics Example::ComputeWxAndWeightedExampleNorm( dense_weights.nominals() + dense_weights.deltas() * dense_weights.deltas().constant(num_loss_partitions); - const Eigen::array, 1> product_dims = { - Eigen::IndexPair(1, 1)}; - const Eigen::Tensor prev_prediction = - regularization.EigenShrinkMatrix(dense_weights.nominals()) - .contract(dense_vector.row_as_matrix(), product_dims); - const Eigen::Tensor prediction = - regularization.EigenShrinkMatrix(feature_weights) - .contract(dense_vector.row_as_matrix(), product_dims); - // The result of "tensor contraction" (multiplication) in the code - // above is of dimension num_weight_vectors * 1. 
- for (int l = 0; l < num_weight_vectors; ++l) { - result.prev_wx[l] += prev_prediction(l, 0); - result.wx[l] += prediction(l, 0); + if (num_weight_vectors == 1) { + const Eigen::Tensor prev_prediction = + (dense_vector.Row() * + regularization.EigenShrinkVector( + Eigen::TensorMap>( + dense_weights.nominals().data(), + dense_weights.nominals().dimension(1)))) + .sum(); + const Eigen::Tensor prediction = + (dense_vector.Row() * + regularization.EigenShrinkVector( + Eigen::TensorMap>( + feature_weights.data(), feature_weights.dimension(1)))) + .sum(); + result.prev_wx[0] += prev_prediction(); + result.wx[0] += prediction(); + } else { + const Eigen::array, 1> product_dims = { + Eigen::IndexPair(1, 1)}; + const Eigen::Tensor prev_prediction = + regularization.EigenShrinkMatrix(dense_weights.nominals()) + .contract(dense_vector.RowAsMatrix(), product_dims); + const Eigen::Tensor prediction = + regularization.EigenShrinkMatrix(feature_weights) + .contract(dense_vector.RowAsMatrix(), product_dims); + // The result of "tensor contraction" (multiplication) in the code + // above is of dimension num_weight_vectors * 1. 
+ for (int l = 0; l < num_weight_vectors; ++l) { + result.prev_wx[l] += prev_prediction(l, 0); + result.wx[l] += prediction(l, 0); + } } } @@ -824,7 +850,7 @@ void Examples::ComputeSquaredNormPerExample( } for (int j = 0; j < num_dense_features; ++j) { const Eigen::Tensor sn = - example->dense_vectors_[j]->row().square().sum(); + example->dense_vectors_[j]->Row().square().sum(); squared_norm += sn(); } example->squared_norm_ = squared_norm; diff --git a/tensorflow/core/kernels/sdca_ops_test.cc b/tensorflow/core/kernels/sdca_ops_test.cc index 9ddbd817e19..400f330ce7b 100644 --- a/tensorflow/core/kernels/sdca_ops_test.cc +++ b/tensorflow/core/kernels/sdca_ops_test.cc @@ -232,6 +232,17 @@ void BM_SDCA(const int iters, const int num_examples) { test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters); } +void BM_SDCA_LARGE_DENSE(const int iters, const int num_examples) { + testing::StopTiming(); + Graph* init = nullptr; + Graph* train = nullptr; + GetGraphs(num_examples, 0 /* sparse feature groups */, + 0 /* sparse features per group */, 5 /* dense feature groups*/, + 200000 /* dense features per group */, &init, &train); + testing::StartTiming(); + test::Benchmark("cpu", train, GetSingleThreadedOptions(), init).Run(iters); +} + void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) { testing::StopTiming(); Graph* init = nullptr; @@ -242,10 +253,10 @@ void BM_SDCA_LARGE_SPARSE(const int iters, const int num_examples) { testing::StartTiming(); test::Benchmark("cpu", train, GetMultiThreadedOptions(), init).Run(iters); } - } // namespace BENCHMARK(BM_SDCA)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); +BENCHMARK(BM_SDCA_LARGE_DENSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); BENCHMARK(BM_SDCA_LARGE_SPARSE)->Arg(128)->Arg(256)->Arg(512)->Arg(1024); } // namespace tensorflow diff --git a/tensorflow/core/kernels/softmax_op.cc b/tensorflow/core/kernels/softmax_op.cc index 8ec8409e21d..c7ae93852f8 100644 --- a/tensorflow/core/kernels/softmax_op.cc +++ 
b/tensorflow/core/kernels/softmax_op.cc @@ -65,6 +65,9 @@ REGISTER_KERNEL_BUILDER( REGISTER_KERNEL_BUILDER( Name("Softmax").Device(DEVICE_GPU).TypeConstraint("T"), SoftmaxOp); +REGISTER_KERNEL_BUILDER( + Name("Softmax").Device(DEVICE_GPU).TypeConstraint("T"), + SoftmaxOp); REGISTER_KERNEL_BUILDER( Name("LogSoftmax").Device(DEVICE_GPU).TypeConstraint("T"), SoftmaxOp); diff --git a/tensorflow/core/kernels/softmax_op_gpu.cu.cc b/tensorflow/core/kernels/softmax_op_gpu.cu.cc index 8c26a66a3c3..3f7dd383c60 100644 --- a/tensorflow/core/kernels/softmax_op_gpu.cu.cc +++ b/tensorflow/core/kernels/softmax_op_gpu.cu.cc @@ -41,6 +41,7 @@ struct SoftmaxFunctor { // Instantiate the GPU implementation for float. template struct functor::SoftmaxFunctor; template struct functor::SoftmaxFunctor; +template struct functor::SoftmaxFunctor; } // end namespace tensorflow diff --git a/tensorflow/core/kernels/sparse_matmul_op.cc b/tensorflow/core/kernels/sparse_matmul_op.cc index cf17efaf01e..e5b0b6fcd21 100644 --- a/tensorflow/core/kernels/sparse_matmul_op.cc +++ b/tensorflow/core/kernels/sparse_matmul_op.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/fill_functor.h" #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/stl_util.h" @@ -852,6 +853,15 @@ class SparseMatMulOp : public OpKernel { b.shape().DebugString())); Tensor* output = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({m, n}), &output)); + + if (k == 0) { + // If the inner dimension k in the matrix multiplication is zero, we fill + // the output with zeros. 
+ functor::SetZeroFunctor f; + f(ctx->eigen_device(), output->flat()); + return; + } + auto out = output->matrix(); std::unique_ptr a_float; diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index c6c4f191b86..6cbcbf9fd95 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -295,21 +295,16 @@ class StridedSliceAssignOp : public OpKernel { // 0-dimensional case implies the left and right are exactly the same // scalar shape - if (processing_shape.dims() == 0) { - functor::DenseUpdate copy; - copy(context->eigen_device(), old_lhs.flat(), - input.flat()); - return; - } // Handle general dimensions -#define HANDLE_DIM(NDIM) \ - if (processing_dims == NDIM) { \ - HandleStridedSliceAssignCase(context, begin, end, \ - strides, processing_shape, \ - is_simple_slice, &old_lhs); \ - return; \ +#define HANDLE_DIM(NDIM) \ + if (processing_dims == NDIM) { \ + HandleStridedSliceAssignCase()( \ + context, begin, end, strides, processing_shape, is_simple_slice, \ + &old_lhs); \ + return; \ } + HANDLE_DIM(0); HANDLE_DIM(1); HANDLE_DIM(2); HANDLE_DIM(3); @@ -377,7 +372,15 @@ REGISTER_STRIDED_SLICE(bfloat16); .HostMemory("end") \ .HostMemory("strides") \ .TypeConstraint("Index"), \ - StridedSliceGradOp) + StridedSliceGradOp) \ + REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign") \ + .Device(DEVICE_GPU) \ + .TypeConstraint("T") \ + .HostMemory("begin") \ + .HostMemory("end") \ + .HostMemory("strides") \ + .TypeConstraint("Index"), \ + StridedSliceAssignOp) TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU); @@ -405,7 +408,15 @@ REGISTER_KERNEL_BUILDER(Name("StridedSliceGrad") .HostMemory("dy") .HostMemory("output"), StridedSliceGradOp); - +REGISTER_KERNEL_BUILDER(Name("StridedSliceAssign") + .Device(DEVICE_GPU) + .TypeConstraint("T") + .TypeConstraint("Index") + .HostMemory("ref") + .HostMemory("begin") + .HostMemory("end") + .HostMemory("strides"), + StridedSliceAssignOp) #undef 
REGISTER_GPU #endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/strided_slice_op.h b/tensorflow/core/kernels/strided_slice_op.h index 098f5379d5f..13128e67fb6 100644 --- a/tensorflow/core/kernels/strided_slice_op.h +++ b/tensorflow/core/kernels/strided_slice_op.h @@ -116,6 +116,14 @@ struct StridedSliceAssign { } }; +template +struct StridedSliceAssignScalar { + void operator()(const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input) { + output.device(d) = input; + } +}; + } // namespace functor } // namespace tensorflow diff --git a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc index 75b4b324190..e8f75cf38d0 100644 --- a/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc +++ b/tensorflow/core/kernels/strided_slice_op_gpu.cu.cc @@ -45,7 +45,8 @@ typedef Eigen::GpuDevice GPUDevice; template struct functor::StridedSliceAssign; \ template struct functor::StridedSliceAssign; \ template struct functor::StridedSliceAssign; \ - template struct functor::StridedSliceAssign; + template struct functor::StridedSliceAssign; \ + template struct functor::StridedSliceAssignScalar; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); DEFINE_GPU_KERNELS(int32); diff --git a/tensorflow/core/kernels/strided_slice_op_impl.h b/tensorflow/core/kernels/strided_slice_op_impl.h index b1b5d2df3eb..e89d1920b9c 100644 --- a/tensorflow/core/kernels/strided_slice_op_impl.h +++ b/tensorflow/core/kernels/strided_slice_op_impl.h @@ -27,6 +27,7 @@ limitations under the License. 
#include "tensorflow/core/framework/register_types_traits.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/bounds_check.h" +#include "tensorflow/core/kernels/dense_update_ops.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/array_slice.h" @@ -51,12 +52,14 @@ void HandleStridedSliceGradCase(OpKernelContext* context, bool is_simple_slice, Tensor* result); template -void HandleStridedSliceAssignCase(OpKernelContext* context, - const gtl::ArraySlice& begin, - const gtl::ArraySlice& end, - const gtl::ArraySlice& strides, - const TensorShape& processing_shape, - bool is_simple_slice, Tensor* result); +class HandleStridedSliceAssignCase { + public: + void operator()(OpKernelContext* context, const gtl::ArraySlice& begin, + const gtl::ArraySlice& end, + const gtl::ArraySlice& strides, + const TensorShape& processing_shape, bool is_simple_slice, + Tensor* result); +}; } // namespace tensorflow // The actual implementation. 
This is designed so multiple @@ -134,12 +137,10 @@ void HandleStridedSliceGradCase(OpKernelContext* context, } template -void HandleStridedSliceAssignCase(OpKernelContext* context, - const gtl::ArraySlice& begin, - const gtl::ArraySlice& end, - const gtl::ArraySlice& strides, - const TensorShape& processing_shape, - bool is_simple_slice, Tensor* result) { +void HandleStridedSliceAssignCase::operator()( + OpKernelContext* context, const gtl::ArraySlice& begin, + const gtl::ArraySlice& end, const gtl::ArraySlice& strides, + const TensorShape& processing_shape, bool is_simple_slice, Tensor* result) { gtl::InlinedVector processing_dims = processing_shape.dim_sizes(); typedef typename proxy_type::type Proxy; Eigen::DSizes begin_di; @@ -156,14 +157,34 @@ void HandleStridedSliceAssignCase(OpKernelContext* context, begin_di, end_di, strides_di); } +template +class HandleStridedSliceAssignCase { + public: + enum { NDIM_PROXY = 1 }; + void operator()(OpKernelContext* context, const gtl::ArraySlice& begin, + const gtl::ArraySlice& end, + const gtl::ArraySlice& strides, + const TensorShape& processing_shape, bool is_simple_slice, + Tensor* result) { + gtl::InlinedVector processing_dims(1); + processing_dims[0] = 1; + + typedef typename proxy_type::type Proxy; + functor::StridedSliceAssignScalar()( + context->eigen_device(), + result->bit_casted_shaped(processing_dims), + context->input(4).bit_casted_shaped(processing_dims)); + } +}; + // NODE(aselle): according to bsteiner, we need this because otherwise // nvcc instantiates templates that are invalid. strided_slice_op_gpu.cu // handles instantiates externally. It is important that this is done# // before the HandleXXCase's are instantiated to avoid duplicate // specialization errors. 
-#if GOOGLE_CUDA -#define PREVENT_INSTANTIATE(T, NDIM) \ + +#define PREVENT_INSTANTIATE_DIM1_AND_UP(T, NDIM) \ namespace functor { \ template <> \ void StridedSlice::operator()( \ @@ -197,12 +218,28 @@ void HandleStridedSliceAssignCase(OpKernelContext* context, const Eigen::DSizes& strides); \ extern template struct StridedSliceAssign; \ } // namespace functor +#define PREVENT_INSTANTIATE_DIM0_ONLY(T, NDIM) \ + namespace functor { \ + template <> \ + void StridedSliceAssignScalar::operator()( \ + const GPUDevice& d, typename TTypes::Tensor output, \ + typename TTypes::ConstTensor input); \ + extern template struct StridedSliceAssignScalar; \ + } // namespace functor +// Dimension 0 only instantiates some functors. So we only need +// to prevent ones defined by PREVENT_INSTANTIATE_DIM0_ONLY +#if GOOGLE_CUDA +#if STRIDED_SLICE_INSTANTIATE_DIM == 0 +#define PREVENT_INSTANTIATE(T, NDIM) PREVENT_INSTANTIATE_DIM0_ONLY(T, NDIM) +#else +#define PREVENT_INSTANTIATE(T, NDIM) PREVENT_INSTANTIATE_DIM1_AND_UP(T, NDIM) +#endif #else #define PREVENT_INSTANTIATE(T, NDIM) #endif -#define INSTANTIATE(DEVICE, T, DIM) \ +#define INSTANTIATE_DIM1_AND_UP_HANDLERS(DEVICE, T, DIM) \ template void HandleStridedSliceCase( \ OpKernelContext * context, const gtl::ArraySlice& begin, \ const gtl::ArraySlice& end, \ @@ -210,18 +247,25 @@ void HandleStridedSliceAssignCase(OpKernelContext* context, const TensorShape& processing_shape, bool is_simple_slice, \ Tensor* result); \ template void HandleStridedSliceGradCase( \ - OpKernelContext * context, const gtl::ArraySlice& begin, \ - const gtl::ArraySlice& end, \ - const gtl::ArraySlice& strides, \ - const TensorShape& processing_shape, bool is_simple_slice, \ - Tensor* result); \ - template void HandleStridedSliceAssignCase( \ OpKernelContext * context, const gtl::ArraySlice& begin, \ const gtl::ArraySlice& end, \ const gtl::ArraySlice& strides, \ const TensorShape& processing_shape, bool is_simple_slice, \ Tensor* result); +#define 
INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM) \ + template class HandleStridedSliceAssignCase; + +// Only some kernels need to be instantiated on dim 0. +#if STRIDED_SLICE_INSTANTIATE_DIM == 0 +#define INSTANTIATE(DEVICE, T, DIM) \ + INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM) +#else +#define INSTANTIATE(DEVICE, T, DIM) \ + INSTANTIATE_DIM0_AND_UP_HANDLERS(DEVICE, T, DIM) \ + INSTANTIATE_DIM1_AND_UP_HANDLERS(DEVICE, T, DIM) +#endif + #define DECLARE_FOR_N_CPU(T) \ INSTANTIATE(CPUDevice, T, STRIDED_SLICE_INSTANTIATE_DIM) diff --git a/tensorflow/core/kernels/strided_slice_op_inst_0.cc b/tensorflow/core/kernels/strided_slice_op_inst_0.cc new file mode 100644 index 00000000000..48b52442d65 --- /dev/null +++ b/tensorflow/core/kernels/strided_slice_op_inst_0.cc @@ -0,0 +1,23 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS +#if GOOGLE_CUDA +#define EIGEN_USE_GPU +#endif + +#define STRIDED_SLICE_INSTANTIATE_DIM 0 +#include "tensorflow/core/kernels/strided_slice_op_impl.h" +#undef STRIDED_SLICE_INSTANTIATE_DIM diff --git a/tensorflow/core/lib/core/status.h b/tensorflow/core/lib/core/status.h index 814f76cb938..734ea91c80f 100644 --- a/tensorflow/core/lib/core/status.h +++ b/tensorflow/core/lib/core/status.h @@ -110,7 +110,7 @@ typedef std::function StatusCallback; // DEBUG only version of TF_CHECK_OK. 
Compiler still parses 'val' even in opt // mode. -#ifdef NDEBUG +#ifndef NDEBUG #define TF_DCHECK_OK(val) TF_CHECK_OK(val) #else #define TF_DCHECK_OK(val) \ diff --git a/tensorflow/core/lib/gtl/flatmap.h b/tensorflow/core/lib/gtl/flatmap.h new file mode 100644 index 00000000000..c66bc47168a --- /dev/null +++ b/tensorflow/core/lib/gtl/flatmap.h @@ -0,0 +1,349 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_ + +#include +#include +#include "tensorflow/core/lib/gtl/flatrep.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace gtl { + +// FlatMap provides a map from K to V. +// +// The map is implemented using an open-addressed hash table. A +// single array holds entire map contents and collisions are resolved +// by probing at a sequence of locations in the array. +template > +class FlatMap { + private: + // Forward declare some internal types needed in public section. 
+ struct Bucket; + + public: + typedef Key key_type; + typedef Val mapped_type; + typedef Hash hasher; + typedef Eq key_equal; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + + // We cannot use std::pair<> since internal representation stores + // keys and values in separate arrays, so we make a custom struct + // that holds references to the internal key, value elements. + struct value_type { + typedef Key first_type; + typedef Val second_type; + + const Key& first; + Val& second; + value_type(const Key& k, Val& v) : first(k), second(v) {} + }; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + FlatMap() : FlatMap(1) {} + + explicit FlatMap(size_t N, const Hash& hf = Hash(), const Eq& eq = Eq()) + : rep_(N, hf, eq) {} + + FlatMap(const FlatMap& src) : rep_(src.rep_) {} + + template + FlatMap(InputIter first, InputIter last, size_t N = 1, + const Hash& hf = Hash(), const Eq& eq = Eq()) + : FlatMap(N, hf, eq) { + insert(first, last); + } + + FlatMap& operator=(const FlatMap& src) { + rep_.CopyFrom(src.rep_); + return *this; + } + + ~FlatMap() {} + + void swap(FlatMap& x) { rep_.swap(x.rep_); } + void clear_no_resize() { rep_.clear_no_resize(); } + void clear() { rep_.clear(); } + void reserve(size_t N) { rep_.Resize(std::max(N, size())); } + void rehash(size_t N) { rep_.Resize(std::max(N, size())); } + void resize(size_t N) { rep_.Resize(std::max(N, size())); } + size_t size() const { return rep_.size(); } + bool empty() const { return size() == 0; } + size_t bucket_count() const { return rep_.bucket_count(); } + hasher hash_function() const { return rep_.hash_function(); } + key_equal key_eq() const { return rep_.key_eq(); } + + class iterator { + public: + iterator() : b_(nullptr), end_(nullptr), i_(0) {} + + // Make iterator pointing at first element at or after b. 
+ explicit iterator(Bucket* b, Bucket* end) : b_(b), end_(end), i_(0) { + SkipUnused(); + } + + // Make iterator pointing exactly at ith element in b, which must exist. + iterator(Bucket* b, Bucket* end, uint32 i) : b_(b), end_(end), i_(i) { + FillValue(); + } + + value_type& operator*() { return *val(); } + value_type* operator->() { return val(); } + bool operator==(const iterator& x) const { + return b_ == x.b_ && i_ == x.i_; + } + bool operator!=(const iterator& x) const { return !(*this == x); } + iterator& operator++() { + DCHECK(b_ != end_); + i_++; + SkipUnused(); + return *this; + } + + private: + friend class FlatMap; + Bucket* b_; + Bucket* end_; + uint32 i_; + char space_[sizeof(value_type)]; + + value_type* val() { return reinterpret_cast(space_); } + void FillValue() { new (space_) value_type(b_->key(i_), b_->val(i_)); } + void SkipUnused() { + while (b_ < end_) { + if (i_ >= Rep::kWidth) { + i_ = 0; + b_++; + } else if (b_->marker[i_] < 2) { + i_++; + } else { + FillValue(); + break; + } + } + } + }; + + class const_iterator { + private: + mutable iterator rep_; // Share state and logic with non-const iterator. 
+ public: + const_iterator() : rep_() {} + explicit const_iterator(Bucket* start, Bucket* end) : rep_(start, end) {} + const_iterator(Bucket* b, Bucket* end, uint32 i) : rep_(b, end, i) {} + + const value_type& operator*() const { return *rep_.val(); } + const value_type* operator->() const { return rep_.val(); } + bool operator==(const const_iterator& x) const { return rep_ == x.rep_; } + bool operator!=(const const_iterator& x) const { return rep_ != x.rep_; } + const_iterator& operator++() { + ++rep_; + return *this; + } + }; + + iterator begin() { return iterator(rep_.start(), rep_.limit()); } + iterator end() { return iterator(rep_.limit(), rep_.limit()); } + const_iterator begin() const { + return const_iterator(rep_.start(), rep_.limit()); + } + const_iterator end() const { + return const_iterator(rep_.limit(), rep_.limit()); + } + + size_t count(const Key& k) const { return rep_.Find(k).found ? 1 : 0; } + iterator find(const Key& k) { + auto r = rep_.Find(k); + return r.found ? iterator(r.b, rep_.limit(), r.index) : end(); + } + const_iterator find(const Key& k) const { + auto r = rep_.Find(k); + return r.found ? const_iterator(r.b, rep_.limit(), r.index) : end(); + } + + Val& at(const Key& k) { + auto r = rep_.Find(k); + DCHECK(r.found); + return r.b->val(r.index); + } + const Val& at(const Key& k) const { + auto r = rep_.Find(k); + DCHECK(r.found); + return r.b->val(r.index); + } + + template + std::pair insert(const P& p) { + return Insert(p.first, p.second); + } + std::pair insert(const std::pair& p) { + return Insert(p.first, p.second); + } + template + void insert(InputIter first, InputIter last) { + for (; first != last; ++first) { + insert(*first); + } + } + + Val& operator[](const Key& k) { return IndexOp(k); } + Val& operator[](Key&& k) { return IndexOp(std::forward(k)); } + + template + std::pair emplace(Args&&... 
args) { + return InsertPair(std::make_pair(std::forward(args)...)); + } + + size_t erase(const Key& k) { + auto r = rep_.Find(k); + if (!r.found) return 0; + rep_.Erase(r.b, r.index); + return 1; + } + iterator erase(iterator pos) { + rep_.Erase(pos.b_, pos.i_); + ++pos; + return pos; + } + iterator erase(iterator pos, iterator last) { + for (; pos != last; ++pos) { + rep_.Erase(pos.b_, pos.i_); + } + return pos; + } + + std::pair equal_range(const Key& k) { + auto pos = find(k); + if (pos == end()) { + return std::make_pair(pos, pos); + } else { + auto next = pos; + ++next; + return std::make_pair(pos, next); + } + } + std::pair equal_range(const Key& k) const { + auto pos = find(k); + if (pos == end()) { + return std::make_pair(pos, pos); + } else { + auto next = pos; + ++next; + return std::make_pair(pos, next); + } + } + + bool operator==(const FlatMap& x) const { + if (size() != x.size()) return false; + for (auto& p : x) { + auto i = find(p.first); + if (i == end()) return false; + if (i->second != p.second) return false; + } + return true; + } + bool operator!=(const FlatMap& x) const { return !(*this == x); } + + // If key exists in the table, prefetch the associated value. This + // is a hint, and may have no effect. + void prefetch_value(const Key& key) const { rep_.Prefetch(key); } + + private: + using Rep = internal::FlatRep; + + // Bucket stores kWidth triples. + // The data is organized as three parallel arrays to reduce padding. + struct Bucket { + uint8 marker[Rep::kWidth]; + + // Wrap keys and values in union to control construction and destruction. 
+ union Storage { + struct { + Key key[Rep::kWidth]; + Val val[Rep::kWidth]; + }; + Storage() {} + ~Storage() {} + } storage; + + Key& key(uint32 i) { + DCHECK_GE(marker[i], 2); + return storage.key[i]; + } + Val& val(uint32 i) { + DCHECK_GE(marker[i], 2); + return storage.val[i]; + } + template + void InitVal(uint32 i, V&& v) { + new (&storage.val[i]) Val(std::forward(v)); + } + void Destroy(uint32 i) { + storage.key[i].Key::~Key(); + storage.val[i].Val::~Val(); + } + void MoveFrom(uint32 i, Bucket* src, uint32 src_index) { + new (&storage.key[i]) Key(std::move(src->storage.key[src_index])); + new (&storage.val[i]) Val(std::move(src->storage.val[src_index])); + } + void CopyFrom(uint32 i, Bucket* src, uint32 src_index) { + new (&storage.key[i]) Key(src->storage.key[src_index]); + new (&storage.val[i]) Val(src->storage.val[src_index]); + } + }; + + template + std::pair InsertPair(Pair&& p) { + return Insert(std::forward(p.first), + std::forward(p.second)); + } + + template + std::pair Insert(K&& k, V&& v) { + rep_.MaybeResize(); + auto r = rep_.FindOrInsert(std::forward(k)); + const bool inserted = !r.found; + if (inserted) { + r.b->InitVal(r.index, std::forward(v)); + } + return {iterator(r.b, rep_.limit(), r.index), inserted}; + } + + template + Val& IndexOp(K&& k) { + rep_.MaybeResize(); + auto r = rep_.FindOrInsert(std::forward(k)); + Val* vptr = &r.b->val(r.index); + if (!r.found) { + new (vptr) Val(); // Initialize value in new slot. + } + return *vptr; + } + + Rep rep_; +}; + +} // namespace gtl +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATMAP_H_ diff --git a/tensorflow/core/lib/gtl/flatmap_test.cc b/tensorflow/core/lib/gtl/flatmap_test.cc new file mode 100644 index 00000000000..2fa610b7e12 --- /dev/null +++ b/tensorflow/core/lib/gtl/flatmap_test.cc @@ -0,0 +1,576 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/lib/gtl/flatmap.h" + +#include +#include +#include +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace gtl { +namespace { + +typedef FlatMap NumMap; + +// If map has an entry for k, return the corresponding value, else return def. +int32 Get(const NumMap& map, int64 k, int32 def = -1) { + auto iter = map.find(k); + if (iter == map.end()) { + EXPECT_EQ(map.count(k), 0); + return def; + } else { + EXPECT_EQ(map.count(k), 1); + EXPECT_EQ(&map.at(k), &iter->second); + EXPECT_EQ(iter->first, k); + return iter->second; + } +} + +// Return contents of map as a sorted list of pairs. +typedef std::vector> NumMapContents; +NumMapContents Contents(const NumMap& map) { + NumMapContents result; + for (const auto& p : map) { + result.push_back({p.first, p.second}); + } + std::sort(result.begin(), result.end()); + return result; +} + +// Fill entries with keys [start,limit). 
+void Fill(NumMap* map, int64 start, int64 limit) { + for (int64 i = start; i < limit; i++) { + map->insert({i, i * 100}); + } +} + +TEST(FlatMapTest, Find) { + NumMap map; + EXPECT_EQ(Get(map, 1), -1); + map.insert({1, 100}); + map.insert({2, 200}); + EXPECT_EQ(Get(map, 1), 100); + EXPECT_EQ(Get(map, 2), 200); + EXPECT_EQ(Get(map, 3), -1); +} + +TEST(FlatMapTest, Insert) { + NumMap map; + EXPECT_EQ(Get(map, 1), -1); + + // New entry. + auto result = map.insert({1, 100}); + EXPECT_TRUE(result.second); + EXPECT_EQ(result.first->first, 1); + EXPECT_EQ(result.first->second, 100); + EXPECT_EQ(Get(map, 1), 100); + + // Attempt to insert over existing entry. + result = map.insert({1, 200}); + EXPECT_FALSE(result.second); + EXPECT_EQ(result.first->first, 1); + EXPECT_EQ(result.first->second, 100); + EXPECT_EQ(Get(map, 1), 100); + + // Overwrite through iterator. + result.first->second = 300; + EXPECT_EQ(result.first->second, 300); + EXPECT_EQ(Get(map, 1), 300); + + // Should get updated value. + result = map.insert({1, 400}); + EXPECT_FALSE(result.second); + EXPECT_EQ(result.first->first, 1); + EXPECT_EQ(result.first->second, 300); + EXPECT_EQ(Get(map, 1), 300); +} + +TEST(FlatMapTest, InsertGrowth) { + NumMap map; + const int n = 100; + Fill(&map, 0, 100); + EXPECT_EQ(map.size(), n); + for (int i = 0; i < n; i++) { + EXPECT_EQ(Get(map, i), i * 100) << i; + } +} + +TEST(FlatMapTest, Emplace) { + NumMap map; + + // New entry. + auto result = map.emplace(1, 100); + EXPECT_TRUE(result.second); + EXPECT_EQ(result.first->first, 1); + EXPECT_EQ(result.first->second, 100); + EXPECT_EQ(Get(map, 1), 100); + + // Attempt to insert over existing entry. + result = map.emplace(1, 200); + EXPECT_FALSE(result.second); + EXPECT_EQ(result.first->first, 1); + EXPECT_EQ(result.first->second, 100); + EXPECT_EQ(Get(map, 1), 100); + + // Overwrite through iterator. 
+ result.first->second = 300; + EXPECT_EQ(result.first->second, 300); + EXPECT_EQ(Get(map, 1), 300); + + // Update a second value + result = map.emplace(2, 400); + EXPECT_TRUE(result.second); + EXPECT_EQ(result.first->first, 2); + EXPECT_EQ(result.first->second, 400); + EXPECT_EQ(Get(map, 2), 400); +} + +TEST(FlatMapTest, EmplaceUniquePtr) { + FlatMap, HashInt64> smap; + smap.emplace(1, std::unique_ptr(new string("hello"))); +} + +TEST(FlatMapTest, Size) { + NumMap map; + EXPECT_EQ(map.size(), 0); + + map.insert({1, 100}); + map.insert({2, 200}); + EXPECT_EQ(map.size(), 2); +} + +TEST(FlatMapTest, Empty) { + NumMap map; + EXPECT_TRUE(map.empty()); + + map.insert({1, 100}); + map.insert({2, 200}); + EXPECT_FALSE(map.empty()); +} + +TEST(FlatMapTest, ArrayOperator) { + NumMap map; + + // Create new element if not found. + auto v1 = &map[1]; + EXPECT_EQ(*v1, 0); + EXPECT_EQ(Get(map, 1), 0); + + // Write through returned reference. + *v1 = 100; + EXPECT_EQ(map[1], 100); + EXPECT_EQ(Get(map, 1), 100); + + // Reuse existing element if found. + auto v1a = &map[1]; + EXPECT_EQ(v1, v1a); + EXPECT_EQ(*v1, 100); + + // Create another element. 
+ map[2] = 200; + EXPECT_EQ(Get(map, 1), 100); + EXPECT_EQ(Get(map, 2), 200); +} + +TEST(FlatMapTest, Count) { + NumMap map; + EXPECT_EQ(map.count(1), 0); + EXPECT_EQ(map.count(2), 0); + + map.insert({1, 100}); + EXPECT_EQ(map.count(1), 1); + EXPECT_EQ(map.count(2), 0); + + map.insert({2, 200}); + EXPECT_EQ(map.count(1), 1); + EXPECT_EQ(map.count(2), 1); +} + +TEST(FlatMapTest, Iter) { + NumMap map; + EXPECT_EQ(Contents(map), NumMapContents()); + + map.insert({1, 100}); + map.insert({2, 200}); + EXPECT_EQ(Contents(map), NumMapContents({{1, 100}, {2, 200}})); +} + +TEST(FlatMapTest, Erase) { + NumMap map; + EXPECT_EQ(map.erase(1), 0); + map[1] = 100; + map[2] = 200; + EXPECT_EQ(map.erase(3), 0); + EXPECT_EQ(map.erase(1), 1); + EXPECT_EQ(map.size(), 1); + EXPECT_EQ(Get(map, 2), 200); + EXPECT_EQ(Contents(map), NumMapContents({{2, 200}})); + EXPECT_EQ(map.erase(2), 1); + EXPECT_EQ(Contents(map), NumMapContents()); +} + +TEST(FlatMapTest, EraseIter) { + NumMap map; + Fill(&map, 1, 11); + size_t size = 10; + for (auto iter = map.begin(); iter != map.end();) { + iter = map.erase(iter); + size--; + EXPECT_EQ(map.size(), size); + } + EXPECT_EQ(Contents(map), NumMapContents()); +} + +TEST(FlatMapTest, EraseIterPair) { + NumMap map; + Fill(&map, 1, 11); + NumMap expected; + auto p1 = map.begin(); + expected.insert(*p1); + ++p1; + expected.insert(*p1); + ++p1; + auto p2 = map.end(); + EXPECT_EQ(map.erase(p1, p2), map.end()); + EXPECT_EQ(map.size(), 2); + EXPECT_EQ(Contents(map), Contents(expected)); +} + +TEST(FlatMapTest, EraseLongChains) { + // Make a map with lots of elements and erase a bunch of them to ensure + // that we are likely to hit them on future lookups. 
+ NumMap map; + const int num = 128; + Fill(&map, 0, num); + for (int i = 0; i < num; i += 3) { + EXPECT_EQ(map.erase(i), 1); + } + for (int i = 0; i < num; i++) { + if ((i % 3) != 0) { + EXPECT_EQ(Get(map, i), i * 100); + } else { + EXPECT_EQ(map.count(i), 0); + } + } + + // Erase remainder to trigger table shrinking. + const size_t orig_buckets = map.bucket_count(); + for (int i = 0; i < num; i++) { + map.erase(i); + } + EXPECT_TRUE(map.empty()); + EXPECT_EQ(map.bucket_count(), orig_buckets); + map[1] = 100; // Actual shrinking is triggered by an insert. + EXPECT_LT(map.bucket_count(), orig_buckets); +} + +TEST(FlatMap, AlternatingInsertRemove) { + NumMap map; + map.insert({1000, 1000}); + map.insert({2000, 1000}); + map.insert({3000, 1000}); + for (int i = 0; i < 10000; i++) { + map.insert({i, i}); + map.erase(i); + } +} + +TEST(FlatMap, ClearNoResize) { + NumMap map; + Fill(&map, 0, 100); + const size_t orig = map.bucket_count(); + map.clear_no_resize(); + EXPECT_EQ(map.size(), 0); + EXPECT_EQ(Contents(map), NumMapContents()); + EXPECT_EQ(map.bucket_count(), orig); +} + +TEST(FlatMap, Clear) { + NumMap map; + Fill(&map, 0, 100); + const size_t orig = map.bucket_count(); + map.clear(); + EXPECT_EQ(map.size(), 0); + EXPECT_EQ(Contents(map), NumMapContents()); + EXPECT_LT(map.bucket_count(), orig); +} + +TEST(FlatMap, Copy) { + for (int n = 0; n < 10; n++) { + NumMap src; + Fill(&src, 0, n); + NumMap copy = src; + EXPECT_EQ(Contents(src), Contents(copy)); + NumMap copy2; + copy2 = src; + EXPECT_EQ(Contents(src), Contents(copy2)); + copy2 = copy2; // Self-assignment + EXPECT_EQ(Contents(src), Contents(copy2)); + } +} + +TEST(FlatMap, InitFromIter) { + for (int n = 0; n < 10; n++) { + NumMap src; + Fill(&src, 0, n); + auto vec = Contents(src); + NumMap dst(vec.begin(), vec.end()); + EXPECT_EQ(Contents(dst), vec); + } +} + +TEST(FlatMap, InsertIter) { + NumMap a, b; + Fill(&a, 1, 10); + Fill(&b, 8, 20); + b[9] = 10000; // Should not get inserted into a since a 
already has 9 + a.insert(b.begin(), b.end()); + NumMap expected; + Fill(&expected, 1, 20); + EXPECT_EQ(Contents(a), Contents(expected)); +} + +TEST(FlatMap, Eq) { + NumMap empty; + + NumMap elems; + Fill(&elems, 0, 5); + EXPECT_FALSE(empty == elems); + EXPECT_TRUE(empty != elems); + + NumMap copy = elems; + EXPECT_TRUE(copy == elems); + EXPECT_FALSE(copy != elems); + + NumMap changed = elems; + changed[3] = 1; + EXPECT_FALSE(changed == elems); + EXPECT_TRUE(changed != elems); + + NumMap changed2 = elems; + changed2.erase(3); + EXPECT_FALSE(changed2 == elems); + EXPECT_TRUE(changed2 != elems); +} + +TEST(FlatMap, Swap) { + NumMap a, b; + Fill(&a, 1, 5); + Fill(&b, 100, 200); + NumMap c = a; + NumMap d = b; + EXPECT_EQ(c, a); + EXPECT_EQ(d, b); + c.swap(d); + EXPECT_EQ(c, b); + EXPECT_EQ(d, a); +} + +TEST(FlatMap, Reserve) { + NumMap src; + Fill(&src, 1, 100); + NumMap a = src; + a.reserve(10); + EXPECT_EQ(a, src); + NumMap b = src; + b.rehash(1000); + EXPECT_EQ(b, src); +} + +TEST(FlatMap, EqualRangeMutable) { + NumMap map; + Fill(&map, 1, 10); + + // Existing element + auto p1 = map.equal_range(3); + EXPECT_TRUE(p1.first != p1.second); + EXPECT_EQ(p1.first->first, 3); + EXPECT_EQ(p1.first->second, 300); + ++p1.first; + EXPECT_TRUE(p1.first == p1.second); + + // Missing element + auto p2 = map.equal_range(100); + EXPECT_TRUE(p2.first == p2.second); +} + +TEST(FlatMap, EqualRangeConst) { + NumMap tmp; + Fill(&tmp, 1, 10); + + const NumMap map = tmp; + + // Existing element + auto p1 = map.equal_range(3); + EXPECT_TRUE(p1.first != p1.second); + EXPECT_EQ(p1.first->first, 3); + EXPECT_EQ(p1.first->second, 300); + ++p1.first; + EXPECT_TRUE(p1.first == p1.second); + + // Missing element + auto p2 = map.equal_range(100); + EXPECT_TRUE(p2.first == p2.second); +} + +TEST(FlatMap, Prefetch) { + NumMap map; + Fill(&map, 0, 1000); + // Prefetch present and missing keys. + for (int i = 0; i < 2000; i++) { + map.prefetch_value(i); + } +} + +// Non-copyable values should work. 
+struct NC { + int64 value; + NC() : value(-1) {} + NC(int64 v) : value(v) {} + NC(const NC& x) : value(x.value) {} + bool operator==(const NC& x) const { return value == x.value; } +}; +struct HashNC { + size_t operator()(NC x) const { return x.value; } +}; + +TEST(FlatMap, NonCopyable) { + FlatMap map; + for (int i = 0; i < 100; i++) { + map[NC(i)] = NC(i * 100); + } + for (int i = 0; i < 100; i++) { + EXPECT_EQ(map.count(NC(i)), 1); + auto iter = map.find(NC(i)); + EXPECT_NE(iter, map.end()); + EXPECT_EQ(iter->first, NC(i)); + EXPECT_EQ(iter->second, NC(i * 100)); + EXPECT_EQ(map[NC(i)], NC(i * 100)); + } + map.erase(NC(10)); + EXPECT_EQ(map.count(NC(10)), 0); +} + +// Test with heap-allocated objects so that mismanaged constructions +// or destructions will show up as errors under a sanitizer or +// heap checker. +TEST(FlatMap, ConstructDestruct) { + FlatMap map; + string k1 = "the quick brown fox jumped over the lazy dog"; + string k2 = k1 + k1; + string k3 = k1 + k2; + map[k1] = k2; + map[k3] = k1; + EXPECT_EQ(k1, map.find(k1)->first); + EXPECT_EQ(k2, map.find(k1)->second); + EXPECT_EQ(k1, map[k3]); + map.erase(k3); + EXPECT_EQ(string(), map[k3]); + + map.clear(); + map[k1] = k2; + EXPECT_EQ(k2, map[k1]); + + map.reserve(100); + EXPECT_EQ(k2, map[k1]); +} + +// Type to use to ensure that custom equality operator is used +// that ignores extra value. +struct CustomCmpKey { + int64 a; + int64 b; + CustomCmpKey(int64 v1, int64 v2) : a(v1), b(v2) {} + bool operator==(const CustomCmpKey& x) const { return a == x.a && b == x.b; } +}; +struct HashA { + size_t operator()(CustomCmpKey x) const { return x.a; } +}; +struct EqA { + // Ignore b fields. + bool operator()(CustomCmpKey x, CustomCmpKey y) const { return x.a == y.a; } +}; +TEST(FlatMap, CustomCmp) { + FlatMap map; + map[CustomCmpKey(100, 200)] = 300; + EXPECT_EQ(300, map[CustomCmpKey(100, 200)]); + EXPECT_EQ(300, map[CustomCmpKey(100, 500)]); // Differences in key.b ignored +} + +// Test unique_ptr handling. 
+typedef std::unique_ptr UniqInt; +static UniqInt MakeUniq(int i) { return UniqInt(new int(i)); } + +struct HashUniq { + size_t operator()(const UniqInt& p) const { return *p; } +}; +struct EqUniq { + bool operator()(const UniqInt& a, const UniqInt& b) const { return *a == *b; } +}; +typedef FlatMap UniqMap; + +TEST(FlatMap, UniqueMap) { + UniqMap map; + + // Fill map + const int N = 10; + for (int i = 0; i < N; i++) { + if ((i % 2) == 0) { + map[MakeUniq(i)] = MakeUniq(i + 100); + } else { + map.emplace(MakeUniq(i), MakeUniq(i + 100)); + } + } + EXPECT_EQ(map.size(), N); + + // Lookups + for (int i = 0; i < N; i++) { + EXPECT_EQ(*map.at(MakeUniq(i)), i + 100); + } + + // find+erase + EXPECT_EQ(map.count(MakeUniq(2)), 1); + map.erase(MakeUniq(2)); + EXPECT_EQ(map.count(MakeUniq(2)), 0); + + // clear + map.clear(); + EXPECT_EQ(map.size(), 0); +} + +TEST(FlatMap, UniqueMapIter) { + UniqMap map; + const int kCount = 10; + const int kValueDelta = 100; + for (int i = 1; i <= kCount; i++) { + map[MakeUniq(i)] = MakeUniq(i + kValueDelta); + } + int key_sum = 0; + int val_sum = 0; + for (const auto& p : map) { + key_sum += *p.first; + val_sum += *p.second; + } + EXPECT_EQ(key_sum, (kCount * (kCount + 1)) / 2); + EXPECT_EQ(val_sum, key_sum + (kCount * kValueDelta)); +} + +} // namespace +} // namespace gtl +} // namespace tensorflow diff --git a/tensorflow/core/lib/gtl/flatrep.h b/tensorflow/core/lib/gtl/flatrep.h new file mode 100644 index 00000000000..ff590d41280 --- /dev/null +++ b/tensorflow/core/lib/gtl/flatrep.h @@ -0,0 +1,332 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_ + +#include +#include +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace gtl { +namespace internal { + +// Internal representation for FlatMap and FlatSet. +// +// The representation is an open-addressed hash table. Conceptually, +// the representation is a flat array of entries. However we +// structure it as an array of of buckets where each bucket holds +// kWidth entries along with metadata for the kWidth entries. The +// metadata marker is +// +// (a) kEmpty: the entry is empty +// (b) kDeleted: the entry has been deleted +// (c) other: the entry is occupied and has low-8 bits of its hash. +// These hash bits can be used to avoid potentially expensive +// key comparisons. +// +// FlatMap passes in a bucket that contains keys and values, FlatSet +// passes in a bucket that does not contain values. +template +class FlatRep { + public: + // kWidth is the number of entries stored in a bucket. + static const uint32 kBase = 3; + static const uint32 kWidth = (1 << kBase); + + FlatRep(size_t N, const Hash& hf, const Eq& eq) : hash_(hf), equal_(eq) { + Init(N); + } + explicit FlatRep(const FlatRep& src) : hash_(src.hash_), equal_(src.equal_) { + Init(src.size()); + CopyEntries(src.array_, src.end_, CopyEntry()); + } + ~FlatRep() { + clear_no_resize(); + delete[] array_; + } + + // Simple accessors. 
+ size_t size() const { return not_empty_ - deleted_; } + size_t bucket_count() const { return mask_ + 1; } + Bucket* start() const { return array_; } + Bucket* limit() const { return end_; } + const Hash& hash_function() const { return hash_; } + const Eq& key_eq() const { return equal_; } + + // Overwrite contents of *this with contents of src. + void CopyFrom(const FlatRep& src) { + if (this != &src) { + clear_no_resize(); + delete[] array_; + Init(src.size()); + CopyEntries(src.array_, src.end_, CopyEntry()); + } + } + + void clear_no_resize() { + for (Bucket* b = array_; b != end_; b++) { + for (uint32 i = 0; i < kWidth; i++) { + if (b->marker[i] >= 2) { + b->Destroy(i); + b->marker[i] = kEmpty; + } + } + } + not_empty_ = 0; + deleted_ = 0; + } + + void clear() { + clear_no_resize(); + grow_ = 0; // Consider shrinking in MaybeResize() + MaybeResize(); + } + + void swap(FlatRep& x) { + using std::swap; + swap(array_, x.array_); + swap(end_, x.end_); + swap(lglen_, x.lglen_); + swap(mask_, x.mask_); + swap(not_empty_, x.not_empty_); + swap(deleted_, x.deleted_); + swap(grow_, x.grow_); + swap(shrink_, x.shrink_); + } + + struct SearchResult { + bool found; + Bucket* b; + uint32 index; + }; + + // Hash value is partitioned as follows: + // 1. Bottom 8 bits are stored in bucket to help speed up comparisons. + // 2. Next 3 bits give index inside bucket. + // 3. Remaining bits give bucket number. + + // Find bucket/index for key k. + SearchResult Find(const Key& k) const { + size_t h = hash_(k); + const uint32 marker = Marker(h & 0xff); + size_t index = (h >> 8) & mask_; // Holds bucket num and index-in-bucket + uint32 num_probes = 1; // Needed for quadratic probing + while (true) { + uint32 bi = index & (kWidth - 1); + Bucket* b = &array_[index >> kBase]; + const uint32 x = b->marker[bi]; + if (x == marker && equal_(b->key(bi), k)) { + return {true, b, bi}; + } else if (x == kEmpty) { + return {false, nullptr, 0}; + } + // Quadratic probing. 
+ index = (index + num_probes) & mask_; + num_probes++; + } + } + + // Find bucket/index for key k, creating a new one if necessary. + // + // KeyType is a template parameter so that k's type is deduced and it + // becomes a universal reference which allows the key initialization + // below to use an rvalue constructor if available. + template + SearchResult FindOrInsert(KeyType&& k) { + size_t h = hash_(k); + const uint32 marker = Marker(h & 0xff); + size_t index = (h >> 8) & mask_; // Holds bucket num and index-in-bucket + uint32 num_probes = 1; // Needed for quadratic probing + Bucket* del = nullptr; // First encountered deletion for kInsert + uint32 di = 0; + while (true) { + uint32 bi = index & (kWidth - 1); + Bucket* b = &array_[index >> kBase]; + const uint32 x = b->marker[bi]; + if (x == marker && equal_(b->key(bi), k)) { + return {true, b, bi}; + } else if (!del && x == kDeleted) { + // Remember deleted index to use for insertion. + del = b; + di = bi; + } else if (x == kEmpty) { + if (del) { + // Store in the first deleted slot we encountered + b = del; + bi = di; + deleted_--; // not_empty_ does not change + } else { + not_empty_++; + } + b->marker[bi] = marker; + new (&b->key(bi)) Key(std::forward(k)); + return {false, b, bi}; + } + // Quadratic probing. + index = (index + num_probes) & mask_; + num_probes++; + } + } + + void Erase(Bucket* b, uint32 i) { + b->Destroy(i); + b->marker[i] = kDeleted; + deleted_++; + grow_ = 0; // Consider shrinking on next insert + } + + void Prefetch(const Key& k) const { + size_t h = hash_(k); + size_t index = (h >> 8) & mask_; // Holds bucket num and index-in-bucket + uint32 bi = index & (kWidth - 1); + Bucket* b = &array_[index >> kBase]; + prefetch(&b->storage.key[bi]); + } + void prefetch(const void* ptr) const { + // TODO(jeff,sanjay): Remove this routine when we add a + // prefetch(...) 
call to platform so that the Prefetch routine + // actually does something + } + + inline void MaybeResize() { + if (not_empty_ < grow_) { + return; // Nothing to do + } + if (grow_ == 0) { + // Special value set by erase to cause shrink on next insert. + if (size() >= shrink_) { + // Not small enough to shrink. + grow_ = static_cast(bucket_count() * 0.8); + if (not_empty_ < grow_) return; + } + } + Resize(size() + 1); + } + + void Resize(size_t N) { + Bucket* old = array_; + Bucket* old_end = end_; + Init(N); + CopyEntries(old, old_end, MoveEntry()); + delete[] old; + } + + private: + enum { kEmpty = 0, kDeleted = 1 }; // Special markers for an entry. + + Hash hash_; // User-supplied hasher + Eq equal_; // User-supplied comparator + uint8 lglen_; // lg(#buckets) + Bucket* array_; // array of length (1 << lglen_) + Bucket* end_; // Points just past last bucket in array_ + size_t mask_; // (# of entries in table) - 1 + size_t not_empty_; // Count of entries with marker != kEmpty + size_t deleted_; // Count of entries with marker == kDeleted + size_t grow_; // Grow array when not_empty_ >= grow_ + size_t shrink_; // Shrink array when size() < shrink_ + + // Avoid kEmpty and kDeleted markers when computing hash values to + // store in Bucket::marker[]. + static uint32 Marker(uint32 hb) { return hb + (hb < 2 ? 2 : 0); } + + void Init(size_t N) { + // Make enough room for N elements. + size_t lg = 0; // Smallest table is just one bucket. + while (N >= 0.8 * ((1 << lg) * kWidth)) { + lg++; + } + const size_t n = (1 << lg); + Bucket* array = new Bucket[n]; + for (size_t i = 0; i < n; i++) { + Bucket* b = &array[i]; + memset(b->marker, kEmpty, kWidth); + } + const size_t capacity = (1 << lg) * kWidth; + lglen_ = lg; + mask_ = capacity - 1; + array_ = array; + end_ = array + n; + not_empty_ = 0; + deleted_ = 0; + grow_ = static_cast(capacity * 0.8); + if (lg == 0) { + // Already down to one bucket; no more shrinking. 
+ shrink_ = 0; + } else { + shrink_ = static_cast(grow_ * 0.4); // Must be less than 0.5 + } + } + + // Used by FreshInsert when we should copy from source. + struct CopyEntry { + inline void operator()(Bucket* dst, uint32 dsti, Bucket* src, uint32 srci) { + dst->CopyFrom(dsti, src, srci); + } + }; + + // Used by FreshInsert when we should move from source. + struct MoveEntry { + inline void operator()(Bucket* dst, uint32 dsti, Bucket* src, uint32 srci) { + dst->MoveFrom(dsti, src, srci); + src->Destroy(srci); + src->marker[srci] = kDeleted; + } + }; + + template + void CopyEntries(Bucket* start, Bucket* end, Copier copier) { + for (Bucket* b = start; b != end; b++) { + for (uint32 i = 0; i < kWidth; i++) { + if (b->marker[i] >= 2) { + FreshInsert(b, i, copier); + } + } + } + } + + // Create an entry for the key numbered src_index in *src and return + // its bucket/index. Used for insertion into a fresh table. We + // assume that there are no deletions, and k does not already exist + // in the table. + template + void FreshInsert(Bucket* src, uint32 src_index, Copier copier) { + size_t h = hash_(src->key(src_index)); + const uint32 marker = Marker(h & 0xff); + size_t index = (h >> 8) & mask_; // Holds bucket num and index-in-bucket + uint32 num_probes = 1; // Needed for quadratic probing + while (true) { + uint32 bi = index & (kWidth - 1); + Bucket* b = &array_[index >> kBase]; + const uint32 x = b->marker[bi]; + if (x == 0) { + b->marker[bi] = marker; + not_empty_++; + copier(b, bi, src, src_index); + return; + } + // Quadratic probing. 
+ index = (index + num_probes) & mask_; + num_probes++; + } + } +}; + +} // namespace internal +} // namespace gtl +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATREP_H_ diff --git a/tensorflow/core/lib/gtl/flatset.h b/tensorflow/core/lib/gtl/flatset.h new file mode 100644 index 00000000000..b94d88cbc6a --- /dev/null +++ b/tensorflow/core/lib/gtl/flatset.h @@ -0,0 +1,277 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_ + +#include +#include +#include "tensorflow/core/lib/gtl/flatrep.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace gtl { + +// FlatSet provides a set of K. +// +// The map is implemented using an open-addressed hash table. A +// single array holds entire map contents and collisions are resolved +// by probing at a sequence of locations in the array. +template > +class FlatSet { + private: + // Forward declare some internal types needed in public section. 
+ struct Bucket; + + public: + typedef Key key_type; + typedef Key value_type; + typedef Hash hasher; + typedef Eq key_equal; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + + FlatSet() : FlatSet(1) {} + + explicit FlatSet(size_t N, const Hash& hf = Hash(), const Eq& eq = Eq()) + : rep_(N, hf, eq) {} + + FlatSet(const FlatSet& src) : rep_(src.rep_) {} + + template + FlatSet(InputIter first, InputIter last, size_t N = 1, + const Hash& hf = Hash(), const Eq& eq = Eq()) + : FlatSet(N, hf, eq) { + insert(first, last); + } + + FlatSet& operator=(const FlatSet& src) { + rep_.CopyFrom(src.rep_); + return *this; + } + + ~FlatSet() {} + + void swap(FlatSet& x) { rep_.swap(x.rep_); } + void clear_no_resize() { rep_.clear_no_resize(); } + void clear() { rep_.clear(); } + void reserve(size_t N) { rep_.Resize(std::max(N, size())); } + void rehash(size_t N) { rep_.Resize(std::max(N, size())); } + void resize(size_t N) { rep_.Resize(std::max(N, size())); } + size_t size() const { return rep_.size(); } + bool empty() const { return size() == 0; } + size_t bucket_count() const { return rep_.bucket_count(); } + hasher hash_function() const { return rep_.hash_function(); } + key_equal key_eq() const { return rep_.key_eq(); } + + class iterator { + public: + iterator() : b_(nullptr), end_(nullptr), i_(0) {} + + // Make iterator pointing at first element at or after b. + explicit iterator(Bucket* b, Bucket* end) : b_(b), end_(end), i_(0) { + SkipUnused(); + } + + // Make iterator pointing exactly at ith element in b, which must exist. 
+ iterator(Bucket* b, Bucket* end, uint32 i) : b_(b), end_(end), i_(i) {} + + Key& operator*() { return key(); } + Key* operator->() { return &key(); } + bool operator==(const iterator& x) const { + return b_ == x.b_ && i_ == x.i_; + } + bool operator!=(const iterator& x) const { return !(*this == x); } + iterator& operator++() { + DCHECK(b_ != end_); + i_++; + SkipUnused(); + return *this; + } + + private: + friend class FlatSet; + Bucket* b_; + Bucket* end_; + uint32 i_; + + Key& key() const { return b_->key(i_); } + void SkipUnused() { + while (b_ < end_) { + if (i_ >= Rep::kWidth) { + i_ = 0; + b_++; + } else if (b_->marker[i_] < 2) { + i_++; + } else { + break; + } + } + } + }; + + class const_iterator { + private: + mutable iterator rep_; // Share state and logic with non-const iterator. + public: + const_iterator() : rep_() {} + explicit const_iterator(Bucket* start, Bucket* end) : rep_(start, end) {} + const_iterator(Bucket* b, Bucket* end, uint32 i) : rep_(b, end, i) {} + + const Key& operator*() const { return rep_.key(); } + const Key* operator->() const { return &rep_.key(); } + bool operator==(const const_iterator& x) const { return rep_ == x.rep_; } + bool operator!=(const const_iterator& x) const { return rep_ != x.rep_; } + const_iterator& operator++() { + ++rep_; + return *this; + } + }; + + iterator begin() { return iterator(rep_.start(), rep_.limit()); } + iterator end() { return iterator(rep_.limit(), rep_.limit()); } + const_iterator begin() const { + return const_iterator(rep_.start(), rep_.limit()); + } + const_iterator end() const { + return const_iterator(rep_.limit(), rep_.limit()); + } + + size_t count(const Key& k) const { return rep_.Find(k).found ? 1 : 0; } + iterator find(const Key& k) { + auto r = rep_.Find(k); + return r.found ? iterator(r.b, rep_.limit(), r.index) : end(); + } + const_iterator find(const Key& k) const { + auto r = rep_.Find(k); + return r.found ? 
const_iterator(r.b, rep_.limit(), r.index) : end(); + } + + std::pair insert(const Key& k) { return Insert(k); } + template + void insert(InputIter first, InputIter last) { + for (; first != last; ++first) { + insert(*first); + } + } + + template + std::pair emplace(Args&&... args) { + rep_.MaybeResize(); + auto r = rep_.FindOrInsert(std::forward(args)...); + const bool inserted = !r.found; + return {iterator(r.b, rep_.limit(), r.index), inserted}; + } + + size_t erase(const Key& k) { + auto r = rep_.Find(k); + if (!r.found) return 0; + rep_.Erase(r.b, r.index); + return 1; + } + iterator erase(iterator pos) { + rep_.Erase(pos.b_, pos.i_); + ++pos; + return pos; + } + iterator erase(iterator pos, iterator last) { + for (; pos != last; ++pos) { + rep_.Erase(pos.b_, pos.i_); + } + return pos; + } + + std::pair equal_range(const Key& k) { + auto pos = find(k); + if (pos == end()) { + return std::make_pair(pos, pos); + } else { + auto next = pos; + ++next; + return std::make_pair(pos, next); + } + } + std::pair equal_range(const Key& k) const { + auto pos = find(k); + if (pos == end()) { + return std::make_pair(pos, pos); + } else { + auto next = pos; + ++next; + return std::make_pair(pos, next); + } + } + + bool operator==(const FlatSet& x) const { + if (size() != x.size()) return false; + for (const auto& elem : x) { + auto i = find(elem); + if (i == end()) return false; + } + return true; + } + bool operator!=(const FlatSet& x) const { return !(*this == x); } + + // If key exists in the table, prefetch it. This is a hint, and may + // have no effect. + void prefetch_value(const Key& key) const { rep_.Prefetch(key); } + + private: + using Rep = internal::FlatRep; + + // Bucket stores kWidth triples. + // The data is organized as three parallel arrays to reduce padding. + struct Bucket { + uint8 marker[Rep::kWidth]; + + // Wrap keys in union to control construction and destruction. 
+ union Storage { + Key key[Rep::kWidth]; + Storage() {} + ~Storage() {} + } storage; + + Key& key(uint32 i) { + DCHECK_GE(marker[i], 2); + return storage.key[i]; + } + void Destroy(uint32 i) { storage.key[i].Key::~Key(); } + void MoveFrom(uint32 i, Bucket* src, uint32 src_index) { + new (&storage.key[i]) Key(std::move(src->storage.key[src_index])); + } + void CopyFrom(uint32 i, Bucket* src, uint32 src_index) { + new (&storage.key[i]) Key(src->storage.key[src_index]); + } + }; + + std::pair Insert(const Key& k) { + rep_.MaybeResize(); + auto r = rep_.FindOrInsert(k); + const bool inserted = !r.found; + return {iterator(r.b, rep_.limit(), r.index), inserted}; + } + + Rep rep_; +}; + +} // namespace gtl +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CORE_LIB_GTL_FLATSET_H_ diff --git a/tensorflow/core/lib/gtl/flatset_test.cc b/tensorflow/core/lib/gtl/flatset_test.cc new file mode 100644 index 00000000000..ea9c9c22b55 --- /dev/null +++ b/tensorflow/core/lib/gtl/flatset_test.cc @@ -0,0 +1,501 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/lib/gtl/flatset.h" + +#include +#include +#include +#include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { +namespace gtl { +namespace { + +typedef FlatSet NumSet; + +// Returns true iff set has an entry for k. +// Also verifies that find and count give consistent results. +bool Has(const NumSet& set, int64 k) { + auto iter = set.find(k); + if (iter == set.end()) { + EXPECT_EQ(set.count(k), 0); + return false; + } else { + EXPECT_EQ(set.count(k), 1); + EXPECT_EQ(*iter, k); + return true; + } +} + +// Return contents of set as a sorted list of numbers. +typedef std::vector NumSetContents; +NumSetContents Contents(const NumSet& set) { + NumSetContents result; + for (int64 n : set) { + result.push_back(n); + } + std::sort(result.begin(), result.end()); + return result; +} + +// Fill entries with keys [start,limit). +void Fill(NumSet* set, int64 start, int64 limit) { + for (int64 i = start; i < limit; i++) { + set->insert(i); + } +} + +TEST(FlatSetTest, Find) { + NumSet set; + EXPECT_FALSE(Has(set, 1)); + set.insert(1); + set.insert(2); + EXPECT_TRUE(Has(set, 1)); + EXPECT_TRUE(Has(set, 2)); + EXPECT_FALSE(Has(set, 3)); +} + +TEST(FlatSetTest, Insert) { + NumSet set; + EXPECT_FALSE(Has(set, 1)); + + // New entry. + auto result = set.insert(1); + EXPECT_TRUE(result.second); + EXPECT_EQ(*result.first, 1); + EXPECT_TRUE(Has(set, 1)); + + // Attempt to insert over existing entry. + result = set.insert(1); + EXPECT_FALSE(result.second); + EXPECT_EQ(*result.first, 1); + EXPECT_TRUE(Has(set, 1)); +} + +TEST(FlatSetTest, InsertGrowth) { + NumSet set; + const int n = 100; + Fill(&set, 0, 100); + EXPECT_EQ(set.size(), n); + for (int i = 0; i < n; i++) { + EXPECT_TRUE(Has(set, i)) << i; + } +} + +TEST(FlatSetTest, Emplace) { + NumSet set; + + // New entry. 
+ auto result = set.emplace(73); + EXPECT_TRUE(result.second); + EXPECT_EQ(*result.first, 73); + EXPECT_TRUE(Has(set, 73)); + + // Attempt to insert an existing entry. + result = set.emplace(73); + EXPECT_FALSE(result.second); + EXPECT_EQ(*result.first, 73); + EXPECT_TRUE(Has(set, 73)); + + // Add a second value + result = set.emplace(103); + EXPECT_TRUE(result.second); + EXPECT_EQ(*result.first, 103); + EXPECT_TRUE(Has(set, 103)); +} + +TEST(FlatSetTest, Size) { + NumSet set; + EXPECT_EQ(set.size(), 0); + + set.insert(1); + set.insert(2); + EXPECT_EQ(set.size(), 2); +} + +TEST(FlatSetTest, Empty) { + NumSet set; + EXPECT_TRUE(set.empty()); + + set.insert(1); + set.insert(2); + EXPECT_FALSE(set.empty()); +} + +TEST(FlatSetTest, Count) { + NumSet set; + EXPECT_EQ(set.count(1), 0); + EXPECT_EQ(set.count(2), 0); + + set.insert(1); + EXPECT_EQ(set.count(1), 1); + EXPECT_EQ(set.count(2), 0); + + set.insert(2); + EXPECT_EQ(set.count(1), 1); + EXPECT_EQ(set.count(2), 1); +} + +TEST(FlatSetTest, Iter) { + NumSet set; + EXPECT_EQ(Contents(set), NumSetContents()); + + set.insert(1); + set.insert(2); + EXPECT_EQ(Contents(set), NumSetContents({1, 2})); +} + +TEST(FlatSetTest, Erase) { + NumSet set; + EXPECT_EQ(set.erase(1), 0); + set.insert(1); + set.insert(2); + EXPECT_EQ(set.erase(3), 0); + EXPECT_EQ(set.erase(1), 1); + EXPECT_EQ(set.size(), 1); + EXPECT_TRUE(Has(set, 2)); + EXPECT_EQ(Contents(set), NumSetContents({2})); + EXPECT_EQ(set.erase(2), 1); + EXPECT_EQ(Contents(set), NumSetContents()); +} + +TEST(FlatSetTest, EraseIter) { + NumSet set; + Fill(&set, 1, 11); + size_t size = 10; + for (auto iter = set.begin(); iter != set.end();) { + iter = set.erase(iter); + size--; + EXPECT_EQ(set.size(), size); + } + EXPECT_EQ(Contents(set), NumSetContents()); +} + +TEST(FlatSetTest, EraseIterPair) { + NumSet set; + Fill(&set, 1, 11); + NumSet expected; + auto p1 = set.begin(); + expected.insert(*p1); + ++p1; + expected.insert(*p1); + ++p1; + auto p2 = set.end(); + 
EXPECT_EQ(set.erase(p1, p2), set.end()); + EXPECT_EQ(set.size(), 2); + EXPECT_EQ(Contents(set), Contents(expected)); +} + +TEST(FlatSetTest, EraseLongChains) { + // Make a set with lots of elements and erase a bunch of them to ensure + // that we are likely to hit them on future lookups. + NumSet set; + const int num = 128; + Fill(&set, 0, num); + for (int i = 0; i < num; i += 3) { + EXPECT_EQ(set.erase(i), 1); + } + for (int i = 0; i < num; i++) { + // Multiples of 3 should be not present. + EXPECT_EQ(Has(set, i), ((i % 3) != 0)) << i; + } + + // Erase remainder to trigger table shrinking. + const size_t orig_buckets = set.bucket_count(); + for (int i = 0; i < num; i++) { + set.erase(i); + } + EXPECT_TRUE(set.empty()); + EXPECT_EQ(set.bucket_count(), orig_buckets); + set.insert(1); // Actual shrinking is triggered by an insert. + EXPECT_LT(set.bucket_count(), orig_buckets); +} + +TEST(FlatSet, ClearNoResize) { + NumSet set; + Fill(&set, 0, 100); + const size_t orig = set.bucket_count(); + set.clear_no_resize(); + EXPECT_EQ(set.size(), 0); + EXPECT_EQ(Contents(set), NumSetContents()); + EXPECT_EQ(set.bucket_count(), orig); +} + +TEST(FlatSet, Clear) { + NumSet set; + Fill(&set, 0, 100); + const size_t orig = set.bucket_count(); + set.clear(); + EXPECT_EQ(set.size(), 0); + EXPECT_EQ(Contents(set), NumSetContents()); + EXPECT_LT(set.bucket_count(), orig); +} + +TEST(FlatSet, Copy) { + for (int n = 0; n < 10; n++) { + NumSet src; + Fill(&src, 0, n); + NumSet copy = src; + EXPECT_EQ(Contents(src), Contents(copy)); + NumSet copy2; + copy2 = src; + EXPECT_EQ(Contents(src), Contents(copy2)); + copy2 = copy2; // Self-assignment + EXPECT_EQ(Contents(src), Contents(copy2)); + } +} + +TEST(FlatSet, InitFromIter) { + for (int n = 0; n < 10; n++) { + NumSet src; + Fill(&src, 0, n); + auto vec = Contents(src); + NumSet dst(vec.begin(), vec.end()); + EXPECT_EQ(Contents(dst), vec); + } +} + +TEST(FlatSet, InsertIter) { + NumSet a, b; + Fill(&a, 1, 10); + Fill(&b, 8, 20); + 
b.insert(9); // Should not get inserted into a since a already has 9 + a.insert(b.begin(), b.end()); + NumSet expected; + Fill(&expected, 1, 20); + EXPECT_EQ(Contents(a), Contents(expected)); +} + +TEST(FlatSet, Eq) { + NumSet empty; + + NumSet elems; + Fill(&elems, 0, 5); + EXPECT_FALSE(empty == elems); + EXPECT_TRUE(empty != elems); + + NumSet copy = elems; + EXPECT_TRUE(copy == elems); + EXPECT_FALSE(copy != elems); + + NumSet changed = elems; + changed.insert(7); + EXPECT_FALSE(changed == elems); + EXPECT_TRUE(changed != elems); + + NumSet changed2 = elems; + changed2.erase(3); + EXPECT_FALSE(changed2 == elems); + EXPECT_TRUE(changed2 != elems); +} + +TEST(FlatSet, Swap) { + NumSet a, b; + Fill(&a, 1, 5); + Fill(&b, 100, 200); + NumSet c = a; + NumSet d = b; + EXPECT_EQ(c, a); + EXPECT_EQ(d, b); + c.swap(d); + EXPECT_EQ(c, b); + EXPECT_EQ(d, a); +} + +TEST(FlatSet, Reserve) { + NumSet src; + Fill(&src, 1, 100); + NumSet a = src; + a.reserve(10); + EXPECT_EQ(a, src); + NumSet b = src; + b.rehash(1000); + EXPECT_EQ(b, src); +} + +TEST(FlatSet, EqualRangeMutable) { + NumSet set; + Fill(&set, 1, 10); + + // Existing element + auto p1 = set.equal_range(3); + EXPECT_TRUE(p1.first != p1.second); + EXPECT_EQ(*p1.first, 3); + ++p1.first; + EXPECT_TRUE(p1.first == p1.second); + + // Missing element + auto p2 = set.equal_range(100); + EXPECT_TRUE(p2.first == p2.second); +} + +TEST(FlatSet, EqualRangeConst) { + NumSet tmp; + Fill(&tmp, 1, 10); + + const NumSet set = tmp; + + // Existing element + auto p1 = set.equal_range(3); + EXPECT_TRUE(p1.first != p1.second); + EXPECT_EQ(*p1.first, 3); + ++p1.first; + EXPECT_TRUE(p1.first == p1.second); + + // Missing element + auto p2 = set.equal_range(100); + EXPECT_TRUE(p2.first == p2.second); +} + +TEST(FlatSet, Prefetch) { + NumSet set; + Fill(&set, 0, 1000); + // Prefetch present and missing keys. + for (int i = 0; i < 2000; i++) { + set.prefetch_value(i); + } +} + +// Non-copyable values should work. 
+struct NC { + int64 value; + NC() : value(-1) {} + NC(int64 v) : value(v) {} + NC(const NC& x) : value(x.value) {} + bool operator==(const NC& x) const { return value == x.value; } +}; +struct HashNC { + size_t operator()(NC x) const { return x.value; } +}; + +TEST(FlatSet, NonCopyable) { + FlatSet set; + for (int i = 0; i < 100; i++) { + set.insert(NC(i)); + } + for (int i = 0; i < 100; i++) { + EXPECT_EQ(set.count(NC(i)), 1); + auto iter = set.find(NC(i)); + EXPECT_NE(iter, set.end()); + EXPECT_EQ(*iter, NC(i)); + } + set.erase(NC(10)); + EXPECT_EQ(set.count(NC(10)), 0); +} + +// Test with heap-allocated objects so that mismanaged constructions +// or destructions will show up as errors under a sanitizer or +// heap checker. +TEST(FlatSet, ConstructDestruct) { + FlatSet set; + string k1 = "the quick brown fox jumped over the lazy dog"; + string k2 = k1 + k1; + string k3 = k1 + k2; + set.insert(k1); + set.insert(k3); + EXPECT_EQ(set.count(k1), 1); + EXPECT_EQ(set.count(k2), 0); + EXPECT_EQ(set.count(k3), 1); + + set.erase(k3); + EXPECT_EQ(set.count(k3), 0); + + set.clear(); + set.insert(k1); + EXPECT_EQ(set.count(k1), 1); + EXPECT_EQ(set.count(k3), 0); + + set.reserve(100); + EXPECT_EQ(set.count(k1), 1); + EXPECT_EQ(set.count(k3), 0); +} + +// Type to use to ensure that custom equality operator is used +// that ignores extra value. +struct CustomCmpKey { + int64 a; + int64 b; + CustomCmpKey(int64 v1, int64 v2) : a(v1), b(v2) {} + bool operator==(const CustomCmpKey& x) const { return a == x.a && b == x.b; } +}; +struct HashA { + size_t operator()(CustomCmpKey x) const { return x.a; } +}; +struct EqA { + // Ignore b fields. + bool operator()(CustomCmpKey x, CustomCmpKey y) const { return x.a == y.a; } +}; +TEST(FlatSet, CustomCmp) { + FlatSet set; + set.insert(CustomCmpKey(100, 200)); + EXPECT_EQ(set.count(CustomCmpKey(100, 200)), 1); + EXPECT_EQ(set.count(CustomCmpKey(100, 500)), 1); // key.b ignored +} + +// Test unique_ptr handling. 
+typedef std::unique_ptr UniqInt; +static UniqInt MakeUniq(int i) { return UniqInt(new int(i)); } + +struct HashUniq { + size_t operator()(const UniqInt& p) const { return *p; } +}; +struct EqUniq { + bool operator()(const UniqInt& a, const UniqInt& b) const { return *a == *b; } +}; +typedef FlatSet UniqSet; + +TEST(FlatSet, UniqueSet) { + UniqSet set; + + // Fill set + const int N = 10; + for (int i = 0; i < N; i++) { + set.emplace(MakeUniq(i)); + } + EXPECT_EQ(set.size(), N); + + // Lookups + for (int i = 0; i < N; i++) { + EXPECT_EQ(set.count(MakeUniq(i)), 1); + } + + // erase + set.erase(MakeUniq(2)); + EXPECT_EQ(set.count(MakeUniq(2)), 0); + + // clear + set.clear(); + EXPECT_EQ(set.size(), 0); +} + +TEST(FlatSet, UniqueSetIter) { + UniqSet set; + const int kCount = 10; + for (int i = 1; i <= kCount; i++) { + set.emplace(MakeUniq(i)); + } + int sum = 0; + for (const auto& p : set) { + sum += *p; + } + EXPECT_EQ(sum, (kCount * (kCount + 1)) / 2); +} + +} // namespace +} // namespace gtl +} // namespace tensorflow diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h index 3c71e7d6cce..4e64c90d629 100644 --- a/tensorflow/core/lib/hash/hash.h +++ b/tensorflow/core/lib/hash/hash.h @@ -42,6 +42,24 @@ inline uint64 Hash64Combine(uint64 a, uint64 b) { return a ^ (b + 0x9e3779b97f4a7800ULL + (a << 10) + (a >> 4)); } +// Convenience Hash functors +struct HashInt64 { + size_t operator()(int64 x) const { return static_cast(x); } +}; +struct HashStr { + size_t operator()(const string& s) const { + return static_cast(Hash64(s)); + } +}; +template +struct HashPtr { + size_t operator()(const PTR p) const { + // Hash pointers as integers, but bring more entropy to the lower bits. 
+ size_t k = static_cast(reinterpret_cast(p)); + return k + (k >> 6); + } +}; + } // namespace tensorflow #endif // TENSORFLOW_LIB_HASH_HASH_H_ diff --git a/tensorflow/core/lib/monitoring/collected_metrics.h b/tensorflow/core/lib/monitoring/collected_metrics.h index 42a80bf5b78..3dde55342ef 100644 --- a/tensorflow/core/lib/monitoring/collected_metrics.h +++ b/tensorflow/core/lib/monitoring/collected_metrics.h @@ -25,14 +25,12 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/monitoring/metric_def.h" namespace tensorflow { namespace monitoring { -// The type of the metric values. -enum class ValueType : int { kInt64 = 0 }; - // A metric is a statistic about a monitorable entity. // // Metrics are named with path-like strings, which must conform to the regular @@ -89,6 +87,7 @@ struct Point { // The actual metric value, dependent on the value_type enum. ValueType value_type; int64 int64_value; + HistogramProto histogram_value; // start_timestamp and end_timestamp indicate the time period over which this // point's value measurement applies. diff --git a/tensorflow/core/lib/monitoring/collection_registry.cc b/tensorflow/core/lib/monitoring/collection_registry.cc index 47112279cff..d3fd7132de5 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.cc +++ b/tensorflow/core/lib/monitoring/collection_registry.cc @@ -49,9 +49,8 @@ void Collector::CollectMetricDescriptor( metric_descriptor->label_names.push_back(label_name.ToString()); } - // Only cumulative int64 counter is implemented at the moment. 
- metric_descriptor->metric_kind = MetricKind::kCumulative; - metric_descriptor->value_type = ValueType::kInt64; + metric_descriptor->metric_kind = metric_def->kind(); + metric_descriptor->value_type = metric_def->value_type(); } } // namespace internal diff --git a/tensorflow/core/lib/monitoring/collection_registry.h b/tensorflow/core/lib/monitoring/collection_registry.h index 3da2439238f..2eff4684367 100644 --- a/tensorflow/core/lib/monitoring/collection_registry.h +++ b/tensorflow/core/lib/monitoring/collection_registry.h @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/monitoring/collected_metrics.h" #include "tensorflow/core/lib/monitoring/metric_def.h" @@ -217,6 +218,14 @@ inline void CollectValue(const int64& value, Point* const point) { point->int64_value = value; } +template <> +inline void CollectValue(const HistogramProto& value, Point* const point) { + point->value_type = ValueType::kHistogram; + // This is inefficient. If and when we hit snags, we can change the API to do + // this more efficiently. + point->histogram_value = value; +} + // Used by the CollectionRegistry class to collect all the values of all the // metrics in the registry. This is an implementation detail of the // CollectionRegistry class, please do not depend on this. diff --git a/tensorflow/core/lib/monitoring/collection_registry_test.cc b/tensorflow/core/lib/monitoring/collection_registry_test.cc index 04a4879da47..34a480b07db 100644 --- a/tensorflow/core/lib/monitoring/collection_registry_test.cc +++ b/tensorflow/core/lib/monitoring/collection_registry_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/core/lib/monitoring/collection_registry.h" #include "tensorflow/core/lib/monitoring/counter.h" +#include "tensorflow/core/lib/monitoring/sampler.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/test.h" @@ -23,6 +24,8 @@ limitations under the License. namespace tensorflow { namespace monitoring { +using histogram::Histogram; + namespace test_util { class CollectionRegistryTestAccess { @@ -42,7 +45,7 @@ TEST(CollectionRegistryTest, RegistrationUnregistration) { auto* collection_registry = CollectionRegistry::Default(); const MetricDef metric_def0( "/tensorflow/metric0", "An example metric with no labels."); - const MetricDef metric_def1( + const MetricDef metric_def1( "/tensorflow/metric1", "An example metric with one label.", "LabelName"); { @@ -173,6 +176,112 @@ TEST(CollectMetricsTest, Counter) { } } +void EqHistograms(const Histogram& expected, + const HistogramProto& actual_proto) { + Histogram actual; + ASSERT_TRUE(actual.DecodeFromProto(actual_proto)); + + EXPECT_EQ(expected.ToString(), actual.ToString()); +} + +TEST(CollectMetricsTest, Sampler) { + auto sampler_with_labels = std::unique_ptr>( + Sampler<2>::New({"/tensorflow/test/sampler_with_labels", + "Sampler with labels.", "MyLabel0", "MyLabel1"}, + {1.0, 2.0})); + auto sampler_without_labels = std::unique_ptr>(Sampler<0>::New( + {"/tensorflow/test/sampler_without_labels", "Sampler without labels."}, + {0.0})); + + Histogram with_labels0({1.0, 2.0, DBL_MAX}); + sampler_with_labels->GetCell("Label00", "Label10")->Add(0.7); + with_labels0.Add(0.7); + + Histogram with_labels1({1.0, 2.0, DBL_MAX}); + sampler_with_labels->GetCell("Label01", "Label11")->Add(1.5); + with_labels1.Add(1.5); + + Histogram without_labels({0.0, DBL_MAX}); + sampler_without_labels->GetCell()->Add(0.5); + without_labels.Add(0.5); + + for (const bool collect_metric_descriptors : {true, false}) { + 
SCOPED_TRACE(strings::StrCat("collect_metric_descriptors: ", + collect_metric_descriptors)); + + auto* collection_registry = CollectionRegistry::Default(); + CollectionRegistry::CollectMetricsOptions options; + options.collect_metric_descriptors = collect_metric_descriptors; + const std::unique_ptr collected_metrics = + collection_registry->CollectMetrics(options); + + if (collect_metric_descriptors) { + ASSERT_EQ(2, collected_metrics->metric_descriptor_map.size()); + + const MetricDescriptor& ld = *collected_metrics->metric_descriptor_map.at( + "/tensorflow/test/sampler_with_labels"); + EXPECT_EQ("/tensorflow/test/sampler_with_labels", ld.name); + EXPECT_EQ("Sampler with labels.", ld.description); + ASSERT_EQ(2, ld.label_names.size()); + EXPECT_EQ("MyLabel0", ld.label_names[0]); + EXPECT_EQ("MyLabel1", ld.label_names[1]); + EXPECT_EQ(MetricKind::kCumulative, ld.metric_kind); + EXPECT_EQ(ValueType::kHistogram, ld.value_type); + + const MetricDescriptor& ud = *collected_metrics->metric_descriptor_map.at( + "/tensorflow/test/sampler_without_labels"); + EXPECT_EQ("/tensorflow/test/sampler_without_labels", ud.name); + EXPECT_EQ("Sampler without labels.", ud.description); + ASSERT_EQ(0, ud.label_names.size()); + EXPECT_EQ(MetricKind::kCumulative, ud.metric_kind); + EXPECT_EQ(ValueType::kHistogram, ud.value_type); + } else { + EXPECT_EQ(0, collected_metrics->metric_descriptor_map.size()); + } + + ASSERT_EQ(2, collected_metrics->point_set_map.size()); + + const PointSet& lps = *collected_metrics->point_set_map.at( + "/tensorflow/test/sampler_with_labels"); + EXPECT_EQ("/tensorflow/test/sampler_with_labels", lps.metric_name); + ASSERT_EQ(2, lps.points.size()); + ASSERT_EQ(2, lps.points[0]->labels.size()); + EXPECT_EQ("MyLabel0", lps.points[0]->labels[0].name); + EXPECT_EQ("Label00", lps.points[0]->labels[0].value); + EXPECT_EQ("MyLabel1", lps.points[0]->labels[1].name); + EXPECT_EQ("Label10", lps.points[0]->labels[1].value); + EXPECT_EQ(ValueType::kHistogram, 
lps.points[0]->value_type); + EqHistograms(with_labels0, lps.points[0]->histogram_value); + EXPECT_LT(0, lps.points[0]->start_timestamp_millis); + EXPECT_LT(0, lps.points[0]->end_timestamp_millis); + EXPECT_GE(lps.points[0]->end_timestamp_millis, + lps.points[0]->start_timestamp_millis); + ASSERT_EQ(2, lps.points[1]->labels.size()); + EXPECT_EQ("MyLabel0", lps.points[1]->labels[0].name); + EXPECT_EQ("Label01", lps.points[1]->labels[0].value); + EXPECT_EQ("MyLabel1", lps.points[1]->labels[1].name); + EXPECT_EQ("Label11", lps.points[1]->labels[1].value); + EXPECT_EQ(ValueType::kHistogram, lps.points[1]->value_type); + EqHistograms(with_labels1, lps.points[1]->histogram_value); + EXPECT_LT(0, lps.points[1]->start_timestamp_millis); + EXPECT_LT(0, lps.points[1]->end_timestamp_millis); + EXPECT_GE(lps.points[1]->end_timestamp_millis, + lps.points[1]->start_timestamp_millis); + + const PointSet& ups = *collected_metrics->point_set_map.at( + "/tensorflow/test/sampler_without_labels"); + EXPECT_EQ("/tensorflow/test/sampler_without_labels", ups.metric_name); + ASSERT_EQ(1, ups.points.size()); + EXPECT_EQ(0, ups.points[0]->labels.size()); + EXPECT_EQ(ValueType::kHistogram, ups.points[0]->value_type); + EqHistograms(without_labels, ups.points[0]->histogram_value); + EXPECT_LT(0, ups.points[0]->start_timestamp_millis); + EXPECT_LT(0, ups.points[0]->end_timestamp_millis); + EXPECT_GE(ups.points[0]->end_timestamp_millis, + ups.points[0]->start_timestamp_millis); + } +} + // A FakeClockEnv to manually advance time. class FakeClockEnv : public EnvWrapper { public: diff --git a/tensorflow/core/lib/monitoring/counter.h b/tensorflow/core/lib/monitoring/counter.h index e76057b980a..4b84e9d928c 100644 --- a/tensorflow/core/lib/monitoring/counter.h +++ b/tensorflow/core/lib/monitoring/counter.h @@ -155,7 +155,7 @@ CounterCell* Counter::GetCell(const Labels&... 
labels) "Mismatch between Counter and number of labels " "provided in GetCell(...)."); - const LabelArray& label_array = {labels...}; + const LabelArray& label_array = {{labels...}}; mutex_lock l(mu_); const auto found_it = cells_.find(label_array); if (found_it != cells_.end()) { diff --git a/tensorflow/core/lib/monitoring/metric_def.h b/tensorflow/core/lib/monitoring/metric_def.h index 8c7207b829f..116a73823d7 100644 --- a/tensorflow/core/lib/monitoring/metric_def.h +++ b/tensorflow/core/lib/monitoring/metric_def.h @@ -19,11 +19,25 @@ limitations under the License. #include #include +#include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/core/stringpiece.h" namespace tensorflow { namespace monitoring { +// The different metric kinds available. +// +// Gauge indicates that the metric's values are instantaneous measurements of a +// (typically) continuously varying quantity. Examples: a process's current heap +// size, a queue's current length. +// +// Cumulative indicates that the metric's values represent non-negative changes +// over specified time periods. Example: the number of rpc calls to a service. +enum class MetricKind : int { kGauge = 0, kCumulative }; + +// The type of the metric values. +enum class ValueType : int { kInt64 = 0, kHistogram }; + // Everything in the internal namespace is implementation details. Do not depend // on this. namespace internal { @@ -46,17 +60,20 @@ class StringLiteral { const StringPiece literal_; }; -} // namespace internal +template +ValueType GetValueType(); -// The different metric kinds available. -// -// Gauge indicates that the metric's values are instantaneous measurements of a -// (typically) continuously varying quantity. Examples: a process's current heap -// size, a queue's current length. -// -// Cumulative indicates that the metric's values represent non-negative changes -// over specified time periods. Example: the number of rpc calls to a service. 
-enum class MetricKind : int { kGauge = 0, kCumulative }; +template <> +inline ValueType GetValueType() { + return ValueType::kInt64; +} + +template <> +inline ValueType GetValueType() { + return ValueType::kHistogram; +} + +} // namespace internal // Abstract base class for a metric definition. // @@ -69,6 +86,8 @@ class AbstractMetricDef { public: MetricKind kind() const { return kind_; } + ValueType value_type() const { return value_type_; } + StringPiece name() const { return name_; } StringPiece description() const { return description_; } @@ -82,16 +101,19 @@ class AbstractMetricDef { friend class MetricDef; AbstractMetricDef( - const MetricKind kind, const internal::StringLiteral name, + const MetricKind kind, const ValueType value_type, + const internal::StringLiteral name, const internal::StringLiteral description, const std::vector& label_descriptions) : kind_(kind), + value_type_(value_type), name_(name), description_(description), - label_descriptions_( - {label_descriptions.begin(), label_descriptions.end()}) {} + label_descriptions_(std::vector( + label_descriptions.begin(), label_descriptions.end())) {} const MetricKind kind_; + const ValueType value_type_; const StringPiece name_; const StringPiece description_; const std::vector label_descriptions_; @@ -108,14 +130,12 @@ class AbstractMetricDef { template class MetricDef : public AbstractMetricDef { public: - using value_type = Value; - template MetricDef(const internal::StringLiteral name, const internal::StringLiteral description, const LabelDesc&... 
label_descriptions) - : AbstractMetricDef(metric_kind, name, description, - {label_descriptions...}) { + : AbstractMetricDef(metric_kind, internal::GetValueType(), name, + description, {label_descriptions...}) { static_assert(sizeof...(LabelDesc) == NumLabels, "Mismatch between Counter and number of label " "descriptions."); diff --git a/tensorflow/core/lib/monitoring/metric_def_test.cc b/tensorflow/core/lib/monitoring/metric_def_test.cc index 237be6f48c5..dc07a08e4fe 100644 --- a/tensorflow/core/lib/monitoring/metric_def_test.cc +++ b/tensorflow/core/lib/monitoring/metric_def_test.cc @@ -24,7 +24,7 @@ namespace { TEST(MetricDefTest, Simple) { const MetricDef metric_def0( "/tensorflow/metric0", "An example metric with no labels."); - const MetricDef metric_def1( + const MetricDef metric_def1( "/tensorflow/metric1", "An example metric with one label.", "LabelName"); EXPECT_EQ("/tensorflow/metric0", metric_def0.name()); diff --git a/tensorflow/core/lib/monitoring/sampler.h b/tensorflow/core/lib/monitoring/sampler.h index 9a08437bfdf..3932f8d1a72 100644 --- a/tensorflow/core/lib/monitoring/sampler.h +++ b/tensorflow/core/lib/monitoring/sampler.h @@ -28,13 +28,12 @@ limitations under the License. #include "tensorflow/core/framework/summary.pb.h" #include "tensorflow/core/lib/histogram/histogram.h" +#include "tensorflow/core/lib/monitoring/collection_registry.h" #include "tensorflow/core/lib/monitoring/metric_def.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" -// TODO(vinuraja): Not ready yet. The collection part has to be plumbed in. - namespace tensorflow { namespace monitoring { @@ -68,9 +67,10 @@ class SamplerCell { // A stateful class for updating a cumulative histogram metric. // -// This class encapsulates a set of values (or a single value for a label-less -// metric). Each value is identified by a tuple of labels. 
The class allows the -// user to increment each value. +// This class encapsulates a set of histograms (or a single histogram for a +// label-less metric) configured with a list of increasing bucket boundaries. +// Each histogram is identified by a tuple of labels. The class allows the user +// to add a sample to each histogram value. // // Sampler allocates storage and maintains a cell for each value. You can // retrieve an individual cell using a label-tuple and update it separately. @@ -81,7 +81,10 @@ class SamplerCell { template class Sampler { public: - ~Sampler() {} + ~Sampler() { + // Deleted here, before the metric_def is destroyed. + registration_handle_.reset(); + } // Creates the metric based on the metric-definition arguments. // @@ -110,7 +113,17 @@ class Sampler { Sampler(const MetricDef& metric_def, const std::vector& bucket_limits) - : metric_def_(metric_def), bucket_limits_(bucket_limits) {} + : metric_def_(metric_def), + bucket_limits_(bucket_limits), + registration_handle_(CollectionRegistry::Default()->Register( + &metric_def_, [&](MetricCollectorGetter getter) { + auto metric_collector = getter.Get(&metric_def_); + + mutex_lock l(mu_); + for (const auto& cell : cells_) { + metric_collector.CollectValue(cell.first, cell.second.value()); + } + })) {} mutable mutex mu_; @@ -122,6 +135,9 @@ class Sampler { // Bucket limits for the histograms in the cells. const std::vector bucket_limits_; + // Registration handle with the CollectionRegistry. + std::unique_ptr registration_handle_; + // We use a std::map here because we give out pointers to the SamplerCells, // which need to remain valid even after more cells. using LabelArray = std::array; @@ -171,7 +187,7 @@ SamplerCell* Sampler::GetCell(const Labels&... 
labels) "Mismatch between Sampler and number of labels " "provided in GetCell(...)."); - const LabelArray& label_array = {labels...}; + const LabelArray& label_array = {{labels...}}; mutex_lock l(mu_); const auto found_it = cells_.find(label_array); if (found_it != cells_.end()) { diff --git a/tensorflow/core/lib/monitoring/sampler_test.cc b/tensorflow/core/lib/monitoring/sampler_test.cc index b018d020da9..27e1ccca3c9 100644 --- a/tensorflow/core/lib/monitoring/sampler_test.cc +++ b/tensorflow/core/lib/monitoring/sampler_test.cc @@ -23,10 +23,10 @@ namespace { using histogram::Histogram; -static void EqHistograms(const histogram::Histogram& expected, - const HistogramProto& actual_proto) { - histogram::Histogram actual; - EXPECT_TRUE(actual.DecodeFromProto(actual_proto)); +void EqHistograms(const Histogram& expected, + const HistogramProto& actual_proto) { + Histogram actual; + ASSERT_TRUE(actual.DecodeFromProto(actual_proto)); EXPECT_EQ(expected.ToString(), actual.ToString()); } diff --git a/tensorflow/core/lib/strings/numbers.cc b/tensorflow/core/lib/strings/numbers.cc index 4df0f54378e..fc07bd446c1 100644 --- a/tensorflow/core/lib/strings/numbers.cc +++ b/tensorflow/core/lib/strings/numbers.cc @@ -80,16 +80,12 @@ T locale_independent_strtonum(const char* str, const char** endptr) { // Set to result to what strto{f,d} functions would have returned. If the // number was outside the range, the stringstream sets the fail flag, but // returns the +/-max() value, whereas strto{f,d} functions return +/-INF. 
- bool real_fail = false; if (s.fail()) { - real_fail = true; if (result == std::numeric_limits::max()) { result = std::numeric_limits::infinity(); - real_fail = false; s.clear(s.rdstate() & ~std::ios::failbit); } else if (result == -std::numeric_limits::max()) { result = -std::numeric_limits::infinity(); - real_fail = false; s.clear(s.rdstate() & ~std::ios::failbit); } } @@ -97,10 +93,9 @@ T locale_independent_strtonum(const char* str, const char** endptr) { if (endptr) { *endptr = str + - (real_fail - ? static_cast(0) - : (s.eof() ? static_cast(strlen(str)) - : s.tellg())); + (s.fail() ? static_cast(0) + : (s.eof() ? static_cast(strlen(str)) + : s.tellg())); } return result; } diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 33695451dba..6e076a092e1 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -114,48 +114,49 @@ Status SetOutputShapeForReshape(InferenceContext* c) { ShapeHandle out; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &out)); - // If the rank and all dimensions of the input tensor are known, we may - // infer missing shape information or perform shape checks. - // NumElements conveniently returns kUnknownDim upon missing rank or - // dimension information. - // Additionally, if the rank of the out shape is unknown we have no shape - // information to go off of. + if (!c->RankKnown(out)) { + // We have no information about the shape of the output. + c->set_output(0, out); + return Status::OK(); + } DimensionHandle num_in_elems = c->NumElements(in); - DimensionHandle num_out_elems = c->NumElements(out); - if (!c->ValueKnown(num_in_elems) || !c->RankKnown(out)) { - // Do nothing. We have no shape information to infer from so we directly - // return out as our shape. - } else if (c->ValueKnown(num_out_elems)) { - // If we know the number of output elements, we ensure that they - // are equal to the number of input elements. 
- if (c->Value(num_in_elems) != c->Value(num_out_elems)) { + if (c->FullyDefined(out)) { + DimensionHandle num_out_elems = c->NumElements(out); + if (c->ValueKnown(num_in_elems) && + c->Value(num_in_elems) != c->Value(num_out_elems)) { return errors::InvalidArgument( "Cannot reshape a tensor with ", c->DebugString(num_in_elems), " elements to shape ", c->DebugString(out), " (", c->DebugString(num_out_elems), " elements)"); } - } else { - // If we don't know the number of output elements, we can infer + c->set_output(0, out); + return Status::OK(); + } + + if (c->ValueKnown(num_in_elems)) { + // We don't know the number of output elements, but we can try to infer // the missing dimension. int32 unknown_idx = -1; + bool too_many_unknown = false; DimensionHandle known_elems = c->MakeDim(1); for (int32 i = 0; i < c->Rank(out); ++i) { DimensionHandle dim = c->Dim(out, i); if (!c->ValueKnown(dim)) { if (unknown_idx >= 0) { - return errors::InvalidArgument( - "Cannot infer multiple unknown dimensions in shape ", - c->DebugString(out)); + too_many_unknown = true; + break; } unknown_idx = i; } else { TF_RETURN_IF_ERROR(c->Multiply(known_elems, dim, &known_elems)); } } - DimensionHandle inferred_dim; - TF_RETURN_IF_ERROR(c->Divide(num_in_elems, c->Value(known_elems), - true /* evenly_divisible */, &inferred_dim)); - TF_RETURN_IF_ERROR(c->ReplaceDim(out, unknown_idx, inferred_dim, &out)); + if (!too_many_unknown) { + DimensionHandle inferred_dim; + TF_RETURN_IF_ERROR(c->Divide(num_in_elems, c->Value(known_elems), + true /* evenly_divisible */, &inferred_dim)); + TF_RETURN_IF_ERROR(c->ReplaceDim(out, unknown_idx, inferred_dim, &out)); + } } c->set_output(0, out); @@ -2477,11 +2478,10 @@ REGISTER_OP("Placeholder") PartialTensorShape shape; TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape)); - // Placeholder has a legacy bug where we cannot tell - // the difference between a scalar shape attribute and - // 'unknown shape'. 
So if the shape is a scalar, we return - // an unknown shape. - if (shape.dims() == 0) { + // Placeholder has legacy behavior where we cannot tell the difference + // between a scalar shape attribute and 'unknown shape'. So if the shape + // is a scalar, we return an unknown shape. + if (shape.dims() <= 0) { return shape_inference::UnknownShape(c); } @@ -4382,6 +4382,117 @@ output_min: This value is copied from input_min. output_max: This value is copied from input_max. )Doc"); +REGISTER_OP("FakeQuantWithMinMaxArgs") + .Attr("min: float = -6.0") + .Attr("max: float = 6.0") + .Input("inputs: float") + .Output("outputs: float") + .Doc(R"doc( +Fake-quantize the 'inputs' tensor, type float to 'outputs' tensor of same type. + +Attributes [min; max] define the clamping range for the 'inputs' data. Op +divides this range into 255 steps (total of 256 values), then replaces each +'inputs' value with the closest of the quantized step values. + +Quantization is called fake since the output is still in floating point. +)doc"); + +REGISTER_OP("FakeQuantWithMinMaxArgsGradient") + .Attr("min: float = -6.0") + .Attr("max: float = 6.0") + .Input("gradients: float") + .Input("inputs: float") + .Output("backprops: float") + .Doc(R"doc( +Compute gradients for a FakeQuantWithMinMaxArgs operation. + +gradients: Backpropagated gradients above the FakeQuantWithMinMaxArgs operation. +inputs: Values passed as inputs to the FakeQuantWithMinMaxArgs operation. +backprops: Backpropagated gradients below the FakeQuantWithMinMaxArgs operation: + `gradients * (inputs >= min && inputs <= max)`. +)doc"); + +REGISTER_OP("FakeQuantWithMinMaxVars") + .Input("inputs: float") + .Input("min: float") + .Input("max: float") + .Output("outputs: float") + .Doc(R"doc( +Fake-quantize the 'inputs' tensor of type float and shape `[b, h, w, d]` via +global float scalars `min` and `max` to 'outputs' tensor of same shape as +`inputs`. + +[min; max] is the clamping range for the 'inputs' data. 
Op divides this range +into 255 steps (total of 256 values), then replaces each 'inputs' value with the +closest of the quantized step values. + +This operation has a gradient and thus allows for training `min` and `max` values. +)doc"); + +REGISTER_OP("FakeQuantWithMinMaxVarsGradient") + .Input("gradients: float") + .Input("inputs: float") + .Input("min: float") + .Input("max: float") + .Output("backprops_wrt_input: float") + .Output("backprop_wrt_min: float") + .Output("backprop_wrt_max: float") + .Doc(R"doc( +Compute gradients for a FakeQuantWithMinMaxVars operation. + +gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation. +inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation. +min, max: Quantization interval, scalar floats. +backprops_wrt_input: Backpropagated gradients w.r.t. inputs: + `gradients * (inputs >= min && inputs <= max)`. +backprop_wrt_min: Backpropagated gradients w.r.t. min parameter: + `sum(gradients * (inputs < min))`. +backprop_wrt_max: Backpropagated gradients w.r.t. max parameter: + `sum(gradients * (inputs > max))`. +)doc"); + +REGISTER_OP("FakeQuantWithMinMaxVarsPerChannel") + .Input("inputs: float") + .Input("min: float") + .Input("max: float") + .Output("outputs: float") + .Doc(R"doc( +Fake-quantize the 'inputs' tensor of type float and one of the shapes: `[d]`, +`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]` +to 'outputs' tensor of same shape as `inputs`. + +[min; max] is the clamping range for the 'inputs' data in the corresponding +depth channel. Op divides this range into 255 steps (total of 256 values), then +replaces each 'inputs' value with the closest of the quantized step values. + +This operation has a gradient and thus allows for training `min` and `max` values. 
+)doc"); + +REGISTER_OP("FakeQuantWithMinMaxVarsPerChannelGradient") + .Input("gradients: float") + .Input("inputs: float") + .Input("min: float") + .Input("max: float") + .Output("backprops_wrt_input: float") + .Output("backprop_wrt_min: float") + .Output("backprop_wrt_max: float") + .Doc(R"doc( +Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation. + +gradients: Backpropagated gradients above the FakeQuantWithMinMaxVars operation, + shape one of: `[d]`, `[b, d]`, `[b, h, w, d]`. +inputs: Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape + same as `gradients`. +min, max: Quantization interval, floats of shape `[d]`. +backprops_wrt_input: Backpropagated gradients w.r.t. inputs, shape same as + `inputs`: + `gradients * (inputs >= min && inputs <= max)`. +backprop_wrt_min: Backpropagated gradients w.r.t. min parameter, shape `[d]`: + `sum_per_d(gradients * (inputs < min))`. +backprop_wrt_max: Backpropagated gradients w.r.t. max parameter, shape `[d]`: + `sum_per_d(gradients * (inputs > max))`. +)doc"); + // Deprecated op registrations: // The following can be deleted after 10mar2017. diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc index 71491e8d669..8679739b70c 100644 --- a/tensorflow/core/ops/array_ops_test.cc +++ b/tensorflow/core/ops/array_ops_test.cc @@ -693,8 +693,7 @@ TEST(ArrayOpsTest, Reshape_ShapeFn) { "[7];[2]"); // Multiple missing dimensions cannot be inferred. new_shape = test::AsTensor({-1, -1, 2}); - INFER_ERROR("Cannot infer multiple unknown dimensions in shape [?,?,2]", op, - "[8];[3]"); + INFER_OK(op, "[8];[3]", "[?,?,2]"); // Reshaping to a scalar. 
new_shape = test::AsTensor({}); diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt index fac856d6602..b5b056e41f6 100644 --- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt @@ -11709,6 +11709,160 @@ op { type: DT_STRING } } +op { + name: "FakeQuantWithMinMaxArgs" + input_arg { + name: "inputs" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } + attr { + name: "min" + type: "float" + default_value { + f: -6 + } + } + attr { + name: "max" + type: "float" + default_value { + f: 6 + } + } +} +op { + name: "FakeQuantWithMinMaxArgsGradient" + input_arg { + name: "gradients" + type: DT_FLOAT + } + input_arg { + name: "inputs" + type: DT_FLOAT + } + output_arg { + name: "backprops" + type: DT_FLOAT + } + attr { + name: "min" + type: "float" + default_value { + f: -6 + } + } + attr { + name: "max" + type: "float" + default_value { + f: 6 + } + } +} +op { + name: "FakeQuantWithMinMaxVars" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } +} +op { + name: "FakeQuantWithMinMaxVarsGradient" + input_arg { + name: "gradients" + type: DT_FLOAT + } + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "backprops_wrt_input" + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_min" + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_max" + type: DT_FLOAT + } +} +op { + name: "FakeQuantWithMinMaxVarsPerChannel" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } +} +op { + name: 
"FakeQuantWithMinMaxVarsPerChannelGradient" + input_arg { + name: "gradients" + type: DT_FLOAT + } + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "backprops_wrt_input" + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_min" + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_max" + type: DT_FLOAT + } +} op { name: "Fill" input_arg { @@ -22466,6 +22620,42 @@ op { } } } +op { + name: "RequantizationRange" + input_arg { + name: "input" + type_attr: "Tinput" + } + input_arg { + name: "input_min" + type: DT_FLOAT + } + input_arg { + name: "input_max" + type: DT_FLOAT + } + output_arg { + name: "output_min" + type: DT_FLOAT + } + output_arg { + name: "output_max" + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } +} op { name: "Requantize" input_arg { diff --git a/tensorflow/core/ops/data_flow_ops.cc b/tensorflow/core/ops/data_flow_ops.cc index d1f6d9ff0ae..3c13ca2bfbf 100644 --- a/tensorflow/core/ops/data_flow_ops.cc +++ b/tensorflow/core/ops/data_flow_ops.cc @@ -629,6 +629,10 @@ REGISTER_OP("SparseConditionalAccumulator") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() + .SetShapeFn([](InferenceContext* c) { + c->set_output(0, c->Vector(2)); + return Status::OK(); + }) .Doc(R"doc( A conditional accumulator for aggregating sparse gradients. 
The accumulator accepts gradients marked with local_step greater or equal to the most recent @@ -654,6 +658,11 @@ REGISTER_OP("SparseAccumulatorApplyGradient") .Input("gradient_shape: int64") .Attr("dtype: numbertype") .Attr("has_known_shape: bool") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + return Status::OK(); + }) .Doc(R"doc( Applies a sparse gradient to a given accumulator. Does not add if local_step is lesser than the accumulator's global_step. @@ -679,6 +688,14 @@ REGISTER_OP("SparseAccumulatorTakeGradient") .Output("values: dtype") .Output("shape: int64") .Attr("dtype: numbertype") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + // Shape of output is the shape of the accumulator referenced + // by 'handle', but which is not available here, so we lose + // shape information. + return shape_inference::UnknownShape(c); + }) .Doc(R"doc( Extracts the average sparse gradient in the given SparseConditionalAccumulator, provided that sufficient (i.e., more than num_required) gradients have been diff --git a/tensorflow/core/ops/math_ops.cc b/tensorflow/core/ops/math_ops.cc index 3390e3661d6..8d3d9310a4d 100644 --- a/tensorflow/core/ops/math_ops.cc +++ b/tensorflow/core/ops/math_ops.cc @@ -2298,6 +2298,35 @@ out_type: The type of the output. Should be a lower bit depth than Tinput. 
)doc"); +REGISTER_OP("RequantizationRange") + .Input("input: Tinput") + .Input("input_min: float") + .Input("input_max: float") + .Output("output_min: float") + .Output("output_max: float") + .Attr("Tinput: quantizedtype") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->Scalar()); + c->set_output(1, c->Scalar()); + return Status::OK(); + }) + .Doc(R"doc( +Given a quantized tensor described by (input, input_min, input_max), outputs a +range that covers the actual values present in that tensor. This op is +typically used to produce the requested_output_min and requested_output_max for +Requantize. + +input_min: The float value that the minimum quantized input value represents. +input_max: The float value that the maximum quantized input value represents. +Tinput: The type of the input. +output_min: The computed min output. +output_max: the computed max output. + +)doc"); + // Deprecated ops: REGISTER_OP("BatchFFT") .Input("input: complex64") diff --git a/tensorflow/core/ops/math_ops_test.cc b/tensorflow/core/ops/math_ops_test.cc index d5f56d7a174..79ae187342b 100644 --- a/tensorflow/core/ops/math_ops_test.cc +++ b/tensorflow/core/ops/math_ops_test.cc @@ -462,4 +462,15 @@ TEST(MathOpsTest, Requantize_ShapeFn) { INFER_ERROR("must be rank 0", op, "?;?;?;?;[4]"); } +TEST(MathOpstest, RequantizationRange_ShapeFn) { + ShapeInferenceTestOp op("RequantizationRange"); + + INFER_OK(op, "?;?;?", "[];[]"); + INFER_OK(op, "?;[];[]", "[];[]"); + + // Rank checks on input scalars. 
+ INFER_ERROR("must be rank 0", op, "?;[1];?"); + INFER_ERROR("must be rank 0", op, "?;?;[2]"); +} + } // end namespace tensorflow diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index c8abfc04eb4..7a57f917e0a 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -6710,6 +6710,182 @@ op { } summary: "Output a fact about factorials." } +op { + name: "FakeQuantWithMinMaxArgs" + input_arg { + name: "inputs" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } + attr { + name: "min" + type: "float" + default_value { + f: -6 + } + } + attr { + name: "max" + type: "float" + default_value { + f: 6 + } + } + summary: "Fake-quantize the \'inputs\' tensor, type float to \'outputs\' tensor of same type." + description: "Attributes [min; max] define the clamping range for the \'inputs\' data. Op\ndivides this range into 255 steps (total of 256 values), then replaces each\n\'inputs\' value with the closest of the quantized step values.\n\nQuantization is called fake since the output is still in floating point." +} +op { + name: "FakeQuantWithMinMaxArgsGradient" + input_arg { + name: "gradients" + description: "Backpropagated gradients above the FakeQuantWithMinMaxArgs operation." + type: DT_FLOAT + } + input_arg { + name: "inputs" + description: "Values passed as inputs to the FakeQuantWithMinMaxArgs operation." + type: DT_FLOAT + } + output_arg { + name: "backprops" + description: "Backpropagated gradients below the FakeQuantWithMinMaxArgs operation:\n`gradients * (inputs >= min && inputs <= max)`." + type: DT_FLOAT + } + attr { + name: "min" + type: "float" + default_value { + f: -6 + } + } + attr { + name: "max" + type: "float" + default_value { + f: 6 + } + } + summary: "Compute gradients for a FakeQuantWithMinMaxArgs operation." 
+} +op { + name: "FakeQuantWithMinMaxVars" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } + summary: "Fake-quantize the \'inputs\' tensor of type float and shape `[b, h, w, d]` via" + description: "global float scalars `min` and `max` to \'outputs\' tensor of same shape as\n`inputs`.\n\n[min; max] is the clamping range for the \'inputs\' data. Op divides this range\ninto 255 steps (total of 256 values), then replaces each \'inputs\' value with the\nclosest of the quantized step values.\n\nThis operation has a gradient and thus allows for training `min` and `max` values." +} +op { + name: "FakeQuantWithMinMaxVarsGradient" + input_arg { + name: "gradients" + description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation." + type: DT_FLOAT + } + input_arg { + name: "inputs" + description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation.\nmin, max: Quantization interval, scalar floats." + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "backprops_wrt_input" + description: "Backpropagated gradients w.r.t. inputs:\n`gradients * (inputs >= min && inputs <= max)`." + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_min" + description: "Backpropagated gradients w.r.t. min parameter:\n`sum(gradients * (inputs < min))`." + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_max" + description: "Backpropagated gradients w.r.t. max parameter:\n`sum(gradients * (inputs > max))`." + type: DT_FLOAT + } + summary: "Compute gradients for a FakeQuantWithMinMaxVars operation." 
+} +op { + name: "FakeQuantWithMinMaxVarsPerChannel" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "outputs" + type: DT_FLOAT + } + summary: "Fake-quantize the \'inputs\' tensor of type float and one of the shapes: `[d]`," + description: "`[b, d]` `[b, h, w, d]` via per-channel floats `min` and `max` of shape `[d]`\nto \'outputs\' tensor of same shape as `inputs`.\n\n[min; max] is the clamping range for the \'inputs\' data in the corresponding\ndepth channel. Op divides this range into 255 steps (total of 256 values), then\nreplaces each \'inputs\' value with the closest of the quantized step values.\n\nThis operation has a gradient and thus allows for training `min` and `max` values." +} +op { + name: "FakeQuantWithMinMaxVarsPerChannelGradient" + input_arg { + name: "gradients" + description: "Backpropagated gradients above the FakeQuantWithMinMaxVars operation,\nshape one of: `[d]`, `[b, d]`, `[b, h, w, d]`." + type: DT_FLOAT + } + input_arg { + name: "inputs" + description: "Values passed as inputs to the FakeQuantWithMinMaxVars operation, shape\n same as `gradients`.\nmin, max: Quantization interval, floats of shape `[d]`." + type: DT_FLOAT + } + input_arg { + name: "min" + type: DT_FLOAT + } + input_arg { + name: "max" + type: DT_FLOAT + } + output_arg { + name: "backprops_wrt_input" + description: "Backpropagated gradients w.r.t. inputs, shape same as\n`inputs`:\n `gradients * (inputs >= min && inputs <= max)`." + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_min" + description: "Backpropagated gradients w.r.t. min parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs < min))`." + type: DT_FLOAT + } + output_arg { + name: "backprop_wrt_max" + description: "Backpropagated gradients w.r.t. max parameter, shape `[d]`:\n`sum_per_d(gradients * (inputs > max))`." 
+ type: DT_FLOAT + } + summary: "Compute gradients for a FakeQuantWithMinMaxVarsPerChannel operation." +} op { name: "Fill" input_arg { @@ -14090,6 +14266,49 @@ op { } summary: "Computes rectified linear gradients for a Relu operation." } +op { + name: "RequantizationRange" + input_arg { + name: "input" + type_attr: "Tinput" + } + input_arg { + name: "input_min" + description: "The float value that the minimum quantized input value represents." + type: DT_FLOAT + } + input_arg { + name: "input_max" + description: "The float value that the maximum quantized input value represents." + type: DT_FLOAT + } + output_arg { + name: "output_min" + description: "The computed min output." + type: DT_FLOAT + } + output_arg { + name: "output_max" + description: "the computed max output." + type: DT_FLOAT + } + attr { + name: "Tinput" + type: "type" + description: "The type of the input." + allowed_values { + list { + type: DT_QINT8 + type: DT_QUINT8 + type: DT_QINT16 + type: DT_QUINT16 + type: DT_QINT32 + } + } + } + summary: "Given a quantized tensor described by (input, input_min, input_max), outputs a" + description: "range that covers the actual values present in that tensor. This op is\ntypically used to produce the requested_output_min and requested_output_max for\nRequantize." +} op { name: "Requantize" input_arg { diff --git a/tensorflow/core/ops/state_ops.cc b/tensorflow/core/ops/state_ops.cc index 629a280cc8a..b9ac8b16ffb 100644 --- a/tensorflow/core/ops/state_ops.cc +++ b/tensorflow/core/ops/state_ops.cc @@ -28,7 +28,24 @@ REGISTER_OP("Variable") .Attr("container: string = ''") .Attr("shared_name: string = ''") .SetIsStateful() - .SetShapeFn(shape_inference::UnknownShape) + .SetShapeFn([](InferenceContext* c) { + PartialTensorShape shape; + TF_RETURN_IF_ERROR(c->GetAttr("shape", &shape)); + + // Variable has legacy behavior where we cannot tell the difference + // between a scalar shape attribute and 'unknown shape'. 
So if the shape + // is a scalar, we return an unknown shape. + if (shape.dims() <= 0) { + return shape_inference::UnknownShape(c); + } + + TensorShapeProto shape_proto; + shape.AsProto(&shape_proto); + ShapeHandle out; + TF_RETURN_IF_ERROR(c->MakeShapeFromShapeProto(shape_proto, &out)); + c->set_output(0, out); + return Status::OK(); + }) .Doc(R"doc( Holds state in the form of a tensor that persists across steps. diff --git a/tensorflow/core/ops/state_ops_test.cc b/tensorflow/core/ops/state_ops_test.cc index 586de77edc8..4c1ec67e9cf 100644 --- a/tensorflow/core/ops/state_ops_test.cc +++ b/tensorflow/core/ops/state_ops_test.cc @@ -71,4 +71,30 @@ TEST(StateOpsTest, TemporaryVariable_ShapeFn) { INFER_OK(op, "", "[1,2,3]"); } +TEST(StateOpsTest, Variable_ShapeFn) { + ShapeInferenceTestOp op("Variable"); + TensorShapeProto shape_proto; + + // Unknown rank. + PartialTensorShape().AsProto(&shape_proto); + TF_ASSERT_OK(NodeDefBuilder("test", "Variable") + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "", "?"); + + // For historical reasons an empty TensorShapeProto can be either an unknown + // rank or a scalar, so the shape function conservatively says "unknown" + shape_proto.Clear(); + TF_ASSERT_OK(NodeDefBuilder("test", "Variable") + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "", "?"); + + // Specified shape. + TensorShape({1, 2, 3}).AsProto(&shape_proto); + TF_ASSERT_OK(NodeDefBuilder("test", "Variable") + .Attr("shape", shape_proto) + .Finalize(&op.node_def)); + INFER_OK(op, "", "[1,2,3]"); +} } // end namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 56e89277cc8..6641971ba07 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -52,6 +52,9 @@ constexpr uint64 kUploadRetryDelayMicros = 1000000L; // The HTTP response code "308 Resume Incomplete". 
constexpr uint64 HTTP_CODE_RESUME_INCOMPLETE = 308; +// The file statistics returned by Stat() for directories. +const FileStatistics DIRECTORY_STAT(0, 0, true); + Status GetTmpFilename(string* filename) { if (!filename) { return errors::Internal("'filename' cannot be nullptr."); @@ -80,19 +83,19 @@ Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket, StringPiece scheme, bucketp, objectp; ParseURI(fname, &scheme, &bucketp, &objectp); if (scheme != "gs") { - return errors::InvalidArgument( - strings::StrCat("GCS path doesn't start with 'gs://': ", fname)); + return errors::InvalidArgument("GCS path doesn't start with 'gs://': ", + fname); } *bucket = bucketp.ToString(); if (bucket->empty() || *bucket == ".") { - return errors::InvalidArgument( - strings::StrCat("GCS path doesn't contain a bucket name: ", fname)); + return errors::InvalidArgument("GCS path doesn't contain a bucket name: ", + fname); } objectp.Consume("/"); *object = objectp.ToString(); if (!empty_object_ok && object->empty()) { - return errors::InvalidArgument( - strings::StrCat("GCS path doesn't contain an object name: ", fname)); + return errors::InvalidArgument("GCS path doesn't contain an object name: ", + fname); } return Status::OK(); } @@ -128,8 +131,8 @@ Status GetValue(const Json::Value& parent, const string& name, Json::Value* result) { *result = parent.get(name, Json::Value::null); if (*result == Json::Value::null) { - return errors::Internal(strings::StrCat( - "The field '", name, "' was expected in the JSON response.")); + return errors::Internal("The field '", name, + "' was expected in the JSON response."); } return Status::OK(); } @@ -141,8 +144,8 @@ Status GetStringValue(const Json::Value& parent, const string& name, TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value)); if (!result_value.isString()) { return errors::Internal( - strings::StrCat("The field '", name, - "' in the JSON response was expected to be a string.")); + "The field '", name, + "' in 
the JSON response was expected to be a string."); } *result = result_value.asString(); return Status::OK(); @@ -162,8 +165,8 @@ Status GetInt64Value(const Json::Value& parent, const string& name, return Status::OK(); } return errors::Internal( - strings::StrCat("The field '", name, - "' in the JSON response was expected to be a number.")); + "The field '", name, + "' in the JSON response was expected to be a number."); } /// Reads a boolean JSON value with the given name from a parent JSON value. @@ -172,9 +175,9 @@ Status GetBoolValue(const Json::Value& parent, const string& name, Json::Value result_value; TF_RETURN_IF_ERROR(GetValue(parent, name, &result_value)); if (!result_value.isBool()) { - return errors::Internal(strings::StrCat( + return errors::Internal( "The field '", name, - "' in the JSON response was expected to be a boolean.")); + "' in the JSON response was expected to be a boolean."); } *result = result_value.asBool(); return Status::OK(); @@ -233,9 +236,9 @@ class GcsRandomAccessFile : public RandomAccessFile { if (result->size() < n) { // This is not an error per se. The RandomAccessFile interface expects // that Read returns OutOfRange if fewer bytes were read than requested. - return errors::OutOfRange(strings::StrCat("EOF reached, ", result->size(), - " bytes were read out of ", n, - " bytes requested.")); + return errors::OutOfRange("EOF reached, ", result->size(), + " bytes were read out of ", n, + " bytes requested."); } return Status::OK(); } @@ -378,8 +381,8 @@ class GcsWritableFile : public WritableFile { case errors::Code::NOT_FOUND: // GCS docs recommend retrying the whole upload. We're relying on the // RetryingFileSystem to retry the Sync() call. - return errors::Unavailable( - strings::StrCat("Could not upload gs://", bucket_, "/", object_)); + return errors::Unavailable("Could not upload gs://", bucket_, "/", + object_); case errors::Code::UNAVAILABLE: // The upload can be resumed, but GCS docs recommend an exponential // back-off. 
@@ -391,8 +394,7 @@ class GcsWritableFile : public WritableFile { return upload_status; } } - return errors::Aborted( - strings::StrCat("Upload gs://", bucket_, "/", object_, " failed.")); + return errors::Aborted("Upload gs://", bucket_, "/", object_, " failed."); } private: @@ -445,9 +447,9 @@ class GcsWritableFile : public WritableFile { request->Send(), " when initiating an upload to ", GetGcsPath()); *session_uri = request->GetResponseHeader("Location"); if (session_uri->empty()) { - return errors::Internal( - strings::StrCat("Unexpected response from GCS when writing to ", - GetGcsPath(), ": 'Location' header not returned.")); + return errors::Internal("Unexpected response from GCS when writing to ", + GetGcsPath(), + ": 'Location' header not returned."); } return Status::OK(); } @@ -495,15 +497,14 @@ class GcsWritableFile : public WritableFile { std::vector range_parts; if (!str_util::SplitAndParseAsInts(range_piece, '-', &range_parts) || range_parts.size() != 2) { - return errors::Internal(strings::StrCat( - "Unexpected response from GCS when writing ", GetGcsPath(), - ": Range header '", received_range, "' could not be parsed.")); + return errors::Internal("Unexpected response from GCS when writing ", + GetGcsPath(), ": Range header '", + received_range, "' could not be parsed."); } if (range_parts[0] != 0) { - return errors::Internal( - strings::StrCat("Unexpected response from GCS when writing to ", - GetGcsPath(), ": the returned range '", - received_range, "' does not start at zero.")); + return errors::Internal("Unexpected response from GCS when writing to ", + GetGcsPath(), ": the returned range '", + received_range, "' does not start at zero."); } // If GCS returned "Range: 0-10", this means 11 bytes were uploaded. 
*uploaded = range_parts[1] + 1; @@ -655,14 +656,31 @@ bool GcsFileSystem::FileExists(const string& fname) { return false; } if (object.empty()) { - return BucketExists(bucket).ok(); + bool result; + return BucketExists(bucket, &result).ok() && result; } - return ObjectExists(bucket, object).ok() || FolderExists(fname).ok(); + bool result; + return (ObjectExists(bucket, object, &result).ok() && result) || + (FolderExists(fname, &result).ok() && result); } -Status GcsFileSystem::ObjectExists(const string& bucket, const string& object) { - FileStatistics stat; - return StatForObject(bucket, object, &stat); +Status GcsFileSystem::ObjectExists(const string& bucket, const string& object, + bool* result) { + if (!result) { + return errors::Internal("'result' cannot be nullptr."); + } + FileStatistics not_used_stat; + const Status status = StatForObject(bucket, object, ¬_used_stat); + switch (status.code()) { + case errors::Code::OK: + *result = true; + return Status::OK(); + case errors::Code::NOT_FOUND: + *result = false; + return Status::OK(); + default: + return status; + } } Status GcsFileSystem::StatForObject(const string& bucket, const string& object, @@ -707,7 +725,10 @@ Status GcsFileSystem::StatForObject(const string& bucket, const string& object, return Status::OK(); } -Status GcsFileSystem::BucketExists(const string& bucket) { +Status GcsFileSystem::BucketExists(const string& bucket, bool* result) { + if (!result) { + return errors::Internal("'result' cannot be nullptr."); + } string auth_token; TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_.get(), &auth_token)); @@ -715,15 +736,26 @@ Status GcsFileSystem::BucketExists(const string& bucket) { TF_RETURN_IF_ERROR(request->Init()); request->SetUri(strings::StrCat(kGcsUriBase, "b/", bucket)); request->AddAuthBearerHeader(auth_token); - return request->Send(); + const Status status = request->Send(); + switch (status.code()) { + case errors::Code::OK: + *result = true; + return Status::OK(); + case 
errors::Code::NOT_FOUND: + *result = false; + return Status::OK(); + default: + return status; + } } -Status GcsFileSystem::FolderExists(const string& dirname) { +Status GcsFileSystem::FolderExists(const string& dirname, bool* result) { + if (!result) { + return errors::Internal("'result' cannot be nullptr."); + } std::vector children; TF_RETURN_IF_ERROR(GetChildrenBounded(dirname, 1, &children, true)); - if (children.empty()) { - return errors::NotFound("Folder does not exist."); - } + *result = !children.empty(); return Status::OK(); } @@ -740,8 +772,8 @@ Status GcsFileSystem::GetMatchingPaths(const string& pattern, pattern.substr(0, pattern.find_first_of("*?[\\")); const string& dir = io::Dirname(fixed_prefix).ToString(); if (dir.empty()) { - return errors::InvalidArgument( - strings::StrCat("A GCS pattern doesn't have a bucket name: ", pattern)); + return errors::InvalidArgument("A GCS pattern doesn't have a bucket name: ", + pattern); } std::vector all_files; TF_RETURN_IF_ERROR(GetChildrenBounded(dir, UINT64_MAX, &all_files, true)); @@ -854,9 +886,9 @@ Status GcsFileSystem::GetChildrenBounded(const string& dirname, const string& prefix_str = prefix.asString(); StringPiece relative_path(prefix_str); if (!relative_path.Consume(object_prefix)) { - return errors::Internal(strings::StrCat( + return errors::Internal( "Unexpected response: the returned folder name ", prefix_str, - " doesn't match the prefix ", object_prefix)); + " doesn't match the prefix ", object_prefix); } result->emplace_back(relative_path.ToString()); if (++retrieved_results >= max_results) { @@ -882,18 +914,30 @@ Status GcsFileSystem::Stat(const string& fname, FileStatistics* stat) { } string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); - if (StatForObject(bucket, object, stat).ok()) { + if (object.empty()) { + bool is_bucket; + TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket)); + if (is_bucket) { + *stat = DIRECTORY_STAT; + return Status::OK(); + } + 
return errors::NotFound("The specified bucket ", fname, " was not found."); + } + + const Status status = StatForObject(bucket, object, stat); + if (status.ok()) { return Status::OK(); } - if ((object.empty() && BucketExists(bucket).ok()) || - (!object.empty() && FolderExists(fname).ok())) { - stat->length = 0; - stat->mtime_nsec = 0; - stat->is_directory = true; + if (status.code() != errors::Code::NOT_FOUND) { + return status; + } + bool is_folder; + TF_RETURN_IF_ERROR(FolderExists(fname, &is_folder)); + if (is_folder) { + *stat = DIRECTORY_STAT; return Status::OK(); } - return errors::NotFound( - strings::StrCat("The specified path ", fname, " was not found.")); + return errors::NotFound("The specified path ", fname, " was not found."); } Status GcsFileSystem::DeleteFile(const string& fname) { @@ -917,11 +961,11 @@ Status GcsFileSystem::CreateDir(const string& dirname) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(dirname, true, &bucket, &object)); if (object.empty()) { - if (BucketExists(bucket).ok()) { - return Status::OK(); - } - return errors::NotFound( - strings::StrCat("The specified bucket ", dirname, " was not found.")); + bool is_bucket; + TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket)); + return is_bucket ? Status::OK() + : errors::NotFound("The specified bucket ", dirname, + " was not found."); } // Create a zero-length directory marker object. std::unique_ptr file; @@ -1014,9 +1058,9 @@ Status GcsFileSystem::RenameObject(const string& src, const string& target) { // which requires multiple rewrite calls. // TODO(surkov): implement multi-step rewrites. 
return errors::Unimplemented( - strings::StrCat("Couldn't rename ", src, " to ", target, - ": moving large files between buckets with different " - "locations or storage classes is not supported.")); + "Couldn't rename ", src, " to ", target, + ": moving large files between buckets with different " + "locations or storage classes is not supported."); } TF_RETURN_IF_ERROR(DeleteFile(src)); @@ -1027,21 +1071,26 @@ Status GcsFileSystem::IsDirectory(const string& fname) { string bucket, object; TF_RETURN_IF_ERROR(ParseGcsPath(fname, true, &bucket, &object)); if (object.empty()) { - if (BucketExists(bucket).ok()) { + bool is_bucket; + TF_RETURN_IF_ERROR(BucketExists(bucket, &is_bucket)); + if (is_bucket) { return Status::OK(); } - return errors::NotFound(strings::StrCat("The specified bucket gs://", - bucket, " was not found.")); + return errors::NotFound("The specified bucket gs://", bucket, + " was not found."); } - if (FolderExists(fname).ok()) { + bool is_folder; + TF_RETURN_IF_ERROR(FolderExists(fname, &is_folder)); + if (is_folder) { return Status::OK(); } - if (ObjectExists(bucket, object).ok()) { - return errors::FailedPrecondition( - strings::StrCat("The specified path ", fname, " is not a directory.")); + bool is_object; + TF_RETURN_IF_ERROR(ObjectExists(bucket, object, &is_object)); + if (is_object) { + return errors::FailedPrecondition("The specified path ", fname, + " is not a directory."); } - return errors::NotFound( - strings::StrCat("The specified path ", fname, " was not found.")); + return errors::NotFound("The specified path ", fname, " was not found."); } Status GcsFileSystem::DeleteRecursively(const string& dirname, diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index 618be5934ea..c98a50cc879 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -76,9 +76,21 @@ class GcsFileSystem : public FileSystem { int64* 
undeleted_dirs) override; private: - Status BucketExists(const string& bucket); - Status ObjectExists(const string& bucket, const string& object); - Status FolderExists(const string& dirname); + /// \brief Checks if the bucket exists. Returns OK if the check succeeded. + /// + /// 'result' is set if the function returns OK. 'result' cannot be nullptr. + Status BucketExists(const string& bucket, bool* result); + + /// \brief Checks if the object exists. Returns OK if the check succeeded. + /// + /// 'result' is set if the function returns OK. 'result' cannot be nullptr. + Status ObjectExists(const string& bucket, const string& object, bool* result); + + /// \brief Checks if the folder exists. Returns OK if the check succeeded. + /// + /// 'result' is set if the function returns OK. 'result' cannot be nullptr. + Status FolderExists(const string& dirname, bool* result); + Status GetChildrenBounded(const string& dir, uint64 max_results, std::vector* result, bool recursively); /// Retrieves file statistics assuming fname points to a GCS object. diff --git a/tensorflow/core/platform/default/build_config/BUILD b/tensorflow/core/platform/default/build_config/BUILD index fe51a698d15..a63aa4d7a97 100644 --- a/tensorflow/core/platform/default/build_config/BUILD +++ b/tensorflow/core/platform/default/build_config/BUILD @@ -86,6 +86,14 @@ cc_library( ], ) +cc_library( + name = "jpeg", + copts = tf_copts(), + deps = [ + "@jpeg_archive//:jpeg", + ], +) + cc_library( name = "protos_cc", copts = tf_copts(), diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc index 36586d3f822..d3e9e08c46c 100644 --- a/tensorflow/core/platform/env_test.cc +++ b/tensorflow/core/platform/env_test.cc @@ -303,178 +303,4 @@ TEST_F(DefaultEnvTest, RecursivelyCreateDirWithUri) { EXPECT_TRUE(env->FileExists(create_path)); } -// Creates a new TestEnv that uses Env::Default for all basic ops but -// uses the default implementation for the GetMatchingFiles function instead. 
-class TestEnv : public EnvWrapper { - public: - explicit TestEnv(Env* env) : EnvWrapper(env) {} - - ~TestEnv() override = default; -}; - -Env* GetTestEnv() { - static Env* default_env = new TestEnv(Env::Default()); - return default_env; -} - -class InterPlanetaryFileSystem : public NullFileSystem { - public: - Status IsDirectory(const string& dirname) override { - if (dirname == "ipfs://solarsystem" || - dirname == "ipfs://solarsystem/Earth" || - dirname == "ipfs://solarsystem/Jupiter") { - return Status::OK(); - } - return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory"); - } - - Status GetChildren(const string& dir, std::vector* result) override { - std::vector celestial_bodies; - if (dir == "ipfs://solarsystem") { - celestial_bodies = {"Mercury", "Venus", "Earth", "Mars", - "Jupiter", "Saturn", "Uranus", "Neptune", - ".PlanetX", "Planet0", "Planet1"}; - - } else if (dir == "ipfs://solarsystem/Earth") { - celestial_bodies = {"Moon"}; - } else if (dir == "ipfs://solarsystem/Jupiter") { - celestial_bodies = {"Europa", "Io", "Ganymede"}; - } - result->insert(result->end(), celestial_bodies.begin(), - celestial_bodies.end()); - return Status::OK(); - } -}; - -REGISTER_FILE_SYSTEM_ENV(GetTestEnv(), "ipfs", InterPlanetaryFileSystem); - -class TestEnvTest : public ::testing::Test { - protected: - void SetUp() override { env_->CreateDir(BaseDir()); } - - void TearDown() override { - int64 undeleted_files, undeleted_dirs; - env_->DeleteRecursively(BaseDir(), &undeleted_files, &undeleted_dirs); - } - - // Returns all the matched entries as a comma separated string removing the - // common prefix of BaseDir(). 
- string Match(const string& base_dir, const string& suffix_pattern) { - std::vector results; - Status s = env_->GetMatchingPaths(io::JoinPath(base_dir, suffix_pattern), - &results); - if (!s.ok()) { - return s.ToString(); - } else { - std::vector trimmed_results; - std::sort(results.begin(), results.end()); - for (const string& result : results) { - StringPiece trimmed_result(result); - EXPECT_TRUE(trimmed_result.Consume(base_dir + "/")); - trimmed_results.push_back(trimmed_result); - } - return str_util::Join(trimmed_results, ","); - } - } - - Env* env_ = GetTestEnv(); -}; - -TEST_F(TestEnvTest, IPFS) { - std::vector matched_planets; - TF_EXPECT_OK(env_->GetChildren("ipfs://solarsystem", &matched_planets)); - std::vector planets = {"Mercury", "Venus", "Earth", "Mars", - "Jupiter", "Saturn", "Uranus", "Neptune", - ".PlanetX", "Planet0", "Planet1"}; - int c = 0; - for (auto p : matched_planets) { - EXPECT_EQ(p, planets[c++]); - } -} - -TEST_F(TestEnvTest, MatchNonExistentFile) { - EXPECT_EQ(Match(BaseDir(), "thereisnosuchfile"), ""); -} - -TEST_F(TestEnvTest, MatchSimple) { - // Create a few files. - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-0a"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-aaa"), "")); - - EXPECT_EQ(Match(BaseDir(), "match-*"), - "match-00,match-01,match-0a,match-aaa"); - EXPECT_EQ(Match(BaseDir(), "match-0[0-9]"), "match-00,match-01"); - EXPECT_EQ(Match(BaseDir(), "match-?[0-9]"), "match-00,match-01"); - EXPECT_EQ(Match(BaseDir(), "match-?a*"), "match-0a,match-aaa"); - EXPECT_EQ(Match(BaseDir(), "match-??"), "match-00,match-01,match-0a"); -} - -TEST_F(TestEnvTest, MatchDirectory) { - // Create some directories. 
- TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-00/abc"))); - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-0a/abc"))); - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-01/abc"))); - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-aaa/abc"))); - - // Create a few files. - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/x"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-0a/abc/x"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/x"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-aaa/abc/x"), "")); - - EXPECT_EQ(Match(BaseDir(), "match-*/abc/x"), - "match-00/abc/x,match-01/abc/x,match-0a/abc/x,match-aaa/abc/x"); - EXPECT_EQ(Match(BaseDir(), "match-0[0-9]/abc/x"), - "match-00/abc/x,match-01/abc/x"); - EXPECT_EQ(Match(BaseDir(), "match-?[0-9]/abc/x"), - "match-00/abc/x,match-01/abc/x"); - EXPECT_EQ(Match(BaseDir(), "match-?a*/abc/x"), - "match-0a/abc/x,match-aaa/abc/x"); - EXPECT_EQ(Match(BaseDir(), "match-?[^a]/abc/x"), - "match-00/abc/x,match-01/abc/x"); -} - -TEST_F(TestEnvTest, MatchMultipleWildcards) { - // Create some directories. - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-00/abc"))); - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-01/abc"))); - TF_EXPECT_OK( - env_->RecursivelyCreateDir(io::JoinPath(BaseDir(), "match-02/abc"))); - - // Create a few files. 
- TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/00"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/01"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-00/abc/09"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/00"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/04"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-01/abc/10"), "")); - TF_EXPECT_OK( - WriteStringToFile(env_, io::JoinPath(BaseDir(), "match-02/abc/00"), "")); - - EXPECT_EQ(Match(BaseDir(), "match-0[0-1]/abc/0[0-8]"), - "match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04"); -} - } // namespace tensorflow diff --git a/tensorflow/core/platform/file_statistics.h b/tensorflow/core/platform/file_statistics.h index 6bb34c19dd0..7629db6ef9e 100644 --- a/tensorflow/core/platform/file_statistics.h +++ b/tensorflow/core/platform/file_statistics.h @@ -29,6 +29,8 @@ struct FileStatistics { bool is_directory = false; FileStatistics() {} + FileStatistics(int64 length, int64 mtime_nsec, bool is_directory) + : length(length), mtime_nsec(mtime_nsec), is_directory(is_directory) {} ~FileStatistics() {} }; diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index 3e68f48eb17..62167b4f768 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/io/path.h" @@ -29,6 +30,12 @@ limitations under the License. 
namespace tensorflow { +namespace { + +constexpr int32 kNumThreads = 8; + +} // anonymous namespace + FileSystem::~FileSystem() {} string FileSystem::TranslateName(const string& name) const { @@ -105,16 +112,32 @@ Status FileSystem::GetMatchingPaths(const string& pattern, std::deque dir_q; dir_q.push_back(dir); Status ret; // Status to return. + std::vector children_dir_status; // holds is_dir status for children. while (!dir_q.empty()) { string current_dir = dir_q.front(); dir_q.pop_front(); std::vector children; Status s = GetChildren(current_dir, &children); ret.Update(s); - for (const string& child : children) { - const string child_path = io::JoinPath(current_dir, child); + if (children.empty()) continue; + // This IsDirectory call can be expensive for some FS. Parallelizing it. + thread::ThreadPool* children_threads = + new thread::ThreadPool(Env::Default(), "TraverseChildren", kNumThreads); + children_dir_status.resize(children.size()); + for (int i = 0; i < children.size(); ++i) { + const string child_path = io::JoinPath(current_dir, children[i]); + children_threads->Schedule([this, child_path, i, &children_dir_status] { + children_dir_status[i] = this->IsDirectory(child_path).ok(); + }); + } + delete children_threads; + for (int i = 0; i < children.size(); ++i) { + const string child_path = io::JoinPath(current_dir, children[i]); + // In case the child_path doesn't start with the fixed_prefix then we bail + // and don't add it to the queue / candidates. + if (!StringPiece(child_path).starts_with(fixed_prefix)) continue; // If the child is a directory add it to the queue. 
- if (IsDirectory(child_path).ok()) { + if (children_dir_status[i]) { dir_q.push_back(child_path); } all_files.push_back(child_path); diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index 1a37251177f..600af91206b 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -25,42 +25,125 @@ limitations under the License. namespace tensorflow { +static const char* const kPrefix = "ipfs://solarsystem"; + +// A file system that has Planets, Satellites and Sub Satellites. Sub satellites +// cannot have children further. class InterPlanetaryFileSystem : public NullFileSystem { public: - Status IsDirectory(const string& dirname) override { - if (dirname == "ipfs://solarsystem" || - dirname == "ipfs://solarsystem/Earth" || - dirname == "ipfs://solarsystem/Jupiter") { + bool FileExists(const string& fname) override { + string parsed_path; + ParsePath(fname, &parsed_path); + return BodyExists(parsed_path); + } + + // Adds the dir to the parent's children list and creates an entry for itself. + Status CreateDir(const string& dirname) override { + string parsed_path; + ParsePath(dirname, &parsed_path); + // If the directory already exists then ignore. + if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) { return Status::OK(); } - return Status(tensorflow::error::FAILED_PRECONDITION, "Not a directory"); + std::vector split_path = str_util::Split(parsed_path, '/'); + // If the path is too long then we don't support it. 
+ if (split_path.size() > 3) { + return Status(tensorflow::error::INVALID_ARGUMENT, "Bad dirname"); + } + if (split_path.empty()) { + return Status::OK(); + } + if (split_path.size() == 1) { + celestial_bodies_[""].insert(parsed_path); + celestial_bodies_.insert( + std::pair>(parsed_path, {})); + return Status::OK(); + } + if (split_path.size() == 2) { + if (!BodyExists(split_path[0])) { + return Status(tensorflow::error::FAILED_PRECONDITION, + "Base dir not created"); + } + celestial_bodies_[split_path[0]].insert(split_path[1]); + celestial_bodies_.insert( + std::pair>(parsed_path, {})); + return Status::OK(); + } + if (split_path.size() == 3) { + const string& parent_path = io::JoinPath(split_path[0], split_path[1]); + if (!BodyExists(parent_path)) { + return Status(tensorflow::error::FAILED_PRECONDITION, + "Base dir not created"); + } + celestial_bodies_[parent_path].insert(split_path[2]); + celestial_bodies_.insert( + std::pair>(parsed_path, {})); + return Status::OK(); + } + return Status(tensorflow::error::FAILED_PRECONDITION, "Failed to create"); + } + + Status IsDirectory(const string& dirname) override { + string parsed_path; + ParsePath(dirname, &parsed_path); + std::vector split_path = str_util::Split(parsed_path, '/'); + if (split_path.size() > 2) { + return Status(tensorflow::error::FAILED_PRECONDITION, "Not a dir"); + } + if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) { + return Status::OK(); + } + return Status(tensorflow::error::FAILED_PRECONDITION, "Not a dir"); } Status GetChildren(const string& dir, std::vector* result) override { - std::vector celestial_bodies; - if (dir == "ipfs://solarsystem") { - celestial_bodies = {"Mercury", "Venus", "Earth", "Mars", - "Jupiter", "Saturn", "Uranus", "Neptune", - ".PlanetX", "Planet0", "Planet1"}; - - } else if (dir == "ipfs://solarsystem/Earth") { - celestial_bodies = {"Moon"}; - } else if (dir == "ipfs://solarsystem/Jupiter") { - celestial_bodies = {"Europa", "Io", "Ganymede"}; - } - 
result->insert(result->end(), celestial_bodies.begin(), - celestial_bodies.end()); + TF_RETURN_IF_ERROR(IsDirectory(dir)); + string parsed_path; + ParsePath(dir, &parsed_path); + result->insert(result->begin(), celestial_bodies_[parsed_path].begin(), + celestial_bodies_[parsed_path].end()); return Status::OK(); } + + private: + bool BodyExists(const string& name) { + return celestial_bodies_.find(name) != celestial_bodies_.end(); + } + + void ParsePath(const string& name, string* parsed_path) { + StringPiece scheme, host, path; + ParseURI(name, &scheme, &host, &path); + ASSERT_EQ(scheme, "ipfs"); + ASSERT_EQ(host, "solarsystem"); + path.Consume("/"); + *parsed_path = path.ToString(); + } + + std::map> celestial_bodies_ = { + std::pair>( + "", {"Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", + "Uranus", "Neptune"}), + std::pair>("Mercury", {}), + std::pair>("Venus", {}), + std::pair>("Earth", {"Moon"}), + std::pair>("Mars", {}), + std::pair>("Jupiter", + {"Europa", "Io", "Ganymede"}), + std::pair>("Saturn", {}), + std::pair>("Uranus", {}), + std::pair>("Neptune", {}), + std::pair>("Earth/Moon", {}), + std::pair>("Jupiter/Europa", {}), + std::pair>("Jupiter/Io", {}), + std::pair>("Jupiter/Ganymede", {})}; }; // Returns all the matched entries as a comma separated string removing the // common prefix of BaseDir(). 
-string Match(const string& base_dir, const string& suffix_pattern) { - InterPlanetaryFileSystem fs; +string Match(InterPlanetaryFileSystem* ipfs, const string& suffix_pattern) { std::vector results; Status s = - fs.GetMatchingPaths(io::JoinPath(base_dir, suffix_pattern), &results); + ipfs->GetMatchingPaths(io::JoinPath(kPrefix, suffix_pattern), &results); if (!s.ok()) { return s.ToString(); } else { @@ -68,7 +151,7 @@ string Match(const string& base_dir, const string& suffix_pattern) { std::sort(results.begin(), results.end()); for (const string& result : results) { StringPiece trimmed_result(result); - EXPECT_TRUE(trimmed_result.Consume(base_dir + "/")); + EXPECT_TRUE(trimmed_result.Consume(strings::StrCat(kPrefix, "/"))); trimmed_results.push_back(trimmed_result); } return str_util::Join(trimmed_results, ","); @@ -76,17 +159,76 @@ string Match(const string& base_dir, const string& suffix_pattern) { } TEST(TestFileSystem, IPFSMatch) { - // Make sure we only get the 11 planets and not all their children. - EXPECT_EQ(Match("ipfs://solarsystem", "*"), - ".PlanetX,Earth,Jupiter,Mars,Mercury,Neptune,Planet0,Planet1," - "Saturn,Uranus,Venus"); + InterPlanetaryFileSystem ipfs; + EXPECT_EQ(Match(&ipfs, "thereisnosuchfile"), ""); + EXPECT_EQ(Match(&ipfs, "*"), + "Earth,Jupiter,Mars,Mercury,Neptune,Saturn,Uranus,Venus"); // Returns Jupiter's moons. - EXPECT_EQ(Match("ipfs://solarsystem", "Jupiter/*"), + EXPECT_EQ(Match(&ipfs, "Jupiter/*"), "Jupiter/Europa,Jupiter/Ganymede,Jupiter/Io"); // Returns Jupiter's and Earth's moons. 
- EXPECT_EQ(Match("ipfs://solarsystem", "*/*"), + EXPECT_EQ(Match(&ipfs, "*/*"), "Earth/Moon,Jupiter/Europa,Jupiter/Ganymede,Jupiter/Io"); - EXPECT_EQ(Match("ipfs://solarsystem", "Planet[0-1]"), "Planet0,Planet1"); + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "Planet0"))); + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "Planet1"))); + EXPECT_EQ(Match(&ipfs, "Planet[0-1]"), "Planet0,Planet1"); + EXPECT_EQ(Match(&ipfs, "Planet?"), "Planet0,Planet1"); +} + +TEST(TestFileSystem, MatchSimple) { + InterPlanetaryFileSystem ipfs; + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-00"))); + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-0a"))); + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-01"))); + TF_EXPECT_OK(ipfs.CreateDir(io::JoinPath(kPrefix, "match-aaa"))); + + EXPECT_EQ(Match(&ipfs, "match-*"), "match-00,match-01,match-0a,match-aaa"); + EXPECT_EQ(Match(&ipfs, "match-0[0-9]"), "match-00,match-01"); + EXPECT_EQ(Match(&ipfs, "match-?[0-9]"), "match-00,match-01"); + EXPECT_EQ(Match(&ipfs, "match-?a*"), "match-0a,match-aaa"); + EXPECT_EQ(Match(&ipfs, "match-??"), "match-00,match-01,match-0a"); +} + +TEST(TestFileSystem, MatchDirectory) { + InterPlanetaryFileSystem ipfs; + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/x"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-0a/abc/x"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/x"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-aaa/abc/x"))); + + EXPECT_EQ(Match(&ipfs, "match-*/abc/x"), + "match-00/abc/x,match-01/abc/x,match-0a/abc/x,match-aaa/abc/x"); + EXPECT_EQ(Match(&ipfs, "match-0[0-9]/abc/x"), + "match-00/abc/x,match-01/abc/x"); + EXPECT_EQ(Match(&ipfs, "match-?[0-9]/abc/x"), + "match-00/abc/x,match-01/abc/x"); + EXPECT_EQ(Match(&ipfs, "match-?a*/abc/x"), "match-0a/abc/x,match-aaa/abc/x"); + EXPECT_EQ(Match(&ipfs, "match-?[^a]/abc/x"), 
"match-00/abc/x,match-01/abc/x"); +} + +TEST(TestFileSystem, MatchMultipleWildcards) { + InterPlanetaryFileSystem ipfs; + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/00"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/01"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-00/abc/09"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/00"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/04"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-01/abc/10"))); + TF_EXPECT_OK( + ipfs.RecursivelyCreateDir(io::JoinPath(kPrefix, "match-02/abc/00"))); + + EXPECT_EQ(Match(&ipfs, "match-0[0-1]/abc/0[0-8]"), + "match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04"); } } // namespace tensorflow diff --git a/tensorflow/core/protobuf/worker.proto b/tensorflow/core/protobuf/worker.proto index 7a50aa3e649..81ff1047e7b 100644 --- a/tensorflow/core/protobuf/worker.proto +++ b/tensorflow/core/protobuf/worker.proto @@ -22,6 +22,7 @@ option java_multiple_files = true; option java_package = "org.tensorflow.distruntime"; import "google/protobuf/any.proto"; +import "tensorflow/core/framework/cost_graph.proto"; import "tensorflow/core/framework/step_stats.proto"; import "tensorflow/core/framework/device_attributes.proto"; import "tensorflow/core/framework/graph.proto"; @@ -181,8 +182,10 @@ message RunGraphResponse { // `RunGraphRequest.recv_key`. repeated NamedTensor recv = 1; - // If the request asked for execution stats, these are returned here. + // If the request asked for execution stats or cost graph, these are returned + // here. 
StepStats step_stats = 2; + CostGraphDef cost_graph = 3; } //////////////////////////////////////////////////////////////////////////////// diff --git a/tensorflow/core/util/example_proto_fast_parsing.cc b/tensorflow/core/util/example_proto_fast_parsing.cc index 1a2c4aeedab..abf8d77f869 100644 --- a/tensorflow/core/util/example_proto_fast_parsing.cc +++ b/tensorflow/core/util/example_proto_fast_parsing.cc @@ -92,7 +92,8 @@ class Feature { return Status::OK(); } - bool ParseBytesList(SmallVector* bytes_list) { + template + bool ParseBytesList(Result* bytes_list) { DCHECK(bytes_list != nullptr); protobuf::io::CodedInputStream stream( reinterpret_cast(serialized_.data()), serialized_.size()); @@ -116,7 +117,8 @@ class Feature { return true; } - bool ParseFloatList(SmallVector* float_list) { + template + bool ParseFloatList(Result* float_list) { DCHECK(float_list != nullptr); protobuf::io::CodedInputStream stream( reinterpret_cast(serialized_.data()), serialized_.size()); @@ -158,7 +160,8 @@ class Feature { return true; } - bool ParseInt64List(SmallVector* int64_list) { + template + bool ParseInt64List(Result* int64_list) { DCHECK(int64_list != nullptr); protobuf::io::CodedInputStream stream( reinterpret_cast(serialized_.data()), serialized_.size()); @@ -181,7 +184,7 @@ class Feature { while (!stream.ExpectAtEnd()) { protobuf_uint64 n; // There is no API for int64 if (!stream.ReadVarint64(&n)) return false; - int64_list->push_back(n); + int64_list->push_back(static_cast(n)); } stream.PopLimit(packed_limit); @@ -190,7 +193,7 @@ class Feature { if (!stream.ExpectTag(kVarintTag(1))) return false; protobuf_uint64 n; // There is no API for int64 if (!stream.ReadVarint64(&n)) return false; - int64_list->push_back(n); + int64_list->push_back(static_cast(n)); } } } @@ -392,6 +395,28 @@ struct SeededHasher { uint64 seed{0xDECAFCAFFE}; }; +template +class LimitedArraySlice { + public: + LimitedArraySlice(T* begin, size_t num_elements) + : current_(begin), end_(begin + 
num_elements) {} + + // May return negative if there were push_back calls after slice was filled. + int64 EndDistance() const { return end_ - current_; } + + // Attempts to push value to the back of this. If the slice has + // already been filled, this method has no effect on the underlying data, but + // it changes the number returned by EndDistance into negative values. + void push_back(T&& value) { + if (EndDistance() > 0) *current_ = std::move(value); + ++current_; + } + + private: + T* current_; + T* end_; +}; + Status FastParseSerializedExample( const string& serialized_example, const string& example_name, const size_t example_index, const Config& config, @@ -487,37 +512,29 @@ Status FastParseSerializedExample( switch (config.dense[d].dtype) { case DT_INT64: { - SmallVector list; - list.reserve(num_elements); - if (!feature.ParseInt64List(&list)) return parse_error(); - if (list.size() != num_elements) { - return shape_error(list.size(), "int64"); - } auto out_p = out.flat().data() + offset; - std::copy_n(list.begin(), list.size(), out_p); + LimitedArraySlice slice(out_p, num_elements); + if (!feature.ParseInt64List(&slice)) return parse_error(); + if (slice.EndDistance() != 0) { + return shape_error(num_elements - slice.EndDistance(), "int64"); + } break; } case DT_FLOAT: { - SmallVector list; - list.reserve(num_elements); - if (!feature.ParseFloatList(&list)) return parse_error(); - if (list.size() != num_elements) { - return shape_error(list.size(), "float"); - } auto out_p = out.flat().data() + offset; - std::copy_n(list.begin(), list.size(), out_p); + LimitedArraySlice slice(out_p, num_elements); + if (!feature.ParseFloatList(&slice)) return parse_error(); + if (slice.EndDistance() != 0) { + return shape_error(num_elements - slice.EndDistance(), "float"); + } break; } case DT_STRING: { - SmallVector list; - list.reserve(num_elements); - if (!feature.ParseBytesList(&list)) return parse_error(); - if (list.size() != num_elements) { - return 
shape_error(list.size(), "bytes"); - } auto out_p = out.flat().data() + offset; - for (size_t i = 0; i < list.size(); ++i) { - out_p[i] = std::move(list[i]); + LimitedArraySlice slice(out_p, num_elements); + if (!feature.ParseBytesList(&slice)) return parse_error(); + if (slice.EndDistance() != 0) { + return shape_error(num_elements - slice.EndDistance(), "bytes"); } break; } diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml index 3cb18ab73ce..0a48d3d50b7 100644 --- a/tensorflow/examples/android/AndroidManifest.xml +++ b/tensorflow/examples/android/AndroidManifest.xml @@ -33,9 +33,9 @@ android:icon="@drawable/ic_launcher" android:theme="@style/MaterialTheme"> - + android:label="@string/activity_name_classification"> diff --git a/tensorflow/examples/android/res/values/base-strings.xml b/tensorflow/examples/android/res/values/base-strings.xml index 992ba2dc987..93cfe0dac28 100644 --- a/tensorflow/examples/android/res/values/base-strings.xml +++ b/tensorflow/examples/android/res/values/base-strings.xml @@ -17,4 +17,5 @@ TensorFlow Demo + TF Classification diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java index 82c37ac757d..ede3af1467f 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java @@ -18,13 +18,14 @@ package org.tensorflow.demo; import android.Manifest; import android.app.Activity; +import android.app.Fragment; import android.content.pm.PackageManager; import android.os.Build; import android.os.Bundle; import android.view.WindowManager; import android.widget.Toast; -public class CameraActivity extends Activity { +public abstract class CameraActivity extends Activity { private static final int PERMISSIONS_REQUEST = 1; private static final String PERMISSION_CAMERA = 
Manifest.permission.CAMERA; @@ -48,7 +49,8 @@ public class CameraActivity extends Activity { } @Override - public void onRequestPermissionsResult(int requestCode, String permissions[], int[] grantResults) { + public void onRequestPermissionsResult( + final int requestCode, final String[] permissions, final int[] grantResults) { switch (requestCode) { case PERMISSIONS_REQUEST: { if (grantResults.length > 0 @@ -79,10 +81,12 @@ public class CameraActivity extends Activity { } } - private void setFragment() { + protected void setFragment() { getFragmentManager() - .beginTransaction() - .replace(R.id.container, CameraConnectionFragment.newInstance()) - .commit(); + .beginTransaction() + .replace(R.id.container, createFragment()) + .commit(); } + + protected abstract Fragment createFragment(); } diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java index e73278ed608..0bd963b39ef 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraConnectionFragment.java @@ -69,7 +69,7 @@ public class CameraConnectionFragment extends Fragment { */ private static final int MINIMUM_PREVIEW_SIZE = 320; - private RecognitionScoreView scoreView; + private ResultsView resultsView; /** * Conversion from screen rotation to JPEG orientation. @@ -132,10 +132,10 @@ public class CameraConnectionFragment extends Fragment { private CameraDevice cameraDevice; /** - * The rotation in degrees of the camera sensor from the display. + * The rotation in degrees of the camera sensor from the display. */ private Integer sensorOrientation; - + /** * The {@link android.util.Size} of camera preview. 
*/ @@ -214,6 +214,27 @@ public class CameraConnectionFragment extends Fragment { */ private final Semaphore cameraOpenCloseLock = new Semaphore(1); + /** + * A {@link Classifier} object wrapping TensorFlow to pass frames to. + */ + private final Classifier classifier; + /** + * The input size in pixels desired by TensorFlow (width and height of a square bitmap). + */ + private final int inputSize; + + /** + * The layout identifier to inflate for this Fragment. + */ + private final int layout; + + private CameraConnectionFragment( + final Classifier classifier, final int layout, final int inputSize) { + this.classifier = classifier; + this.layout = layout; + this.inputSize = inputSize; + } + /** * Shows a {@link Toast} on the UI thread. * @@ -267,20 +288,21 @@ public class CameraConnectionFragment extends Fragment { } } - public static CameraConnectionFragment newInstance() { - return new CameraConnectionFragment(); + public static CameraConnectionFragment newInstance( + final Classifier classifier, final int layout, final int inputSize) { + return new CameraConnectionFragment(classifier, layout, inputSize); } @Override public View onCreateView( final LayoutInflater inflater, final ViewGroup container, final Bundle savedInstanceState) { - return inflater.inflate(R.layout.camera_connection_fragment, container, false); + return inflater.inflate(layout, container, false); } @Override public void onViewCreated(final View view, final Bundle savedInstanceState) { textureView = (AutoFitTextureView) view.findViewById(R.id.texture); - scoreView = (RecognitionScoreView) view.findViewById(R.id.results); + resultsView = (ResultsView) view.findViewById(R.id.results); } @Override @@ -344,7 +366,7 @@ public class CameraConnectionFragment extends Fragment { new CompareSizesByArea()); sensorOrientation = characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION); - + // Danger, W.R.! 
Attempting to use too large a preview size could exceed the camera // bus' bandwidth limitation, resulting in gorgeous previews but the storage of // garbage capture data. @@ -538,7 +560,7 @@ public class CameraConnectionFragment extends Fragment { LOGGER.i("Getting assets."); tfPreviewListener.initialize( - getActivity().getAssets(), scoreView, inferenceHandler, sensorOrientation); + classifier, resultsView, inputSize, inferenceHandler, sensorOrientation); LOGGER.i("TensorFlow initialized."); } diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java new file mode 100644 index 00000000000..104ffbbd088 --- /dev/null +++ b/tensorflow/examples/android/src/org/tensorflow/demo/ClassifierActivity.java @@ -0,0 +1,58 @@ +/* + * Copyright 2016 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.tensorflow.demo; + +import java.io.IOException; + +import android.app.Fragment; +import org.tensorflow.demo.env.Logger; + +public class ClassifierActivity extends CameraActivity { + private static final Logger LOGGER = new Logger(); + + // These are the settings for the original v1 Inception model. 
If you want to + // use a model that's been produced from the TensorFlow for Poets codelab, + // you'll need to set IMAGE_SIZE = 299, IMAGE_MEAN = 128, IMAGE_STD = 128, + // INPUT_NAME = "Mul:0", and OUTPUT_NAME = "final_result:0". + // You'll also need to update the MODEL_FILE and LABEL_FILE paths to point to + // the ones you produced. + private static final int NUM_CLASSES = 1001; + private static final int INPUT_SIZE = 224; + private static final int IMAGE_MEAN = 117; + private static final float IMAGE_STD = 1; + private static final String INPUT_NAME = "input:0"; + private static final String OUTPUT_NAME = "output:0"; + + private static final String MODEL_FILE = "file:///android_asset/tensorflow_inception_graph.pb"; + private static final String LABEL_FILE = + "file:///android_asset/imagenet_comp_graph_label_strings.txt"; + + @Override + protected Fragment createFragment() { + final TensorFlowImageClassifier classifier = new TensorFlowImageClassifier(); + try { + classifier.initializeTensorFlow( + getAssets(), MODEL_FILE, LABEL_FILE, NUM_CLASSES, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, + INPUT_NAME, OUTPUT_NAME); + } catch (final IOException e) { + LOGGER.e(e, "Exception!"); + } + + return CameraConnectionFragment.newInstance( + classifier, R.layout.camera_connection_fragment, INPUT_SIZE); + } +} diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java b/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java index c20afcc22e4..764c16433c3 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/RecognitionScoreView.java @@ -26,7 +26,7 @@ import org.tensorflow.demo.Classifier.Recognition; import java.util.List; -public class RecognitionScoreView extends View { +public class RecognitionScoreView extends View implements ResultsView { private static final float TEXT_SIZE_DIP = 24; private List results; private final float 
textSizePx; @@ -46,6 +46,7 @@ public class RecognitionScoreView extends View { bgPaint.setColor(0xcc4285f4); } + @Override public void setResults(final List results) { this.results = results; postInvalidate(); diff --git a/tensorflow/tensorboard/gulp_tasks/tslint.js b/tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java similarity index 58% rename from tensorflow/tensorboard/gulp_tasks/tslint.js rename to tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java index 726001fc906..662495202b3 100644 --- a/tensorflow/tensorboard/gulp_tasks/tslint.js +++ b/tensorflow/examples/android/src/org/tensorflow/demo/ResultsView.java @@ -1,4 +1,4 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,19 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -var gulp = require('gulp'); -var tslint = require('gulp-tslint'); +package org.tensorflow.demo; -module.exports = function(strict) { - return function() { - return gulp.src([ - 'components/tf-*/**/*.ts', - 'components/vz-*/**/*.ts', - '!./components/**/deps.d.ts' - ]) - .pipe(tslint()) - .pipe(tslint.report('verbose', { - emitError: strict, - })); - }; +import org.tensorflow.demo.Classifier.Recognition; + +import java.util.List; + +public interface ResultsView { + public void setResults(final List results); } diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java index f60652ffcff..33da3d40807 100644 --- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java +++ b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowImageListener.java @@ -15,7 +15,6 @@ limitations under the License. package org.tensorflow.demo; -import android.content.res.AssetManager; import android.graphics.Bitmap; import android.graphics.Bitmap.Config; import android.graphics.Canvas; @@ -26,13 +25,12 @@ import android.media.ImageReader; import android.media.ImageReader.OnImageAvailableListener; import android.os.Handler; import android.os.Trace; - -import java.io.IOException; -import java.util.List; import junit.framework.Assert; import org.tensorflow.demo.env.ImageUtils; import org.tensorflow.demo.env.Logger; +import java.util.List; + /** * Class that takes in preview frames and converts the image to Bitmaps to process with Tensorflow. */ @@ -41,29 +39,13 @@ public class TensorFlowImageListener implements OnImageAvailableListener { private static final boolean SAVE_PREVIEW_BITMAP = false; - // These are the settings for the original v1 Inception model. 
If you want to - // use a model that's been produced from the TensorFlow for Poets codelab, - // you'll need to set IMAGE_SIZE = 299, IMAGE_MEAN = 128, IMAGE_STD = 128, - // INPUT_NAME = "Mul:0", and OUTPUT_NAME = "final_result:0". - // You'll also need to update the MODEL_FILE and LABEL_FILE paths to point to - // the ones you produced. - private static final int NUM_CLASSES = 1001; - private static final int INPUT_SIZE = 224; - private static final int IMAGE_MEAN = 117; - private static final float IMAGE_STD = 1; - private static final String INPUT_NAME = "input:0"; - private static final String OUTPUT_NAME = "output:0"; - - private static final String MODEL_FILE = "file:///android_asset/tensorflow_inception_graph.pb"; - private static final String LABEL_FILE = - "file:///android_asset/imagenet_comp_graph_label_strings.txt"; - private Integer sensorOrientation; - private final TensorFlowImageClassifier tensorflow = new TensorFlowImageClassifier(); + private Classifier tensorflow; private int previewWidth = 0; private int previewHeight = 0; + private int inputSize = 0; private byte[][] yuvBytes; private int[] rgbBytes = null; private Bitmap rgbFrameBitmap = null; @@ -72,22 +54,18 @@ public class TensorFlowImageListener implements OnImageAvailableListener { private boolean computing = false; private Handler handler; - private RecognitionScoreView scoreView; + private ResultsView resultsView; public void initialize( - final AssetManager assetManager, - final RecognitionScoreView scoreView, + final Classifier tensorflow, + final ResultsView resultsView, + final int inputSize, final Handler handler, final Integer sensorOrientation) { Assert.assertNotNull(sensorOrientation); - try { - tensorflow.initializeTensorFlow( - assetManager, MODEL_FILE, LABEL_FILE, NUM_CLASSES, INPUT_SIZE, IMAGE_MEAN, IMAGE_STD, - INPUT_NAME, OUTPUT_NAME); - } catch (IOException e) { - LOGGER.e(e, "Exception!"); - } - this.scoreView = scoreView; + this.tensorflow = tensorflow; + 
this.resultsView = resultsView; + this.inputSize = inputSize; this.handler = handler; this.sensorOrientation = sensorOrientation; } @@ -146,7 +124,7 @@ public class TensorFlowImageListener implements OnImageAvailableListener { LOGGER.i("Initializing at size %dx%d", previewWidth, previewHeight); rgbBytes = new int[previewWidth * previewHeight]; rgbFrameBitmap = Bitmap.createBitmap(previewWidth, previewHeight, Config.ARGB_8888); - croppedBitmap = Bitmap.createBitmap(INPUT_SIZE, INPUT_SIZE, Config.ARGB_8888); + croppedBitmap = Bitmap.createBitmap(inputSize, inputSize, Config.ARGB_8888); yuvBytes = new byte[planes.length][]; for (int i = 0; i < planes.length; ++i) { @@ -201,7 +179,7 @@ public class TensorFlowImageListener implements OnImageAvailableListener { for (final Classifier.Recognition result : results) { LOGGER.v("Result: " + result.getTitle()); } - scoreView.setResults(results); + resultsView.setResults(results); computing = false; } }); diff --git a/tensorflow/examples/how_tos/reading_data/convert_to_records.py b/tensorflow/examples/how_tos/reading_data/convert_to_records.py index c3555a882d6..5457b27ecac 100644 --- a/tensorflow/examples/how_tos/reading_data/convert_to_records.py +++ b/tensorflow/examples/how_tos/reading_data/convert_to_records.py @@ -20,6 +20,7 @@ from __future__ import print_function import argparse import os +import sys import tensorflow as tf @@ -102,6 +103,5 @@ if __name__ == '__main__': set.\ """ ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py index 7795248f82d..888da421bfa 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded.py @@ -31,6 +31,7 @@ from __future__ import division from 
__future__ import print_function import argparse +import sys import time import tensorflow as tf @@ -184,6 +185,5 @@ if __name__ == '__main__': help='If true, uses fake data for unit testing.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py index 5325afbe60e..f19c3f38fd5 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_preloaded_var.py @@ -30,6 +30,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import time import tensorflow as tf @@ -194,6 +195,5 @@ if __name__ == '__main__': help='If true, uses fake data for unit testing.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py index 127153a00bb..4c5dbc65c6f 100644 --- a/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py +++ b/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py @@ -29,6 +29,7 @@ from __future__ import print_function import argparse import os.path +import sys import time import tensorflow as tf @@ -224,6 +225,5 @@ if __name__ == '__main__': default='/tmp/data', help='Directory with the training data.' 
) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 4f06cb8add1..392f0176d37 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -1009,6 +1009,5 @@ if __name__ == '__main__': input pixels up or down by.\ """ ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/random_forest_mnist.py b/tensorflow/examples/learn/random_forest_mnist.py index c20965fff6e..a34d52275ac 100644 --- a/tensorflow/examples/learn/random_forest_mnist.py +++ b/tensorflow/examples/learn/random_forest_mnist.py @@ -18,13 +18,20 @@ from __future__ import division from __future__ import print_function import argparse +import sys import tempfile import tensorflow as tf # pylint: disable=g-backslash-continuation +from tensorflow.contrib.learn.python.learn\ + import metric_spec from tensorflow.contrib.learn.python.learn.estimators\ import random_forest +from tensorflow.contrib.tensor_forest.client\ + import eval_metrics +from tensorflow.contrib.tensor_forest.python\ + import tensor_forest from tensorflow.examples.tutorials.mnist import input_data FLAGS = None @@ -35,7 +42,12 @@ def build_estimator(model_dir): params = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams( num_classes=10, num_features=784, num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes) - return random_forest.TensorForestEstimator(params, model_dir=model_dir) + graph_builder_class = tensor_forest.RandomForestGraphs + if FLAGS.use_training_loss: + graph_builder_class = tensor_forest.TrainingLossForest + return random_forest.TensorForestEstimator( + params, graph_builder_class=graph_builder_class, + 
model_dir=model_dir) def train_and_eval(): @@ -45,20 +57,25 @@ def train_and_eval(): estimator = build_estimator(model_dir) - # TensorForest's LossMonitor allows training to terminate early if the + # TensorForest's loss hook allows training to terminate early if the # forest is no longer growing. early_stopping_rounds = 100 - check_every_n_steps = 100 - monitor = random_forest.TensorForestLossMonitor(early_stopping_rounds, - check_every_n_steps) + monitor = random_forest.TensorForestLossHook(early_stopping_rounds) mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False) estimator.fit(x=mnist.train.images, y=mnist.train.labels, batch_size=FLAGS.batch_size, monitors=[monitor]) + metric_name = 'accuracy' + metric = {metric_name: + metric_spec.MetricSpec( + eval_metrics.get_metric(metric_name), + prediction_key=eval_metrics.get_prediction_key(metric_name))} + results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels, - batch_size=FLAGS.batch_size) + batch_size=FLAGS.batch_size, + metrics=metric) for key in sorted(results): print('%s: %s' % (key, results[key])) @@ -105,6 +122,11 @@ if __name__ == '__main__': default=1000, help='Max total nodes in a single tree.' ) - FLAGS = parser.parse_args() - - tf.app.run() + parser.add_argument( + '--use_training_loss', + type=bool, + default=False, + help='If true, use training loss as termination criteria.' 
+ ) + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/text_classification.py b/tensorflow/examples/learn/text_classification.py index e0997cf921a..87a23831f35 100644 --- a/tensorflow/examples/learn/text_classification.py +++ b/tensorflow/examples/learn/text_classification.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import numpy as np import pandas @@ -117,6 +118,5 @@ if __name__ == '__main__': help='Test the example code with fake data.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/text_classification_builtin_rnn_model.py b/tensorflow/examples/learn/text_classification_builtin_rnn_model.py index 865ce12516a..6a1c05b86b1 100644 --- a/tensorflow/examples/learn/text_classification_builtin_rnn_model.py +++ b/tensorflow/examples/learn/text_classification_builtin_rnn_model.py @@ -16,6 +16,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import numpy as np import pandas @@ -84,6 +85,5 @@ if __name__ == '__main__': help='Test the example code with fake data.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/text_classification_character_cnn.py b/tensorflow/examples/learn/text_classification_character_cnn.py index dbf34f35945..e84790471b5 100644 --- a/tensorflow/examples/learn/text_classification_character_cnn.py +++ b/tensorflow/examples/learn/text_classification_character_cnn.py @@ -29,6 +29,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import numpy as np import pandas @@ -114,6 
+115,5 @@ if __name__ == '__main__': help='Test the example code with fake data.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/text_classification_character_rnn.py b/tensorflow/examples/learn/text_classification_character_rnn.py index 68b15505a67..e62663aa8af 100644 --- a/tensorflow/examples/learn/text_classification_character_rnn.py +++ b/tensorflow/examples/learn/text_classification_character_rnn.py @@ -29,6 +29,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import numpy as np import pandas @@ -94,6 +95,5 @@ if __name__ == '__main__': help='Test the example code with fake data.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/learn/text_classification_cnn.py b/tensorflow/examples/learn/text_classification_cnn.py index e1836720cca..f71df272ead 100644 --- a/tensorflow/examples/learn/text_classification_cnn.py +++ b/tensorflow/examples/learn/text_classification_cnn.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import numpy as np import pandas @@ -114,6 +115,5 @@ if __name__ == '__main__': help='Test the example code with fake data.', action='store_true' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py index 5147801f961..7e4d4081102 100644 --- a/tensorflow/examples/tutorials/mnist/fully_connected_feed.py +++ b/tensorflow/examples/tutorials/mnist/fully_connected_feed.py @@ -21,6 +21,7 @@ from 
__future__ import print_function # pylint: disable=missing-docstring import argparse import os.path +import sys import time from six.moves import xrange # pylint: disable=redefined-builtin @@ -271,5 +272,6 @@ if __name__ == '__main__': help='If true, uses fake data for unit testing.', action='store_true' ) - FLAGS = parser.parse_args() - tf.app.run() \ No newline at end of file + + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py index c42d1eff15d..beb184f7755 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py @@ -23,6 +23,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys # Import data from tensorflow.examples.tutorials.mnist import input_data @@ -73,5 +74,5 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--data_dir', type=str, default='/tmp/tensorflow/mnist/input_data', help='Directory for storing input data') - FLAGS = parser.parse_args() - tf.app.run() \ No newline at end of file + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py index 66946f72849..fc91ac4ddd3 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py +++ b/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py @@ -25,6 +25,7 @@ from __future__ import division from __future__ import print_function import argparse +import sys import tensorflow as tf @@ -200,5 +201,5 @@ if __name__ == '__main__': help='Directory for storing input data') parser.add_argument('--log_dir', type=str, default='/tmp/tensorflow/mnist/logs/mnist_with_summaries', help='Summaries log directory') 
- FLAGS = parser.parse_args() - tf.app.run() \ No newline at end of file + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/examples/udacity/Dockerfile b/tensorflow/examples/udacity/Dockerfile index b7b094621a8..9f5ef1aca3e 100644 --- a/tensorflow/examples/udacity/Dockerfile +++ b/tensorflow/examples/udacity/Dockerfile @@ -1,5 +1,13 @@ FROM gcr.io/tensorflow/tensorflow:latest MAINTAINER Vincent Vanhoucke + +# Pillow needs libjpeg by default as of 3.0. +RUN apt-get update && apt-get install -y --no-install-recommends \ + libjpeg8-dev \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + RUN pip install scikit-learn pyreadline Pillow RUN rm -rf /notebooks/* ADD *.ipynb /notebooks/ diff --git a/tensorflow/examples/udacity/README.md b/tensorflow/examples/udacity/README.md index 1b0e5df5ee4..2814e5c62a8 100644 --- a/tensorflow/examples/udacity/README.md +++ b/tensorflow/examples/udacity/README.md @@ -6,7 +6,7 @@ Course information can be found at https://www.udacity.com/course/deep-learning- Running the Docker container from the Google Cloud repository ------------------------------------------------------------- - docker run -p 8888:8888 --name tensorflow-udacity -it b.gcr.io/tensorflow-udacity/assignments:0.5.0 + docker run -p 8888:8888 --name tensorflow-udacity -it gcr.io/tensorflow/udacity-assignments:0.6.0 Note that if you ever exit the container, you can return to it using: @@ -82,11 +82,11 @@ This will allow you to save work and have access to generated files on the host Pushing a Google Cloud release ------------------------------ - V=0.5.0 - docker tag $USER/assignments b.gcr.io/tensorflow-udacity/assignments:$V - gcloud docker push b.gcr.io/tensorflow-udacity/assignments - docker tag -f $USER/assignments b.gcr.io/tensorflow-udacity/assignments:latest - gcloud docker push b.gcr.io/tensorflow-udacity/assignments + V=0.6.0 + docker tag $USER/assignments 
gcr.io/tensorflow/udacity-assignments:$V + gcloud docker push gcr.io/tensorflow/udacity-assignments + docker tag -f $USER/assignments gcr.io/tensorflow/udacity-assignments:latest + gcloud docker push gcr.io/tensorflow/udacity-assignments History ------- @@ -96,3 +96,4 @@ History * 0.3.0: Use 0.7.1 release. * 0.4.0: Move notMMNIST data for Google Cloud. * 0.5.0: Actually use 0.7.1 release. +* 0.6.0: Update to TF 0.10.0, add libjpeg (for Pillow). diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md index 37d95f969ed..8577cd012e6 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md @@ -5,7 +5,7 @@ Bijector Ops. -An API for reversible (bijective) transformations of random variables. +An API for invertible, differentiable transformations of random variables. ## Background @@ -24,11 +24,13 @@ To apply a `Bijector`, use `distributions.TransformedDistribution`. ### `class tf.contrib.distributions.bijector.Bijector` {#Bijector} -Interface for transforming a `Distribution` via `TransformedDistribution`. +Interface for transforming a `Distribution` sample. -A `Bijector` implements a bijective, differentiable function by transforming -an input `Tensor`. The output `Tensor` shape is constrained by the input -`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +A `Bijector` implements a +[diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a +bijective, differentiable function. A `Bijector` is used by +`TransformedDistribution` but can be generally used for transforming a +`Distribution` generated `Tensor`. A `Bijector` is characterized by three operations: 1. 
Forward Evaluation @@ -169,7 +171,8 @@ Tips for implementing `_inverse` and `_inverse_log_det_jacobian`: - The inverse `log o det o Jacobian` can be implemented as the negative of the forward `log o det o Jacobian`. This is useful if the `inverse` is implemented as a cache or the inverse Jacobian is computationally more - expensive. The following demonstrates the suggested implementation. + expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following + demonstrates the suggested implementation. ```python def _inverse_and_log_det_jacobian(self, y): @@ -476,8 +479,8 @@ Instantiates `Chain` bijector. * `bijectors`: Python list of bijector instances. An empty list makes this bijector equivalent to the `Identity` bijector. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. Default: E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`. @@ -681,6 +684,234 @@ Returns True if Tensor arguments will be validated. +- - - + +### `class tf.contrib.distributions.bijector.CholeskyOuterProduct` {#CholeskyOuterProduct} + +Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix. + +`event_ndims` must be 0 or 2, i.e., scalar or matrix. + +Note: the upper-triangular part of X is ignored (whether or not its zero). + +Examples: + +```python +bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]]) +# Result: [[1, 1], [1, 5]], i.e., x x^T + +bijector.SoftmaxCentered(event_ndims=2).inverse(y=[[1., 1], [1, 5]]) +# Result: [[1, 0], [2, 1]], i.e., chol(y). +``` +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.__init__(event_ndims=2, validate_args=False, name='cholesky_outer_product')` {#CholeskyOuterProduct.__init__} + +Instantiates the `CholeskyOuterProduct` bijector. 
+ +##### Args: + + +* `event_ndims`: `constant` `int32` scalar `Tensor` indicating the number of + dimensions associated with a particular draw from the distribution. Must + be 0 or 2. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. +* `name`: `String` name given to ops managed by this object. + +##### Raises: + + +* `ValueError`: if event_ndims is neither 0 or 2. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.dtype` {#CholeskyOuterProduct.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward(x, name='forward', **condition_kwargs)` {#CholeskyOuterProduct.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward_log_det_jacobian(x, name='forward_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.forward_log_det_jacobian} + +Returns both the forward_log_det_jacobian. + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_forward_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse(y, name='inverse', **condition_kwargs)` {#CholeskyOuterProduct.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian(y, name='inverse_and_inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_log_det_jacobian(y, name='inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(y). + +Mathematically, returns: `log(det(dX/dY))(Y)`. (Recall that: `X=g^{-1}(Y)`.) 
+ +Note that `forward_log_det_jacobian` is the negative of this function. + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.is_constant_jacobian` {#CholeskyOuterProduct.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.name` {#CholeskyOuterProduct.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.parameters` {#CholeskyOuterProduct.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.shaper` {#CholeskyOuterProduct.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.validate_args` {#CholeskyOuterProduct.validate_args} + +Returns True if Tensor arguments will be validated. + + + - - - ### `class tf.contrib.distributions.bijector.Exp` {#Exp} @@ -714,8 +945,8 @@ Instantiates the `Exp` bijector. * `event_ndims`: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. 
* `name`: `String` name given to ops managed by this object. @@ -1130,7 +1361,7 @@ exp = Inline( inverse_fn=tf.log, inverse_log_det_jacobian_fn=( lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), - name="Exp") + name="exp") ``` The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. @@ -1151,8 +1382,8 @@ Creates a `Bijector` from callables. log o det o jacobian of the forward transformation. * `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant for all input arguments. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. @@ -1378,8 +1609,8 @@ return -self.inverse_log_det_jacobian(y, **condition_kwargs) * `bijector`: Bijector instance. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. @@ -1634,8 +1865,8 @@ Instantiates the `Exp` bijector. * `scale`: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`. * `event_ndims`: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String` name given to ops managed by this object. 
diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md index 0011737c600..bc4a79cf85f 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md @@ -231,6 +231,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Distribution.copy(**override_parameters_kwargs)` {#Distribution.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Distribution.dtype` {#Distribution.dtype} @@ -840,6 +863,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Binomial.copy(**override_parameters_kwargs)` {#Binomial.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Binomial.dtype` {#Binomial.dtype} @@ -1442,6 +1488,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.Bernoulli.copy(**override_parameters_kwargs)` {#Bernoulli.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Bernoulli.dtype` {#Bernoulli.dtype} @@ -1987,6 +2056,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.BernoulliWithSigmoidP.copy(**override_parameters_kwargs)` {#BernoulliWithSigmoidP.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.BernoulliWithSigmoidP.dtype` {#BernoulliWithSigmoidP.dtype} @@ -2642,6 +2734,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Beta.copy(**override_parameters_kwargs)` {#Beta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. 
+ +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Beta.dtype` {#Beta.dtype} @@ -3206,6 +3321,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.BetaWithSoftplusAB.copy(**override_parameters_kwargs)` {#BetaWithSoftplusAB.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.BetaWithSoftplusAB.dtype` {#BetaWithSoftplusAB.dtype} @@ -3809,6 +3947,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Categorical.copy(**override_parameters_kwargs)` {#Categorical.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Categorical.dtype` {#Categorical.dtype} @@ -4388,6 +4549,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.Chi2.copy(**override_parameters_kwargs)` {#Chi2.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Chi2.df` {#Chi2.df} @@ -4951,6 +5135,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Chi2WithAbsDf.copy(**override_parameters_kwargs)` {#Chi2WithAbsDf.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Chi2WithAbsDf.df` {#Chi2WithAbsDf.df} @@ -5536,6 +5743,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Exponential.copy(**override_parameters_kwargs)` {#Exponential.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. 
+ +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Exponential.dtype` {#Exponential.dtype} @@ -6099,6 +6329,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.ExponentialWithSoftplusLam.copy(**override_parameters_kwargs)` {#ExponentialWithSoftplusLam.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.ExponentialWithSoftplusLam.dtype` {#ExponentialWithSoftplusLam.dtype} @@ -6711,6 +6964,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Gamma.copy(**override_parameters_kwargs)` {#Gamma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Gamma.dtype` {#Gamma.dtype} @@ -7267,6 +7543,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#GammaWithSoftplusAlphaBeta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.dtype` {#GammaWithSoftplusAlphaBeta.dtype} @@ -7868,6 +8167,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.InverseGamma.copy(**override_parameters_kwargs)` {#InverseGamma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.InverseGamma.dtype` {#InverseGamma.dtype} @@ -8434,6 +8756,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#InverseGammaWithSoftplusAlphaBeta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.dtype` {#InverseGammaWithSoftplusAlphaBeta.dtype} @@ -9019,6 +9364,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Laplace.copy(**override_parameters_kwargs)` {#Laplace.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Laplace.dtype` {#Laplace.dtype} @@ -9553,6 +9921,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.LaplaceWithSoftplusScale.copy(**override_parameters_kwargs)` {#LaplaceWithSoftplusScale.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.LaplaceWithSoftplusScale.dtype` {#LaplaceWithSoftplusScale.dtype} @@ -10151,6 +10542,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Normal.copy(**override_parameters_kwargs)` {#Normal.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Normal.dtype` {#Normal.dtype} @@ -10685,6 +11099,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.NormalWithSoftplusSigma.copy(**override_parameters_kwargs)` {#NormalWithSoftplusSigma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.NormalWithSoftplusSigma.dtype` {#NormalWithSoftplusSigma.dtype} @@ -11243,6 +11680,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Poisson.copy(**override_parameters_kwargs)` {#Poisson.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Poisson.dtype` {#Poisson.dtype} @@ -11862,6 +12322,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.StudentT.copy(**override_parameters_kwargs)` {#StudentT.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.StudentT.df` {#StudentT.df} @@ -12419,6 +12902,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.copy(**override_parameters_kwargs)` {#StudentTWithAbsDfSoftplusSigma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.df` {#StudentTWithAbsDfSoftplusSigma.df} @@ -13032,6 +13538,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Uniform.copy(**override_parameters_kwargs)` {#Uniform.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Uniform.dtype` {#Uniform.dtype} @@ -13633,6 +14162,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalDiag.copy(**override_parameters_kwargs)` {#MultivariateNormalDiag.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.MultivariateNormalDiag.dtype` {#MultivariateNormalDiag.dtype} @@ -14274,6 +14826,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalFull.copy(**override_parameters_kwargs)` {#MultivariateNormalFull.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.MultivariateNormalFull.dtype` {#MultivariateNormalFull.dtype} @@ -14924,6 +15499,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalCholesky.copy(**override_parameters_kwargs)` {#MultivariateNormalCholesky.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.MultivariateNormalCholesky.dtype` {#MultivariateNormalCholesky.dtype} @@ -15600,6 +16198,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagPlusVDVT.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.dtype` {#MultivariateNormalDiagPlusVDVT.dtype} @@ -16180,6 +16801,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagWithSoftplusStDev.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.dtype` {#MultivariateNormalDiagWithSoftplusStDev.dtype} @@ -16920,6 +17564,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Dirichlet.copy(**override_parameters_kwargs)` {#Dirichlet.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Dirichlet.dtype` {#Dirichlet.dtype} @@ -17576,6 +18243,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.DirichletMultinomial.copy(**override_parameters_kwargs)` {#DirichletMultinomial.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.DirichletMultinomial.dtype` {#DirichletMultinomial.dtype} @@ -18247,6 +18937,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Multinomial.copy(**override_parameters_kwargs)` {#Multinomial.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Multinomial.dtype` {#Multinomial.dtype} @@ -18906,6 +19619,29 @@ cdf(x) := P[X <= x] Boolean indicating if `Tensor` input/outputs are Cholesky factorized. +- - - + +#### `tf.contrib.distributions.WishartCholesky.copy(**override_parameters_kwargs)` {#WishartCholesky.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. 
+ +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.WishartCholesky.df` {#WishartCholesky.df} @@ -19550,6 +20286,29 @@ cdf(x) := P[X <= x] Boolean indicating if `Tensor` input/outputs are Cholesky factorized. +- - - + +#### `tf.contrib.distributions.WishartFull.copy(**override_parameters_kwargs)` {#WishartFull.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.WishartFull.df` {#WishartFull.df} @@ -20227,8 +20986,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20244,6 +21003,29 @@ Additional documentation from `TransformedDistribution`: values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.copy(**override_parameters_kwargs)` {#TransformedDistribution.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.TransformedDistribution.distribution` {#TransformedDistribution.distribution} @@ -20345,8 +21127,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20429,8 +21211,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20468,8 +21250,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20621,8 +21403,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. 
+* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20675,8 +21457,8 @@ Samples from the base distribution and then passes through ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20724,8 +21506,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -20931,6 +21713,29 @@ The base distribution's `cdf` method must be defined on `y - 1`. values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.QuantizedDistribution.copy(**override_parameters_kwargs)` {#QuantizedDistribution.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.QuantizedDistribution.distribution` {#QuantizedDistribution.distribution} @@ -21612,6 +22417,29 @@ cdf(x) := P[X <= x] +- - - + +#### `tf.contrib.distributions.Mixture.copy(**override_parameters_kwargs)` {#Mixture.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype} @@ -22403,6 +23231,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.beta_aa.copy(**override_parameters_kwargs)` {#beta_aa.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.beta_aa.dtype` {#beta_aa.dtype} @@ -22967,6 +23818,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.beta_bb.copy(**override_parameters_kwargs)` {#beta_bb.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.beta_bb.dtype` {#beta_bb.dtype} diff --git a/tensorflow/g3doc/api_docs/python/contrib.layers.md b/tensorflow/g3doc/api_docs/python/contrib.layers.md index 72c61191485..604c215b213 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.layers.md +++ b/tensorflow/g3doc/api_docs/python/contrib.layers.md @@ -977,7 +977,11 @@ Various ways of passing optimizers, include: * `gradient_multipliers`: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. -* `clip_gradients`: float or `None`, clips gradients by this value. +* `clip_gradients`: float, callable or `None`. If a float is provided, a global + clipping is applied to prevent the norm of the gradient from exceeding this + value. Alternatively, a callable can be provided e.g.: adaptive_clipping. + This callable takes a `list` of `(gradients, variables)` `tuple`s and + returns the same thing with the gradients modified. * `learning_rate_decay_fn`: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay @@ -1008,6 +1012,7 @@ Various ways of passing optimizers, include: * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` is wrong type. + * `clip_gradients` is not float or callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available. diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md index 2e159c475ce..326a90b2c40 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md +++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md @@ -86,11 +86,6 @@ Certain metrics, such as streaming_mean or streaming_accuracy, can be weighted via a `weights` argument.
The `weights` tensor must be the same size as the labels and predictions tensors and results in a weighted average of the metric. -Other metrics, such as streaming_recall, streaming_precision, and streaming_auc, -are not well defined with regard to weighted samples. However, a binary -`ignore_mask` argument can be used to ignore certain values at graph executation -time. - ## Metric `Ops` - - - @@ -191,104 +186,100 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - - - -### `tf.contrib.metrics.streaming_recall(*args, **kwargs)` {#streaming_recall} +### `tf.contrib.metrics.streaming_recall(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_recall} -Computes the recall of the predictions with respect to the labels. (deprecated arguments) +Computes the recall of the predictions with respect to the labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_recall` function creates two local variables, `true_positives` +and `false_negatives`, that are used to compute the recall. This value is +ultimately returned as `recall`, an idempotent operation that simply divides +`true_positives` by the sum of `true_positives` and `false_negatives`. - The `streaming_recall` function creates two local variables, `true_positives` - and `false_negatives`, that are used to compute the recall. This value is - ultimately returned as `recall`, an idempotent operation that simply divides - `true_positives` by the sum of `true_positives` and `false_negatives`. +For estimation of the metric over a stream of data, the function creates an +`update_op` that updates these variables and returns the `recall`. `update_op` +weights each prediction by the corresponding value in `weights`. 
- For estimation of the metric over a stream of data, the function creates an - `update_op` that updates these variables and returns the `recall`. `update_op` - weights each prediction by the corresponding value in `weights`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `recall` should - be added to. - updates_collections: An optional list of collections that `update_op` should - be added to. - name: An optional variable_scope name. - Returns: - recall: Scalar float `Tensor` with the value of `true_positives` divided - by the sum of `true_positives` and `false_negatives`. - update_op: `Operation` that increments `true_positives` and - `false_negatives` variables appropriately and whose value matches - `recall`. +* `predictions`: The predicted values, a `bool` `Tensor` of arbitrary shape. +* `labels`: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. +* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `recall` should + be added to. +* `updates_collections`: An optional list of collections that `update_op` should + be added to. +* `name`: An optional variable_scope name. 
- Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. +##### Returns: + + +* `recall`: Scalar float `Tensor` with the value of `true_positives` divided + by the sum of `true_positives` and `false_negatives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately and whose value matches + `recall`. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. - - - -### `tf.contrib.metrics.streaming_precision(*args, **kwargs)` {#streaming_precision} +### `tf.contrib.metrics.streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_precision} -Computes the precision of the predictions with respect to the labels. (deprecated arguments) +Computes the precision of the predictions with respect to the labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_precision` function creates two local variables, +`true_positives` and `false_positives`, that are used to compute the +precision. This value is ultimately returned as `precision`, an idempotent +operation that simply divides `true_positives` by the sum of `true_positives` +and `false_positives`. 
- The `streaming_precision` function creates two local variables, - `true_positives` and `false_positives`, that are used to compute the - precision. This value is ultimately returned as `precision`, an idempotent - operation that simply divides `true_positives` by the sum of `true_positives` - and `false_positives`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision`. `update_op` weights each prediction by the corresponding value in +`weights`. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision`. `update_op` weights each prediction by the corresponding value in - `weights`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `precision` should - be added to. - updates_collections: An optional list of collections that `update_op` should - be added to. - name: An optional variable_scope name. - Returns: - precision: Scalar float `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. - update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately and whose value matches - `precision`. 
+* `predictions`: The predicted values, a `bool` `Tensor` of arbitrary shape. +* `labels`: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. +* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `precision` should + be added to. +* `updates_collections`: An optional list of collections that `update_op` should + be added to. +* `name`: An optional variable_scope name. - Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. +##### Returns: + + +* `precision`: Scalar float `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately and whose value matches + `precision`. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. - - - @@ -355,16 +346,12 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) +Computes the recall@k of the predictions with respect to dense labels. (deprecated) THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. Instructions for updating: Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. -SOME ARGUMENTS ARE DEPRECATED. 
They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. - The `streaming_recall_at_k` function creates two local variables, `total` and `count`, that are used to compute the recall@k frequency. This frequency is ultimately returned as `recall_at_`: an idempotent operation that simply @@ -379,15 +366,12 @@ Instructions for updating: increments `count` with the reduced sum of `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. Args: predictions: A floating point tensor of dimension [batch_size, num_classes] labels: A tensor of dimension [batch_size] whose type is in `int32`, `int64`. k: The number of top elements to look at for computing recall. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. weights: An optional `Tensor` whose shape is broadcastable to `predictions`. metrics_collections: An optional list of collections that `recall_at_k` should be added to. @@ -403,9 +387,8 @@ Instructions for updating: Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. @@ -462,56 +445,56 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. 
- - - -### `tf.contrib.metrics.streaming_mean_iou(*args, **kwargs)` {#streaming_mean_iou} +### `tf.contrib.metrics.streaming_mean_iou(predictions, labels, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_mean_iou} -Calculate per-step mean Intersection-Over-Union (mIOU). (deprecated arguments) +Calculate per-step mean Intersection-Over-Union (mIOU). -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +Mean Intersection-Over-Union is a common evaluation metric for +semantic image segmentation, which first computes the IOU for each +semantic class and then computes the average over classes. - Mean Intersection-Over-Union is a common evaluation metric for - semantic image segmentation, which first computes the IOU for each - semantic class and then computes the average over classes. - IOU is defined as follows: - IOU = true_positive / (true_positive + false_positive + false_negative). - The predictions are accumulated in a confusion matrix, weighted by `weights`, - and mIOU is then calculated from it. +##### IOU is defined as follows: - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the `mean_iou`. + IOU = true_positive / (true_positive + false_positive + false_negative). +The predictions are accumulated in a confusion matrix, weighted by `weights`, +and mIOU is then calculated from it. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the `mean_iou`. 
- Args: - predictions: A tensor of prediction results for semantic labels, whose - shape is [batch size] and type `int32` or `int64`. The tensor will be - flattened, if its rank > 1. - labels: A tensor of ground truth labels with shape [batch size] and of - type `int32` or `int64`. The tensor will be flattened, if its rank > 1. - num_classes: The possible number of labels the prediction task can - have. This value must be provided, since a confusion matrix of - dimension = [num_classes, num_classes] will be allocated. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `mean_iou` - should be added to. - updates_collections: An optional list of collections `update_op` should be - added to. - name: An optional variable_scope name. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Returns: - mean_iou: A tensor representing the mean intersection-over-union. - update_op: An operation that increments the confusion matrix. +##### Args: - Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. + +* `predictions`: A tensor of prediction results for semantic labels, whose + shape is [batch size] and type `int32` or `int64`. The tensor will be + flattened, if its rank > 1. +* `labels`: A tensor of ground truth labels with shape [batch size] and of + type `int32` or `int64`. The tensor will be flattened, if its rank > 1. +* `num_classes`: The possible number of labels the prediction task can + have. This value must be provided, since a confusion matrix of + dimension = [num_classes, num_classes] will be allocated. 
+* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `mean_iou` + should be added to. +* `updates_collections`: An optional list of collections `update_op` should be + added to. +* `name`: An optional variable_scope name. + +##### Returns: + + +* `mean_iou`: A tensor representing the mean intersection-over-union. +* `update_op`: An operation that increments the confusion matrix. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. - - - @@ -828,50 +811,48 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - - - -### `tf.contrib.metrics.streaming_percentage_less(*args, **kwargs)` {#streaming_percentage_less} +### `tf.contrib.metrics.streaming_percentage_less(values, threshold, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_percentage_less} -Computes the percentage of values less than the given threshold. (deprecated arguments) +Computes the percentage of values less than the given threshold. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_percentage_less` function creates two local variables, +`total` and `count` that are used to compute the percentage of `values` that +fall below `threshold`. This rate is weighted by `weights`, and it is +ultimately returned as `percentage` which is an idempotent operation that +simply divides `total` by `count`. 
- The `streaming_percentage_less` function creates two local variables, - `total` and `count` that are used to compute the percentage of `values` that - fall below `threshold`. This rate is weighted by `weights`, and it is - ultimately returned as `percentage` which is an idempotent operation that - simply divides `total` by `count`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`percentage`. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `percentage`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - values: A numeric `Tensor` of arbitrary size. - threshold: A scalar threshold. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`. - weights: An optional `Tensor` whose shape is broadcastable to `values`. - metrics_collections: An optional list of collections that the metric - value variable should be added to. - updates_collections: An optional list of collections that the metric update - ops should be added to. - name: An optional variable_scope name. - Returns: - percentage: A tensor representing the current mean, the value of `total` - divided by `count`. - update_op: An operation that increments the `total` and `count` variables - appropriately. +* `values`: A numeric `Tensor` of arbitrary size. +* `threshold`: A scalar threshold. +* `weights`: An optional `Tensor` whose shape is broadcastable to `values`. +* `metrics_collections`: An optional list of collections that the metric + value variable should be added to. 
+* `updates_collections`: An optional list of collections that the metric update + ops should be added to. +* `name`: An optional variable_scope name. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `values`, or if `weights` is not `None` and its shape doesn't match - `values`, or if either `metrics_collections` or `updates_collections` are - not a list or tuple. +##### Returns: + + +* `percentage`: A tensor representing the current mean, the value of `total` + divided by `count`. +* `update_op`: An operation that increments the `total` and `count` variables + appropriately. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. - - - @@ -991,232 +972,223 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - - - -### `tf.contrib.metrics.streaming_sparse_precision_at_k(*args, **kwargs)` {#streaming_sparse_precision_at_k} +### `tf.contrib.metrics.streaming_sparse_precision_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_k} -Computes precision@k of the predictions with respect to sparse labels. (deprecated arguments) +Computes precision@k of the predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is in the top-k highest + `predictions`, and computing the fraction of them for which `class_id` is + indeed a correct label. 
+If `class_id` is not specified, we'll calculate precision as how often on + average a class among the top-k classes with the highest predicted values + of a batch entry is correct and can be found in the label for that entry. - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is in the top-k highest - `predictions`, and computing the fraction of them for which `class_id` is - indeed a correct label. - If `class_id` is not specified, we'll calculate precision as how often on - average a class among the top-k classes with the highest predicted values - of a batch entry is correct and can be found in the label for that entry. +`streaming_sparse_precision_at_k` creates two local variables, +`true_positive_at_` and `false_positive_at_`, that are used to compute +the precision@k frequency. This frequency is ultimately returned as +`precision_at_`: an idempotent operation that simply divides +`true_positive_at_` by total (`true_positive_at_` + +`false_positive_at_`). - `streaming_sparse_precision_at_k` creates two local variables, - `true_positive_at_` and `false_positive_at_`, that are used to compute - the precision@k frequency. This frequency is ultimately returned as - `precision_at_`: an idempotent operation that simply divides - `true_positive_at_` by total (`true_positive_at_` + - `false_positive_at_`). +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision_at_`. Internally, a `top_k` operation computes a `Tensor` +indicating the top `k` `predictions`. Set operations applied to `top_k` and +`labels` calculate the true positives and false positives weighted by +`weights`. Then `update_op` increments `true_positive_at_` and +`false_positive_at_` using these values. 
- For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision_at_`. Internally, a `top_k` operation computes a `Tensor` - indicating the top `k` `predictions`. Set operations applied to `top_k` and - `labels` calculate the true positives and false positives weighted by - `weights`. Then `update_op` increments `true_positive_at_` and - `false_positive_at_` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where - N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. - The final dimension contains the logit values for each class. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions`. Values should be in range [0, num_classes), where - num_classes is the last dimension of `predictions`. Values outside this - range are ignored. - k: Integer, k for @k metric. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes], where num_classes is the last dimension of - `predictions`. If `class_id` is outside this range, the method returns - NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. - weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. 
- metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - precision: Scalar `float64` `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. - update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately, and whose value matches - `precision`. +* `predictions`: Float `Tensor` with shape [D1, ... DN, num_classes] where + N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. + The final dimension contains the logit values for each class. [D1, ... DN] + must match `labels`. +* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions`. Values should be in range [0, num_classes), where + num_classes is the last dimension of `predictions`. Values outside this + range are ignored. +* `k`: Integer, k for @k metric. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes], where num_classes is the last dimension of + `predictions`. If `class_id` is outside this range, the method returns + NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the the first + [D1, ... DN] dimensions of `predictions` and `labels`. +* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. +* `name`: Name of new update operation, and namespace for other dependent ops. 
- Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. +##### Returns: + + +* `precision`: Scalar `float64` `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately, and whose value matches + `precision`. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. - - - -### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(*args, **kwargs)` {#streaming_sparse_precision_at_top_k} +### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(top_k_predictions, labels, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_top_k} -Computes precision@k of top-k predictions with respect to sparse labels. (deprecated arguments) +Computes precision@k of top-k predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is in the top-k highest + `predictions`, and computing the fraction of them for which `class_id` is + indeed a correct label. +If `class_id` is not specified, we'll calculate precision as how often on + average a class among the top-k classes with the highest predicted values + of a batch entry is correct and can be found in the label for that entry. 
- If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is in the top-k highest - `predictions`, and computing the fraction of them for which `class_id` is - indeed a correct label. - If `class_id` is not specified, we'll calculate precision as how often on - average a class among the top-k classes with the highest predicted values - of a batch entry is correct and can be found in the label for that entry. +`streaming_sparse_precision_at_top_k` creates two local variables, +`true_positive_at_k` and `false_positive_at_k`, that are used to compute +the precision@k frequency. This frequency is ultimately returned as +`precision_at_k`: an idempotent operation that simply divides +`true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`). - `streaming_sparse_precision_at_top_k` creates two local variables, - `true_positive_at_k` and `false_positive_at_k`, that are used to compute - the precision@k frequency. This frequency is ultimately returned as - `precision_at_k`: an idempotent operation that simply divides - `true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`). +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision_at_k`. Internally, set operations applied to `top_k_predictions` +and `labels` calculate the true positives and false positives weighted by +`weights`. Then `update_op` increments `true_positive_at_k` and +`false_positive_at_k` using these values. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision_at_k`. Internally, set operations applied to `top_k_predictions` - and `labels` calculate the true positives and false positives weighted by - `weights`. 
Then `update_op` increments `true_positive_at_k` and - `false_positive_at_k` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where - N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k]. - The final dimension contains the indices of top-k labels. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match - `top_k_predictions`. Values should be in range [0, num_classes), where - num_classes is the last dimension of `predictions`. Values outside this - range are ignored. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes), where num_classes is the last dimension of - `predictions`. If `class_id` is outside this range, the method returns - NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. - weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. - metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - precision: Scalar `float64` `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. 
- update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately, and whose value matches - `precision`. +* `top_k_predictions`: Integer `Tensor` with shape [D1, ... DN, k] where + N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k]. + The final dimension contains the indices of top-k labels. [D1, ... DN] + must match `labels`. +* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `top_k_predictions`. Values should be in range [0, num_classes), where + num_classes is the last dimension of `predictions`. Values outside this + range are ignored. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes), where num_classes is the last dimension of + `predictions`. If `class_id` is outside this range, the method returns + NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the the first + [D1, ... DN] dimensions of `predictions` and `labels`. +* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. +* `name`: Name of new update operation, and namespace for other dependent ops. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. - ValueError: If `top_k_predictions` has rank < 2. +##### Returns: + + +* `precision`: Scalar `float64` `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. 
+* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately, and whose value matches + `precision`. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. +* `ValueError`: If `top_k_predictions` has rank < 2. - - - -### `tf.contrib.metrics.streaming_sparse_recall_at_k(*args, **kwargs)` {#streaming_sparse_recall_at_k} +### `tf.contrib.metrics.streaming_sparse_recall_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_recall_at_k} -Computes recall@k of the predictions with respect to sparse labels. (deprecated arguments) +Computes recall@k of the predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate recall by considering only the + entries in the batch for which `class_id` is in the label, and computing + the fraction of them for which `class_id` is in the top-k `predictions`. +If `class_id` is not specified, we'll calculate recall as how often on + average a class among the labels of a batch entry is in the top-k + `predictions`. - If `class_id` is specified, we calculate recall by considering only the - entries in the batch for which `class_id` is in the label, and computing - the fraction of them for which `class_id` is in the top-k `predictions`. - If `class_id` is not specified, we'll calculate recall as how often on - average a class among the labels of a batch entry is in the top-k - `predictions`. 
+`streaming_sparse_recall_at_k` creates two local variables, +`true_positive_at_` and `false_negative_at_`, that are used to compute +the recall_at_k frequency. This frequency is ultimately returned as +`recall_at_`: an idempotent operation that simply divides +`true_positive_at_` by total (`true_positive_at_` + +`false_negative_at_`). - `streaming_sparse_recall_at_k` creates two local variables, - `true_positive_at_` and `false_negative_at_`, that are used to compute - the recall_at_k frequency. This frequency is ultimately returned as - `recall_at_`: an idempotent operation that simply divides - `true_positive_at_` by total (`true_positive_at_` + - `false_negative_at_`). +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`recall_at_`. Internally, a `top_k` operation computes a `Tensor` +indicating the top `k` `predictions`. Set operations applied to `top_k` and +`labels` calculate the true positives and false negatives weighted by +`weights`. Then `update_op` increments `true_positive_at_` and +`false_negative_at_` using these values. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `recall_at_`. Internally, a `top_k` operation computes a `Tensor` - indicating the top `k` `predictions`. Set operations applied to `top_k` and - `labels` calculate the true positives and false negatives weighted by - `weights`. Then `update_op` increments `true_positive_at_` and - `false_negative_at_` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where - N >= 1. 
Commonly, N=1 and predictions has shape [batch size, num_classes]. - The final dimension contains the logit values for each class. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. - Values should be in range [0, num_classes), where num_classes is the last - dimension of `predictions`. Values outside this range always count - towards `false_negative_at_`. - k: Integer, k for @k metric. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes), where num_classes is the last dimension of - `predictions`. If class_id is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. - weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. - metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - recall: Scalar `float64` `Tensor` with the value of `true_positives` divided - by the sum of `true_positives` and `false_negatives`. - update_op: `Operation` that increments `true_positives` and - `false_negatives` variables appropriately, and whose value matches - `recall`. +* `predictions`: Float `Tensor` with shape [D1, ... DN, num_classes] where + N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. + The final dimension contains the logit values for each class. [D1, ... DN] + must match `labels`. 
+* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. + Values should be in range [0, num_classes), where num_classes is the last + dimension of `predictions`. Values outside this range always count + towards `false_negative_at_`. +* `k`: Integer, k for @k metric. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes), where num_classes is the last dimension of + `predictions`. If class_id is outside this range, the method returns NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the the first + [D1, ... DN] dimensions of `predictions` and `labels`. +* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. +* `name`: Name of new update operation, and namespace for other dependent ops. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. +##### Returns: + + +* `recall`: Scalar `float64` `Tensor` with the value of `true_positives` divided + by the sum of `true_positives` and `false_negatives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately, and whose value matches + `recall`. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. 
- - - diff --git a/tensorflow/g3doc/api_docs/python/contrib.rnn.md b/tensorflow/g3doc/api_docs/python/contrib.rnn.md index f0d70436a5e..1d59c1c6304 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.rnn.md +++ b/tensorflow/g3doc/api_docs/python/contrib.rnn.md @@ -744,7 +744,7 @@ the shapes `[batch_size x s]` for each s in `state_size`. Basic attention cell wrapper. -Implementation based on https://arxiv.org/pdf/1601.06733.pdf. +Implementation based on https://arxiv.org/abs/1409.0473. - - - #### `tf.contrib.rnn.AttentionCellWrapper.__call__(inputs, state, scope=None)` {#AttentionCellWrapper.__call__} diff --git a/tensorflow/g3doc/api_docs/python/contrib.training.md b/tensorflow/g3doc/api_docs/python/contrib.training.md index 8b22edf7c1a..935c163e060 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.training.md +++ b/tensorflow/g3doc/api_docs/python/contrib.training.md @@ -726,8 +726,9 @@ It should be run in a separate thread via e.g. a `QueueRunner`. To resample data with replacement on a per-example basis, use ['rejection_sample'](#rejection_sample) or ['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide -a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, -providing the desired rate for each example. If you wish to specify relative +a boolean Tensor describing whether to accept or reject. Resulting batch sizes +are always the same. For `resample_at_rate`, provide the desired rate for each +example. Resulting batch sizes may vary. If you wish to specify relative rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) (which also returns the actual resampling rate used for each output example). 
diff --git a/tensorflow/g3doc/api_docs/python/functional_ops.md b/tensorflow/g3doc/api_docs/python/functional_ops.md index 338f315b553..3102cad0e55 100644 --- a/tensorflow/g3doc/api_docs/python/functional_ops.md +++ b/tensorflow/g3doc/api_docs/python/functional_ops.md @@ -41,6 +41,22 @@ Furthermore, `fn` may emit a different structure than its input. For example, the `dtype` parameter is not optional: `dtype` must be a type or (possibly nested) tuple of types matching the output of `fn`. +To apply a functional operation to the nonzero elements of a SparseTensor +one of the following methods is recommended. First, if the function is +expressible as TensorFlow ops, use + +```python + result = SparseTensor(input.indices, fn(input.values), input.shape) +``` + +If, however, the function is not expressible as a TensorFlow op, then use + +```python +result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape) +``` + +instead. + ##### Args: @@ -71,7 +87,7 @@ nested) tuple of types matching the output of `fn`. * `TypeError`: if `fn` is not callable or the structure of the output of - `fn` and `dtype` do not match. + `fn` and `dtype` do not match, or if elems is a SparseTensor. * `ValueError`: if the lengths of the output of `fn` and `dtype` do not match. ##### Examples: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md index 7338070ba5e..bb563579927 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md @@ -102,6 +102,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Bernoulli.copy(**override_parameters_kwargs)` {#Bernoulli.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Bernoulli.dtype` {#Bernoulli.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md index 551713320e6..7b99144e983 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Chi2WithAbsDf.md @@ -87,6 +87,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Chi2WithAbsDf.copy(**override_parameters_kwargs)` {#Chi2WithAbsDf.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`.
+ + - - - #### `tf.contrib.distributions.Chi2WithAbsDf.df` {#Chi2WithAbsDf.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md index 0bc2ed75745..92ebb7b3a79 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Dirichlet.md @@ -174,6 +174,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Dirichlet.copy(**override_parameters_kwargs)` {#Dirichlet.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Dirichlet.dtype` {#Dirichlet.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md index edbf045d475..a85e6bed2b6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Distribution.md @@ -213,6 +213,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Distribution.copy(**override_parameters_kwargs)` {#Distribution.copy} + +Creates a deep copy of the distribution.
+ +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Distribution.dtype` {#Distribution.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md index ded3478b77b..d4b6c1c2180 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.MultivariateNormalCholesky.md @@ -143,6 +143,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalCholesky.copy(**override_parameters_kwargs)` {#MultivariateNormalCholesky.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`.
+ + - - - #### `tf.contrib.distributions.MultivariateNormalCholesky.dtype` {#MultivariateNormalCholesky.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md new file mode 100644 index 00000000000..5805851802a --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.bijector.CholeskyOuterProduct.md @@ -0,0 +1,223 @@ +Bijector which computes Y = g(X) = X X^T where X is a lower-triangular, positive-diagonal matrix. + +`event_ndims` must be 0 or 2, i.e., scalar or matrix. + +Note: the upper-triangular part of X is ignored (whether or not it's zero). + +Examples: + +```python +bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]]) +# Result: [[1, 2], [2, 5]], i.e., x x^T + +bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]]) +# Result: [[1, 0], [2, 1]], i.e., chol(y). +``` +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.__init__(event_ndims=2, validate_args=False, name='cholesky_outer_product')` {#CholeskyOuterProduct.__init__} + +Instantiates the `CholeskyOuterProduct` bijector. + +##### Args: + + +* `event_ndims`: `constant` `int32` scalar `Tensor` indicating the number of + dimensions associated with a particular draw from the distribution. Must + be 0 or 2. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. +* `name`: `String` name given to ops managed by this object. + +##### Raises: + + +* `ValueError`: if event_ndims is neither 0 nor 2. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.dtype` {#CholeskyOuterProduct.dtype} + +dtype of `Tensor`s transformable by this distribution.
+ + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward(x, name='forward', **condition_kwargs)` {#CholeskyOuterProduct.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.forward_log_det_jacobian(x, name='forward_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.forward_log_det_jacobian} + +Returns both the forward_log_det_jacobian. + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_forward_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse(y, name='inverse', **condition_kwargs)` {#CholeskyOuterProduct.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian(y, name='inverse_and_inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.inverse_log_det_jacobian(y, name='inverse_log_det_jacobian', **condition_kwargs)` {#CholeskyOuterProduct.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(y). + +Mathematically, returns: `log(det(dX/dY))(Y)`. (Recall that: `X=g^{-1}(Y)`.) + +Note that `forward_log_det_jacobian` is the negative of this function. + +##### Args: + + +* `y`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. +* `**condition_kwargs`: Named arguments forwarded to subclass implementation. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `y.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.is_constant_jacobian` {#CholeskyOuterProduct.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.name` {#CholeskyOuterProduct.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.parameters` {#CholeskyOuterProduct.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.shaper` {#CholeskyOuterProduct.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.CholeskyOuterProduct.validate_args` {#CholeskyOuterProduct.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md index 3bad4deb66d..607aea1f1d6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.rnn.AttentionCellWrapper.md @@ -1,6 +1,6 @@ Basic attention cell wrapper. -Implementation based on https://arxiv.org/pdf/1601.06733.pdf. +Implementation based on https://arxiv.org/abs/1409.0473. 
- - - #### `tf.contrib.rnn.AttentionCellWrapper.__call__(inputs, state, scope=None)` {#AttentionCellWrapper.__call__} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md index 5d656d040d2..739fb106fd9 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.MultivariateNormalDiag.md @@ -142,6 +142,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalDiag.copy(**override_parameters_kwargs)` {#MultivariateNormalDiag.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`.
+ + - - - #### `tf.contrib.distributions.MultivariateNormalDiag.dtype` {#MultivariateNormalDiag.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md index 6cae002036b..4d16d13397b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.QuantizedDistribution.md @@ -170,6 +170,29 @@ The base distribution's `cdf` method must be defined on `y - 1`. values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.QuantizedDistribution.copy(**override_parameters_kwargs)` {#QuantizedDistribution.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.QuantizedDistribution.distribution` {#QuantizedDistribution.distribution} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md index 59dd01bf4d5..ec6513731fb 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.StudentT.md @@ -145,6 +145,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.StudentT.copy(**override_parameters_kwargs)` {#StudentT.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.StudentT.df` {#StudentT.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md index 2a5ff418470..4b4f4413b55 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md @@ -185,8 +185,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -202,6 +202,29 @@ Additional documentation from `TransformedDistribution`: values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.copy(**override_parameters_kwargs)` {#TransformedDistribution.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.TransformedDistribution.distribution` {#TransformedDistribution.distribution} @@ -303,8 +326,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -387,8 +410,8 @@ Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -426,8 +449,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -579,8 +602,8 @@ Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. 
+* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -633,8 +656,8 @@ Samples from the base distribution and then passes through ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: @@ -682,8 +705,8 @@ Additional documentation from `TransformedDistribution`: ##### `condition_kwargs`: -* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. * `bijector_kwargs`: Python dictionary of arg names/values forwarded to the bijector. +* `distribution_kwargs`: Python dictionary of arg names/values forwarded to the distribution. ##### Args: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md index 7fd1d30790d..1a1086fac19 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md @@ -1,73 +1,70 @@ -### `tf.contrib.metrics.streaming_sparse_recall_at_k(*args, **kwargs)` {#streaming_sparse_recall_at_k} +### `tf.contrib.metrics.streaming_sparse_recall_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_recall_at_k} -Computes recall@k of the predictions with respect to sparse labels. (deprecated arguments) +Computes recall@k of the predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. 
-Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate recall by considering only the + entries in the batch for which `class_id` is in the label, and computing + the fraction of them for which `class_id` is in the top-k `predictions`. +If `class_id` is not specified, we'll calculate recall as how often on + average a class among the labels of a batch entry is in the top-k + `predictions`. - If `class_id` is specified, we calculate recall by considering only the - entries in the batch for which `class_id` is in the label, and computing - the fraction of them for which `class_id` is in the top-k `predictions`. - If `class_id` is not specified, we'll calculate recall as how often on - average a class among the labels of a batch entry is in the top-k - `predictions`. +`streaming_sparse_recall_at_k` creates two local variables, +`true_positive_at_` and `false_negative_at_`, that are used to compute +the recall_at_k frequency. This frequency is ultimately returned as +`recall_at_`: an idempotent operation that simply divides +`true_positive_at_` by total (`true_positive_at_` + +`false_negative_at_`). - `streaming_sparse_recall_at_k` creates two local variables, - `true_positive_at_` and `false_negative_at_`, that are used to compute - the recall_at_k frequency. This frequency is ultimately returned as - `recall_at_`: an idempotent operation that simply divides - `true_positive_at_` by total (`true_positive_at_` + - `false_negative_at_`). +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`recall_at_`. Internally, a `top_k` operation computes a `Tensor` +indicating the top `k` `predictions`. Set operations applied to `top_k` and +`labels` calculate the true positives and false negatives weighted by +`weights`. 
Then `update_op` increments `true_positive_at_` and +`false_negative_at_` using these values. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `recall_at_`. Internally, a `top_k` operation computes a `Tensor` - indicating the top `k` `predictions`. Set operations applied to `top_k` and - `labels` calculate the true positives and false negatives weighted by - `weights`. Then `update_op` increments `true_positive_at_` and - `false_negative_at_` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where - N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. - The final dimension contains the logit values for each class. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. - Values should be in range [0, num_classes), where num_classes is the last - dimension of `predictions`. Values outside this range always count - towards `false_negative_at_`. - k: Integer, k for @k metric. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes), where num_classes is the last dimension of - `predictions`. If class_id is outside this range, the method returns NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. 
- weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. - metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - recall: Scalar `float64` `Tensor` with the value of `true_positives` divided - by the sum of `true_positives` and `false_negatives`. - update_op: `Operation` that increments `true_positives` and - `false_negatives` variables appropriately, and whose value matches - `recall`. +* `predictions`: Float `Tensor` with shape [D1, ... DN, num_classes] where + N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. + The final dimension contains the logit values for each class. [D1, ... DN] + must match `labels`. +* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. + Values should be in range [0, num_classes), where num_classes is the last + dimension of `predictions`. Values outside this range always count + towards `false_negative_at_`. +* `k`: Integer, k for @k metric. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes), where num_classes is the last dimension of + `predictions`. If class_id is outside this range, the method returns NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the the first + [D1, ... DN] dimensions of `predictions` and `labels`. +* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. 
+* `name`: Name of new update operation, and namespace for other dependent ops. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. +##### Returns: + + +* `recall`: Scalar `float64` `Tensor` with the value of `true_positives` divided + by the sum of `true_positives` and `false_negatives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately, and whose value matches + `recall`. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md index dd98fd9dd8a..5e49278a182 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.map_fn.md @@ -23,6 +23,22 @@ Furthermore, `fn` may emit a different structure than its input. For example, the `dtype` parameter is not optional: `dtype` must be a type or (possibly nested) tuple of types matching the output of `fn`. +To apply a functional operation to the nonzero elements of a SparseTensor +one of the following methods is recommended. First, if the function is +expressible as TensorFlow ops, use + +```python + result = SparseTensor(input.indices, fn(input.values), input.shape) +``` + +If, however, the function is not expressible as a TensorFlow op, then use + +```python +result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape) +``` + +instead. + ##### Args: @@ -53,7 +69,7 @@ nested) tuple of types matching the output of `fn`. 
* `TypeError`: if `fn` is not callable or the structure of the output of - `fn` and `dtype` do not match. + `fn` and `dtype` do not match, or if elems is a SparseTensor. * `ValueError`: if the lengths of the output of `fn` and `dtype` do not match. ##### Examples: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md index 87b72a52cdb..db1f68f83a9 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md @@ -133,6 +133,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Categorical.copy(**override_parameters_kwargs)` {#Categorical.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.Categorical.dtype` {#Categorical.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md index c0268e6b012..8ed0532a845 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Chi2.md @@ -109,6 +109,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Chi2.copy(**override_parameters_kwargs)` {#Chi2.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Chi2.df` {#Chi2.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md index a294d0b9c4f..0b4357976a6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Uniform.md @@ -129,6 +129,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Uniform.copy(**override_parameters_kwargs)` {#Uniform.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Uniform.dtype` {#Uniform.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md index 8aa83efb7b1..142c2b2c70c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.WishartCholesky.md @@ -159,6 +159,29 @@ cdf(x) := P[X <= x] Boolean indicating if `Tensor` input/outputs are Cholesky factorized. +- - - + +#### `tf.contrib.distributions.WishartCholesky.copy(**override_parameters_kwargs)` {#WishartCholesky.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`.
+ + - - - #### `tf.contrib.distributions.WishartCholesky.df` {#WishartCholesky.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md index d994a57f457..b1f349e7592 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md @@ -1,8 +1,10 @@ -Interface for transforming a `Distribution` via `TransformedDistribution`. +Interface for transforming a `Distribution` sample. -A `Bijector` implements a bijective, differentiable function by transforming -an input `Tensor`. The output `Tensor` shape is constrained by the input -`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +A `Bijector` implements a +[diffeomorphism](https://en.wikipedia.org/wiki/Diffeomorphism), i.e., a +bijective, differentiable function. A `Bijector` is used by +`TransformedDistribution` but can be generally used for transforming a +`Distribution` generated `Tensor`. A `Bijector` is characterized by three operations: 1. Forward Evaluation @@ -143,7 +145,8 @@ Tips for implementing `_inverse` and `_inverse_log_det_jacobian`: - The inverse `log o det o Jacobian` can be implemented as the negative of the forward `log o det o Jacobian`. This is useful if the `inverse` is implemented as a cache or the inverse Jacobian is computationally more - expensive. The following demonstrates the suggested implementation. + expensive (e.g., `CholeskyOuterProduct` `Bijector`). The following + demonstrates the suggested implementation. 
```python def _inverse_and_log_det_jacobian(self, y): diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md index dbd0d465729..fc460e7cacc 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.layers.optimize_loss.md @@ -42,7 +42,11 @@ Various ways of passing optimizers, include: * `gradient_multipliers`: dict of variables or variable names to floats. If present, gradients for specified variables will be multiplied by given constant. -* `clip_gradients`: float or `None`, clips gradients by this value. +* `clip_gradients`: float, callable or `None`. If a float is provided, a global + clipping is applied to prevent the norm of the gradient from exceeding this + value. Alternatively, a callable can be provided, e.g., adaptive_clipping. + This callable takes a `list` of `(gradients, variables)` `tuple`s and + returns the same thing with the gradients modified. * `learning_rate_decay_fn`: function, takes `learning_rate` and `global_step` `Tensor`s, returns `Tensor`. Can be used to implement any learning rate decay @@ -73,6 +77,7 @@ Various ways of passing optimizers, include: * `global_step` is an invalid type or shape. * `learning_rate` is an invalid type or value. * `optimizer` is wrong type. + * `clip_gradients` is not float or callable. * `learning_rate` and `learning_rate_decay_fn` are supplied, but no `global_step` is available.
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md index c2c025724dd..bb10bc85947 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md @@ -1,75 +1,72 @@ -### `tf.contrib.metrics.streaming_sparse_precision_at_k(*args, **kwargs)` {#streaming_sparse_precision_at_k} +### `tf.contrib.metrics.streaming_sparse_precision_at_k(predictions, labels, k, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_k} -Computes precision@k of the predictions with respect to sparse labels. (deprecated arguments) +Computes precision@k of the predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is in the top-k highest + `predictions`, and computing the fraction of them for which `class_id` is + indeed a correct label. +If `class_id` is not specified, we'll calculate precision as how often on + average a class among the top-k classes with the highest predicted values + of a batch entry is correct and can be found in the label for that entry. - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is in the top-k highest - `predictions`, and computing the fraction of them for which `class_id` is - indeed a correct label. 
- If `class_id` is not specified, we'll calculate precision as how often on - average a class among the top-k classes with the highest predicted values - of a batch entry is correct and can be found in the label for that entry. +`streaming_sparse_precision_at_k` creates two local variables, +`true_positive_at_` and `false_positive_at_`, that are used to compute +the precision@k frequency. This frequency is ultimately returned as +`precision_at_`: an idempotent operation that simply divides +`true_positive_at_` by total (`true_positive_at_` + +`false_positive_at_`). - `streaming_sparse_precision_at_k` creates two local variables, - `true_positive_at_` and `false_positive_at_`, that are used to compute - the precision@k frequency. This frequency is ultimately returned as - `precision_at_`: an idempotent operation that simply divides - `true_positive_at_` by total (`true_positive_at_` + - `false_positive_at_`). +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision_at_`. Internally, a `top_k` operation computes a `Tensor` +indicating the top `k` `predictions`. Set operations applied to `top_k` and +`labels` calculate the true positives and false positives weighted by +`weights`. Then `update_op` increments `true_positive_at_` and +`false_positive_at_` using these values. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision_at_`. Internally, a `top_k` operation computes a `Tensor` - indicating the top `k` `predictions`. Set operations applied to `top_k` and - `labels` calculate the true positives and false positives weighted by - `weights`. Then `update_op` increments `true_positive_at_` and - `false_positive_at_` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. 
Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: Float `Tensor` with shape [D1, ... DN, num_classes] where - N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. - The final dimension contains the logit values for each class. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions`. Values should be in range [0, num_classes), where - num_classes is the last dimension of `predictions`. Values outside this - range are ignored. - k: Integer, k for @k metric. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes], where num_classes is the last dimension of - `predictions`. If `class_id` is outside this range, the method returns - NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. - weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. - metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - precision: Scalar `float64` `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. - update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately, and whose value matches - `precision`. +* `predictions`: Float `Tensor` with shape [D1, ... 
DN, num_classes] where + N >= 1. Commonly, N=1 and predictions has shape [batch size, num_classes]. + The final dimension contains the logit values for each class. [D1, ... DN] + must match `labels`. +* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `predictions`. Values should be in range [0, num_classes), where + num_classes is the last dimension of `predictions`. Values outside this + range are ignored. +* `k`: Integer, k for @k metric. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes], where num_classes is the last dimension of + `predictions`. If `class_id` is outside this range, the method returns + NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the the first + [D1, ... DN] dimensions of `predictions` and `labels`. +* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. +* `name`: Name of new update operation, and namespace for other dependent ops. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. +##### Returns: + + +* `precision`: Scalar `float64` `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately, and whose value matches + `precision`. 
+ +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md index 13797eeab84..dfad97e766e 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.image.per_image_whitening.md @@ -1,30 +1,4 @@ ### `tf.image.per_image_whitening(image)` {#per_image_whitening} -Linearly scales `image` to have zero mean and unit norm. - -This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average -of all values in image, and -`adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`. - -`stddev` is the standard deviation of all values in `image`. It is capped -away from zero to protect against division by 0 when handling uniform images. - -Note that this implementation is limited: - -* It only whitens based on the statistics of an individual image. -* It does not take into account the covariance structure. - -##### Args: -* `image`: 3-D tensor of shape `[height, width, channels]`. - -##### Returns: - - The whitened image with same shape as `image`. - -##### Raises: - - -* `ValueError`: if the shape of 'image' is incompatible with this function. 
- diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md new file mode 100644 index 00000000000..5fbff8f9d4e --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.train.natural_exp_decay.md @@ -0,0 +1,56 @@ +### `tf.train.natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#natural_exp_decay} + +Applies natural exponential decay to the initial learning rate. + +When training a model, it is often recommended to lower the learning rate as +the training progresses. This function applies an exponential decay function +to a provided initial learning rate. It requires an `global_step` value to +compute the decayed learning rate. You can just pass a TensorFlow variable +that you increment at each training step. + +The function returns the decayed learning rate. It is computed as: + +```python +decayed_learning_rate = learning_rate * exp(-decay_rate * global_step) +``` + +Example: decay exponentially with a base of 0.96: + +```python +... +global_step = tf.Variable(0, trainable=False) +learning_rate = 0.1 +k = 0.5 +learning_rate = tf.train.exponential_time_decay(learning_rate, global_step, k) + +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: How often to apply decay. +* `decay_rate`: A Python number. The decay rate. +* `staircase`: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. +* `name`: String. 
Optional name of the operation. Defaults to + 'ExponentialTimeDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md index 50ce4a3e6ee..a23bf3b5c53 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.BetaWithSoftplusAB.md @@ -94,6 +94,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.BetaWithSoftplusAB.copy(**override_parameters_kwargs)` {#BetaWithSoftplusAB.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.BetaWithSoftplusAB.dtype` {#BetaWithSoftplusAB.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md index 36989a55033..19e3a20bc8f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md @@ -159,6 +159,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Binomial.copy(**override_parameters_kwargs)` {#Binomial.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +initialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` initialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Binomial.dtype` {#Binomial.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md index 6dcf35cd20b..76b7093595b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.DirichletMultinomial.md @@ -186,6 +186,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.DirichletMultinomial.copy(**override_parameters_kwargs)` {#DirichletMultinomial.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.DirichletMultinomial.dtype` {#DirichletMultinomial.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md index c1c2fde90d8..fad44a07215 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Exponential.md @@ -109,6 +109,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Exponential.copy(**override_parameters_kwargs)` {#Exponential.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.Exponential.dtype` {#Exponential.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md index 82f66d080e8..d990fcff3b2 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Gamma.md @@ -136,6 +136,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Gamma.copy(**override_parameters_kwargs)` {#Gamma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Gamma.dtype` {#Gamma.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md index 5c9ca305fb1..dfe8d1fb547 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.GammaWithSoftplusAlphaBeta.md @@ -87,6 +87,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#GammaWithSoftplusAlphaBeta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.GammaWithSoftplusAlphaBeta.dtype` {#GammaWithSoftplusAlphaBeta.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md index 077e2b5e2bc..01e3c77478e 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGamma.md @@ -132,6 +132,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.InverseGamma.copy(**override_parameters_kwargs)` {#InverseGamma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.InverseGamma.dtype` {#InverseGamma.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md index 430b0243e79..e960ace66d7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.md @@ -87,6 +87,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.copy(**override_parameters_kwargs)` {#InverseGammaWithSoftplusAlphaBeta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.InverseGammaWithSoftplusAlphaBeta.dtype` {#InverseGammaWithSoftplusAlphaBeta.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md index 95fce3d5240..811f913be7a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md @@ -169,6 +169,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Multinomial.copy(**override_parameters_kwargs)` {#Multinomial.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Multinomial.dtype` {#Multinomial.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md index c1774a5a63a..9aa1a69a73a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.md @@ -169,6 +169,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. 
+- - - + +#### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagPlusVDVT.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.MultivariateNormalDiagPlusVDVT.dtype` {#MultivariateNormalDiagPlusVDVT.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md index d8cd7de27c6..4c65892d755 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md @@ -51,8 +51,8 @@ Instantiates the `Exp` bijector. * `scale`: `Tensor` used to scale input, i.e., `Y = g(X) = scale * X + shift`. * `event_ndims`: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String` name given to ops managed by this object. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md index 83dc4f9c7e0..e9a7b10c687 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md @@ -73,6 +73,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.BernoulliWithSigmoidP.copy(**override_parameters_kwargs)` {#BernoulliWithSigmoidP.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.BernoulliWithSigmoidP.dtype` {#BernoulliWithSigmoidP.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md index 80ba0266a88..41ced3f4755 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Invert.md @@ -29,8 +29,8 @@ return -self.inverse_log_det_jacobian(y, **condition_kwargs) * `bijector`: Bijector instance. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. 
+* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md index 24e2d3d8b5a..9ae2059a5dc 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md @@ -1,15 +1,11 @@ ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) +Computes the recall@k of the predictions with respect to dense labels. (deprecated) THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. Instructions for updating: Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. - The `streaming_recall_at_k` function creates two local variables, `total` and `count`, that are used to compute the recall@k frequency. This frequency is ultimately returned as `recall_at_`: an idempotent operation that simply @@ -24,15 +20,12 @@ Instructions for updating: increments `count` with the reduced sum of `weights`. If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. 
Args: predictions: A floating point tensor of dimension [batch_size, num_classes] labels: A tensor of dimension [batch_size] whose type is in `int32`, `int64`. k: The number of top elements to look at for computing recall. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. weights: An optional `Tensor` whose shape is broadcastable to `predictions`. metrics_collections: An optional list of collections that `recall_at_k` should be added to. @@ -48,8 +41,7 @@ Instructions for updating: Raises: ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md index 53f6e786b23..d9d3f8ecec4 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_sparse_precision_at_top_k.md @@ -1,73 +1,70 @@ -### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(*args, **kwargs)` {#streaming_sparse_precision_at_top_k} +### `tf.contrib.metrics.streaming_sparse_precision_at_top_k(top_k_predictions, labels, class_id=None, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_sparse_precision_at_top_k} -Computes precision@k of top-k predictions with respect to sparse labels. 
(deprecated arguments) +Computes precision@k of top-k predictions with respect to sparse labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +If `class_id` is specified, we calculate precision by considering only the + entries in the batch for which `class_id` is in the top-k highest + `predictions`, and computing the fraction of them for which `class_id` is + indeed a correct label. +If `class_id` is not specified, we'll calculate precision as how often on + average a class among the top-k classes with the highest predicted values + of a batch entry is correct and can be found in the label for that entry. - If `class_id` is specified, we calculate precision by considering only the - entries in the batch for which `class_id` is in the top-k highest - `predictions`, and computing the fraction of them for which `class_id` is - indeed a correct label. - If `class_id` is not specified, we'll calculate precision as how often on - average a class among the top-k classes with the highest predicted values - of a batch entry is correct and can be found in the label for that entry. +`streaming_sparse_precision_at_top_k` creates two local variables, +`true_positive_at_k` and `false_positive_at_k`, that are used to compute +the precision@k frequency. This frequency is ultimately returned as +`precision_at_k`: an idempotent operation that simply divides +`true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`). - `streaming_sparse_precision_at_top_k` creates two local variables, - `true_positive_at_k` and `false_positive_at_k`, that are used to compute - the precision@k frequency. This frequency is ultimately returned as - `precision_at_k`: an idempotent operation that simply divides - `true_positive_at_k` by total (`true_positive_at_k` + `false_positive_at_k`). 
+For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision_at_k`. Internally, set operations applied to `top_k_predictions` +and `labels` calculate the true positives and false positives weighted by +`weights`. Then `update_op` increments `true_positive_at_k` and +`false_positive_at_k` using these values. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision_at_k`. Internally, set operations applied to `top_k_predictions` - and `labels` calculate the true positives and false positives weighted by - `weights`. Then `update_op` increments `true_positive_at_k` and - `false_positive_at_k` using these values. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - top_k_predictions: Integer `Tensor` with shape [D1, ... DN, k] where - N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k]. - The final dimension contains the indices of top-k labels. [D1, ... DN] - must match `labels`. - labels: `int64` `Tensor` or `SparseTensor` with shape - [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of - target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match - `top_k_predictions`. Values should be in range [0, num_classes), where - num_classes is the last dimension of `predictions`. Values outside this - range are ignored. - class_id: Integer class ID for which we want binary metrics. This should be - in range [0, num_classes), where num_classes is the last dimension of - `predictions`. 
If `class_id` is outside this range, the method returns - NAN. - ignore_mask: An optional, `bool` `Tensor` whose shape is broadcastable to - the the first [D1, ... DN] dimensions of `predictions` and `labels`. - weights: An optional `Tensor` whose shape is broadcastable to the the first - [D1, ... DN] dimensions of `predictions` and `labels`. - metrics_collections: An optional list of collections that values should - be added to. - updates_collections: An optional list of collections that updates should - be added to. - name: Name of new update operation, and namespace for other dependent ops. - Returns: - precision: Scalar `float64` `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. - update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately, and whose value matches - `precision`. +* `top_k_predictions`: Integer `Tensor` with shape [D1, ... DN, k] where + N >= 1. Commonly, N=1 and top_k_predictions has shape [batch size, k]. + The final dimension contains the indices of top-k labels. [D1, ... DN] + must match `labels`. +* `labels`: `int64` `Tensor` or `SparseTensor` with shape + [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of + target classes for the associated prediction. Commonly, N=1 and `labels` + has shape [batch_size, num_labels]. [D1, ... DN] must match + `top_k_predictions`. Values should be in range [0, num_classes), where + num_classes is the last dimension of `predictions`. Values outside this + range are ignored. +* `class_id`: Integer class ID for which we want binary metrics. This should be + in range [0, num_classes), where num_classes is the last dimension of + `predictions`. If `class_id` is outside this range, the method returns + NAN. +* `weights`: An optional `Tensor` whose shape is broadcastable to the first + [D1, ... DN] dimensions of `predictions` and `labels`. 
+* `metrics_collections`: An optional list of collections that values should + be added to. +* `updates_collections`: An optional list of collections that updates should + be added to. +* `name`: Name of new update operation, and namespace for other dependent ops. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `predictions`, or if `weights` is not `None` and its shape doesn't match - `predictions`, or if either `metrics_collections` or `updates_collections` - are not a list or tuple. - ValueError: If `top_k_predictions` has rank < 2. +##### Returns: + + +* `precision`: Scalar `float64` `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately, and whose value matches + `precision`. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match + `predictions`, or if either `metrics_collections` or `updates_collections` + are not a list or tuple. +* `ValueError`: If `top_k_predictions` has rank < 2. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md index 8e16c312a83..d7fe415774c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.beta_bb.md @@ -94,6 +94,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.beta_bb.copy(**override_parameters_kwargs)` {#beta_bb.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.beta_bb.dtype` {#beta_bb.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md index f56cef2cb60..a129c3edef5 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Chain.md @@ -40,8 +40,8 @@ Instantiates `Chain` bijector. * `bijectors`: Python list of bijector instances. An empty list makes this bijector equivalent to the `Identity` bijector. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. Default: E.g., `Chain([Exp(), Softplus()]).name == "chain_of_exp_of_softplus"`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md index 2a50fd0cfea..84eb7e41277 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md @@ -27,8 +27,8 @@ Instantiates the `Exp` bijector. 
* `event_ndims`: Scalar `int32` `Tensor` indicating the number of dimensions associated with a particular draw from the distribution. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String` name given to ops managed by this object. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md index ccf6097f59e..c8c5c757076 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.metrics.streaming_percentage_less.md @@ -1,45 +1,43 @@ -### `tf.contrib.metrics.streaming_percentage_less(*args, **kwargs)` {#streaming_percentage_less} +### `tf.contrib.metrics.streaming_percentage_less(values, threshold, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_percentage_less} -Computes the percentage of values less than the given threshold. (deprecated arguments) +Computes the percentage of values less than the given threshold. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_percentage_less` function creates two local variables, +`total` and `count` that are used to compute the percentage of `values` that +fall below `threshold`. This rate is weighted by `weights`, and it is +ultimately returned as `percentage` which is an idempotent operation that +simply divides `total` by `count`. 
- The `streaming_percentage_less` function creates two local variables, - `total` and `count` that are used to compute the percentage of `values` that - fall below `threshold`. This rate is weighted by `weights`, and it is - ultimately returned as `percentage` which is an idempotent operation that - simply divides `total` by `count`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`percentage`. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `percentage`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - values: A numeric `Tensor` of arbitrary size. - threshold: A scalar threshold. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `values`. - weights: An optional `Tensor` whose shape is broadcastable to `values`. - metrics_collections: An optional list of collections that the metric - value variable should be added to. - updates_collections: An optional list of collections that the metric update - ops should be added to. - name: An optional variable_scope name. - Returns: - percentage: A tensor representing the current mean, the value of `total` - divided by `count`. - update_op: An operation that increments the `total` and `count` variables - appropriately. +* `values`: A numeric `Tensor` of arbitrary size. +* `threshold`: A scalar threshold. +* `weights`: An optional `Tensor` whose shape is broadcastable to `values`. +* `metrics_collections`: An optional list of collections that the metric + value variable should be added to. 
+* `updates_collections`: An optional list of collections that the metric update + ops should be added to. +* `name`: An optional variable_scope name. - Raises: - ValueError: If `ignore_mask` is not `None` and its shape doesn't match - `values`, or if `weights` is not `None` and its shape doesn't match - `values`, or if either `metrics_collections` or `updates_collections` are - not a list or tuple. +##### Returns: + + +* `percentage`: A tensor representing the current mean, the value of `total` + divided by `count`. +* `update_op`: An operation that increments the `total` and `count` variables + appropriately. + +##### Raises: + + +* `ValueError`: If `weights` is not `None` and its shape doesn't match `values`, + or if either `metrics_collections` or `updates_collections` are not a list + or tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md new file mode 100644 index 00000000000..b41f38eb494 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.piecewise_constant.md @@ -0,0 +1,41 @@ +### `tf.train.piecewise_constant(x, boundaries, values, name=None)` {#piecewise_constant} + +Piecewise constant from boundaries and interval values. + +Example: use a learning rate that's 1.0 for the first 100000 steps, 0.5 + for steps 100001 to 110000, and 0.1 for any additional steps. + +```python +global_step = tf.Variable(0, trainable=False) +boundaries = [100000, 110000] +values = [1.0, 0.5, 0.1] +learning_rate = tf.train.piecewise_constant(global_step, boundaries, values) + +# Later, whenever we perform an optimization step, we increment global_step. +``` + +##### Args: + + +* `x`: A 0-D scalar `Tensor`. Must be one of the following types: `float32`, + `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`. 
+* `boundaries`: A list of `Tensor`s or `int`s or `float`s with strictly + increasing entries, and with all elements having the same type as `x`. +* `values`: A list of `Tensor`s or `float`s or `int`s that specifies the values + for the intervals defined by `boundaries`. It should have one more element + than `boundaries`, and all elements should have the same type. +* `name`: A string. Optional name of the operation. Defaults to + 'PiecewiseConstant'. + +##### Returns: + + A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`, + `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ..., + and values[-1] when `x > boundaries[-1]`. + +##### Raises: + + +* `ValueError`: if types of `x` and `boundaries` do not match, or types of all + `values` do not match. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md new file mode 100644 index 00000000000..64a365fb08a --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.train.polynomial_decay.md @@ -0,0 +1,78 @@ +### `tf.train.polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None)` {#polynomial_decay} + +Applies a polynomial decay to the learning rate. + +It is commonly observed that a monotonically decreasing learning rate, whose +degree of change is carefully chosen, results in a better performing model. +This function applies a polynomial decay function to a provided initial +`learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. + +It requires a `global_step` value to compute the decayed learning rate. You +can just pass a TensorFlow variable that you increment at each training step. + +The function returns the decayed learning rate. 
It is computed as: + +```python +global_step = min(global_step, decay_steps) +decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + +``` + +If `cycle` is True then a multiple of `decay_steps` is used, the first one +that is bigger than `global_steps`. + +```python +decay_steps = decay_steps * ceil(global_step / decay_steps) +decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + +``` + +Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e. power=0.5): + +```python +... +global_step = tf.Variable(0, trainable=False) +starter_learning_rate = 0.1 +end_learning_rate = 0.01 +decay_steps = 10000 +learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step, + decay_steps, end_learning_rate, + power=0.5) +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: A scalar `int32` or `int64` `Tensor` or a Python number. + Must be positive. See the decay computation above. +* `end_learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The minimal end learning rate. +* `power`: A scalar `float32` or `float64` `Tensor` or a + Python number. The power of the polynomial. Defaults to sqrt, i.e. 0.5. +* `cycle`: A boolean, whether or not it should cycle beyond decay_steps. +* `name`: String. Optional name of the operation. Defaults to + 'PolynomialDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. 
+ +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md index cd70e98acfa..3a3a481a806 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Beta.md @@ -183,6 +183,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Beta.copy(**override_parameters_kwargs)` {#Beta.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Beta.dtype` {#Beta.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md index ea5c3375029..2adbad22a3f 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.Laplace.md @@ -106,6 +106,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Laplace.copy(**override_parameters_kwargs)` {#Laplace.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Laplace.dtype` {#Laplace.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md index 312dc02f8ca..6b4f3449841 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.LaplaceWithSoftplusScale.md @@ -73,6 +73,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.LaplaceWithSoftplusScale.copy(**override_parameters_kwargs)` {#LaplaceWithSoftplusScale.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.LaplaceWithSoftplusScale.dtype` {#LaplaceWithSoftplusScale.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md index 5cd5b51c303..6e1d00686dd 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.md @@ -73,6 +73,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.copy(**override_parameters_kwargs)` {#StudentTWithAbsDfSoftplusSigma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.StudentTWithAbsDfSoftplusSigma.df` {#StudentTWithAbsDfSoftplusSigma.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md index 45eaf48ba4e..bb5e60c2a8a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_mean_iou.md @@ -1,51 +1,51 @@ -### `tf.contrib.metrics.streaming_mean_iou(*args, **kwargs)` {#streaming_mean_iou} +### `tf.contrib.metrics.streaming_mean_iou(predictions, labels, num_classes, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_mean_iou} -Calculate per-step mean Intersection-Over-Union (mIOU). (deprecated arguments) +Calculate per-step mean Intersection-Over-Union (mIOU). -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +Mean Intersection-Over-Union is a common evaluation metric for +semantic image segmentation, which first computes the IOU for each +semantic class and then computes the average over classes. - Mean Intersection-Over-Union is a common evaluation metric for - semantic image segmentation, which first computes the IOU for each - semantic class and then computes the average over classes. - IOU is defined as follows: - IOU = true_positive / (true_positive + false_positive + false_negative). - The predictions are accumulated in a confusion matrix, weighted by `weights`, - and mIOU is then calculated from it. 
+##### IOU is defined as follows: - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the `mean_iou`. + IOU = true_positive / (true_positive + false_positive + false_negative). +The predictions are accumulated in a confusion matrix, weighted by `weights`, +and mIOU is then calculated from it. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the `mean_iou`. - Args: - predictions: A tensor of prediction results for semantic labels, whose - shape is [batch size] and type `int32` or `int64`. The tensor will be - flattened, if its rank > 1. - labels: A tensor of ground truth labels with shape [batch size] and of - type `int32` or `int64`. The tensor will be flattened, if its rank > 1. - num_classes: The possible number of labels the prediction task can - have. This value must be provided, since a confusion matrix of - dimension = [num_classes, num_classes] will be allocated. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `mean_iou` - should be added to. - updates_collections: An optional list of collections `update_op` should be - added to. - name: An optional variable_scope name. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Returns: - mean_iou: A tensor representing the mean intersection-over-union. - update_op: An operation that increments the confusion matrix. 
+##### Args: - Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. + +* `predictions`: A tensor of prediction results for semantic labels, whose + shape is [batch size] and type `int32` or `int64`. The tensor will be + flattened, if its rank > 1. +* `labels`: A tensor of ground truth labels with shape [batch size] and of + type `int32` or `int64`. The tensor will be flattened, if its rank > 1. +* `num_classes`: The possible number of labels the prediction task can + have. This value must be provided, since a confusion matrix of + dimension = [num_classes, num_classes] will be allocated. +* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `mean_iou` + should be added to. +* `updates_collections`: An optional list of collections `update_op` should be + added to. +* `name`: An optional variable_scope name. + +##### Returns: + + +* `mean_iou`: A tensor representing the mean intersection-over-union. +* `update_op`: An operation that increments the confusion matrix. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md index e93630f46c1..34e8bd291fd 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.metrics.streaming_recall.md @@ -1,47 +1,45 @@ -### `tf.contrib.metrics.streaming_recall(*args, **kwargs)` {#streaming_recall} +### `tf.contrib.metrics.streaming_recall(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_recall} -Computes the recall of the predictions with respect to the labels. (deprecated arguments) +Computes the recall of the predictions with respect to the labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_recall` function creates two local variables, `true_positives` +and `false_negatives`, that are used to compute the recall. This value is +ultimately returned as `recall`, an idempotent operation that simply divides +`true_positives` by the sum of `true_positives` and `false_negatives`. - The `streaming_recall` function creates two local variables, `true_positives` - and `false_negatives`, that are used to compute the recall. This value is - ultimately returned as `recall`, an idempotent operation that simply divides - `true_positives` by the sum of `true_positives` and `false_negatives`. +For estimation of the metric over a stream of data, the function creates an +`update_op` that updates these variables and returns the `recall`. `update_op` +weights each prediction by the corresponding value in `weights`. 
- For estimation of the metric over a stream of data, the function creates an - `update_op` that updates these variables and returns the `recall`. `update_op` - weights each prediction by the corresponding value in `weights`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `recall` should - be added to. - updates_collections: An optional list of collections that `update_op` should - be added to. - name: An optional variable_scope name. - Returns: - recall: Scalar float `Tensor` with the value of `true_positives` divided - by the sum of `true_positives` and `false_negatives`. - update_op: `Operation` that increments `true_positives` and - `false_negatives` variables appropriately and whose value matches - `recall`. +* `predictions`: The predicted values, a `bool` `Tensor` of arbitrary shape. +* `labels`: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. +* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `recall` should + be added to. +* `updates_collections`: An optional list of collections that `update_op` should + be added to. +* `name`: An optional variable_scope name. 
- Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. +##### Returns: + + +* `recall`: Scalar float `Tensor` with the value of `true_positives` divided + by the sum of `true_positives` and `false_negatives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_negatives` variables appropriately and whose value matches + `recall`. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md index 6246dafbc56..7b1605162e6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.ExponentialWithSoftplusLam.md @@ -87,6 +87,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.ExponentialWithSoftplusLam.copy(**override_parameters_kwargs)` {#ExponentialWithSoftplusLam.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. 
+ +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.ExponentialWithSoftplusLam.dtype` {#ExponentialWithSoftplusLam.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md index 04fc0b64b28..47ba0396b4a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.MultivariateNormalFull.md @@ -134,6 +134,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalFull.copy(**override_parameters_kwargs)` {#MultivariateNormalFull.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.MultivariateNormalFull.dtype` {#MultivariateNormalFull.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md index 9d6ad275ca8..c61b240e020 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.Normal.md @@ -137,6 +137,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Normal.copy(**override_parameters_kwargs)` {#Normal.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Normal.dtype` {#Normal.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md index f064fb3f4d0..08032b9ac52 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.beta_aa.md @@ -94,6 +94,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.beta_aa.copy(**override_parameters_kwargs)` {#beta_aa.copy} + +Creates a deep copy of the distribution. 
+ +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.beta_aa.dtype` {#beta_aa.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md index 38143ede1e5..0e590264273 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md @@ -8,7 +8,7 @@ exp = Inline( inverse_fn=tf.log, inverse_log_det_jacobian_fn=( lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), - name="Exp") + name="exp") ``` The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. @@ -29,8 +29,8 @@ Creates a `Bijector` from callables. log o det o jacobian of the forward transformation. * `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant for all input arguments. -* `validate_args`: `Boolean` indicated whether arguments should be checked for - correctness. +* `validate_args`: `Boolean` indicating whether arguments should be checked + for correctness. * `name`: `String`, name given to ops managed by this object. 
diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md index 0afe30d1899..61d1cfdcc05 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.metrics.streaming_precision.md @@ -1,49 +1,47 @@ -### `tf.contrib.metrics.streaming_precision(*args, **kwargs)` {#streaming_precision} +### `tf.contrib.metrics.streaming_precision(predictions, labels, weights=None, metrics_collections=None, updates_collections=None, name=None)` {#streaming_precision} -Computes the precision of the predictions with respect to the labels. (deprecated arguments) +Computes the precision of the predictions with respect to the labels. -SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. -Instructions for updating: -`ignore_mask` is being deprecated. Instead use `weights` with values 0.0 and 1.0 to mask values. For example, `weights=tf.logical_not(mask)`. +The `streaming_precision` function creates two local variables, +`true_positives` and `false_positives`, that are used to compute the +precision. This value is ultimately returned as `precision`, an idempotent +operation that simply divides `true_positives` by the sum of `true_positives` +and `false_positives`. - The `streaming_precision` function creates two local variables, - `true_positives` and `false_positives`, that are used to compute the - precision. This value is ultimately returned as `precision`, an idempotent - operation that simply divides `true_positives` by the sum of `true_positives` - and `false_positives`. +For estimation of the metric over a stream of data, the function creates an +`update_op` operation that updates these variables and returns the +`precision`. 
`update_op` weights each prediction by the corresponding value in +`weights`. - For estimation of the metric over a stream of data, the function creates an - `update_op` operation that updates these variables and returns the - `precision`. `update_op` weights each prediction by the corresponding value in - `weights`. +If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. - Alternatively, if `ignore_mask` is not `None`, then mask values where - `ignore_mask` is `True`. +##### Args: - Args: - predictions: The predicted values, a `bool` `Tensor` of arbitrary shape. - labels: The ground truth values, a `bool` `Tensor` whose dimensions must - match `predictions`. - ignore_mask: An optional, `bool` `Tensor` whose shape matches `predictions`. - weights: An optional `Tensor` whose shape is broadcastable to `predictions`. - metrics_collections: An optional list of collections that `precision` should - be added to. - updates_collections: An optional list of collections that `update_op` should - be added to. - name: An optional variable_scope name. - Returns: - precision: Scalar float `Tensor` with the value of `true_positives` - divided by the sum of `true_positives` and `false_positives`. - update_op: `Operation` that increments `true_positives` and - `false_positives` variables appropriately and whose value matches - `precision`. +* `predictions`: The predicted values, a `bool` `Tensor` of arbitrary shape. +* `labels`: The ground truth values, a `bool` `Tensor` whose dimensions must + match `predictions`. +* `weights`: An optional `Tensor` whose shape is broadcastable to `predictions`. +* `metrics_collections`: An optional list of collections that `precision` should + be added to. +* `updates_collections`: An optional list of collections that `update_op` should + be added to. +* `name`: An optional variable_scope name. 
- Raises: - ValueError: If `predictions` and `labels` have mismatched shapes, or if - `ignore_mask` is not `None` and its shape doesn't match `predictions`, or - if `weights` is not `None` and its shape doesn't match `predictions`, or - if either `metrics_collections` or `updates_collections` are not a list or - tuple. +##### Returns: + + +* `precision`: Scalar float `Tensor` with the value of `true_positives` + divided by the sum of `true_positives` and `false_positives`. +* `update_op`: `Operation` that increments `true_positives` and + `false_positives` variables appropriately and whose value matches + `precision`. + +##### Raises: + + +* `ValueError`: If `predictions` and `labels` have mismatched shapes, or if + `weights` is not `None` and its shape doesn't match `predictions`, or if + either `metrics_collections` or `updates_collections` are not a list or + tuple. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md new file mode 100644 index 00000000000..fe85cb1b128 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.train.inverse_time_decay.md @@ -0,0 +1,56 @@ +### `tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#inverse_time_decay} + +Applies inverse time decay to the initial learning rate. + +When training a model, it is often recommended to lower the learning rate as +the training progresses. This function applies an inverse decay function +to a provided initial learning rate. It requires an `global_step` value to +compute the decayed learning rate. You can just pass a TensorFlow variable +that you increment at each training step. + +The function returns the decayed learning rate. 
It is computed as: + +```python +decayed_learning_rate = learning_rate / (1 + decay_rate * t) +``` + +Example: decay 1/t with a rate of 0.5: + +```python +... +global_step = tf.Variable(0, trainable=False) +learning_rate = 0.1 +k = 0.5 +learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k) + +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: How often to apply decay. +* `decay_rate`: A Python number. The decay rate. +* `staircase`: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. +* `name`: String. Optional name of the operation. Defaults to + 'InverseTimeDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md index 133686cef52..b47fca09fce 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.Mixture.md @@ -134,6 +134,29 @@ cdf(x) := P[X <= x] +- - - + +#### `tf.contrib.distributions.Mixture.copy(**override_parameters_kwargs)` {#Mixture.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. 
+ +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Mixture.dtype` {#Mixture.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md index e6a161f27a5..16e5bb2e9c9 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.distributions.NormalWithSoftplusSigma.md @@ -73,6 +73,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.NormalWithSoftplusSigma.copy(**override_parameters_kwargs)` {#NormalWithSoftplusSigma.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.NormalWithSoftplusSigma.dtype` {#NormalWithSoftplusSigma.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md new file mode 100644 index 00000000000..8b7b8484432 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.image.per_image_standardization.md @@ -0,0 +1,25 @@ +### `tf.image.per_image_standardization(image)` {#per_image_standardization} + +Linearly scales `image` to have zero mean and unit norm. + +This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average +of all values in image, and +`adjusted_stddev = max(stddev, 1.0/sqrt(image.NumElements()))`. + +`stddev` is the standard deviation of all values in `image`. It is capped +away from zero to protect against division by 0 when handling uniform images. + +##### Args: + + +* `image`: 3-D tensor of shape `[height, width, channels]`. + +##### Returns: + + The standardized image with same shape as `image`. + +##### Raises: + + +* `ValueError`: if the shape of 'image' is incompatible with this function. + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md index 5f53eacdfcf..d0fa7f551eb 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.import_meta_graph.md @@ -1,4 +1,4 @@ -### `tf.train.import_meta_graph(meta_graph_or_file, import_scope=None, **kwargs)` {#import_meta_graph} +### `tf.train.import_meta_graph(meta_graph_or_file, clear_devices=False, import_scope=None, **kwargs)` {#import_meta_graph} Recreates a Graph saved in a `MetaGraphDef` proto. 
@@ -55,6 +55,8 @@ device assignments have not changed. * `meta_graph_or_file`: `MetaGraphDef` protocol buffer or filename (including the path) containing a `MetaGraphDef`. +* `clear_devices`: Whether or not to clear the device field for an `Operation` + or `Tensor` during import. * `import_scope`: Optional `string`. Name scope to add. Only used when initializing from protocol buffer. * `**kwargs`: Optional keyed arguments. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md index 42f96581068..5eae3e7ff5b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.md @@ -73,6 +73,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.copy(**override_parameters_kwargs)` {#MultivariateNormalDiagWithSoftplusStDev.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. 
+ + - - - #### `tf.contrib.distributions.MultivariateNormalDiagWithSoftplusStDev.dtype` {#MultivariateNormalDiagWithSoftplusStDev.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md index 2a2cdeb7d7d..9763d6ba473 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.Poisson.md @@ -97,6 +97,29 @@ cdf(x) := P[X <= x] values of type `self.dtype`. +- - - + +#### `tf.contrib.distributions.Poisson.copy(**override_parameters_kwargs)` {#Poisson.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.Poisson.dtype` {#Poisson.dtype} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md index 86f4f32cb4a..9781d8a33b4 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.distributions.WishartFull.md @@ -155,6 +155,29 @@ cdf(x) := P[X <= x] Boolean indicating if `Tensor` input/outputs are Cholesky factorized. 
+- - - + +#### `tf.contrib.distributions.WishartFull.copy(**override_parameters_kwargs)` {#WishartFull.copy} + +Creates a deep copy of the distribution. + +Note: the copy distribution may continue to depend on the original +intialization arguments. + +##### Args: + + +* `**override_parameters_kwargs`: String/value dictionary of initialization + arguments to override with new values. + +##### Returns: + + +* `distribution`: A new instance of `type(self)` intitialized from the union + of self.parameters and override_parameters_kwargs, i.e., + `dict(self.parameters, **override_parameters_kwargs)`. + + - - - #### `tf.contrib.distributions.WishartFull.df` {#WishartFull.df} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md index ac2c09b5bde..10c7d249043 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.train.StepCounterHook.md @@ -1,7 +1,7 @@ Steps per second monitor. - - - -#### `tf.train.StepCounterHook.__init__(every_n_steps=100, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__} +#### `tf.train.StepCounterHook.__init__(every_n_steps=100, every_n_secs=None, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__} diff --git a/tensorflow/g3doc/api_docs/python/image.md b/tensorflow/g3doc/api_docs/python/image.md index a5107d0eb09..08d027688ae 100644 --- a/tensorflow/g3doc/api_docs/python/image.md +++ b/tensorflow/g3doc/api_docs/python/image.md @@ -1210,7 +1210,7 @@ picked in the interval `[lower, upper]`. - - - -### `tf.image.per_image_whitening(image)` {#per_image_whitening} +### `tf.image.per_image_standardization(image)` {#per_image_standardization} Linearly scales `image` to have zero mean and unit norm. 
@@ -1221,11 +1221,6 @@ of all values in image, and `stddev` is the standard deviation of all values in `image`. It is capped away from zero to protect against division by 0 when handling uniform images. -Note that this implementation is limited: - -* It only whitens based on the statistics of an individual image. -* It does not take into account the covariance structure. - ##### Args: @@ -1233,7 +1228,7 @@ Note that this implementation is limited: ##### Returns: - The whitened image with same shape as `image`. + The standardized image with same shape as `image`. ##### Raises: @@ -1417,3 +1412,12 @@ false and no bounding boxes are supplied, an error is raised. Provide as input to `tf.image.draw_bounding_boxes`. + +## Other Functions and Classes +- - - + +### `tf.image.per_image_whitening(image)` {#per_image_whitening} + + + + diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md index 3f6130ad449..3d4031699cf 100644 --- a/tensorflow/g3doc/api_docs/python/index.md +++ b/tensorflow/g3doc/api_docs/python/index.md @@ -361,6 +361,7 @@ * [`hsv_to_rgb`](../../api_docs/python/image.md#hsv_to_rgb) * [`non_max_suppression`](../../api_docs/python/image.md#non_max_suppression) * [`pad_to_bounding_box`](../../api_docs/python/image.md#pad_to_bounding_box) + * [`per_image_standardization`](../../api_docs/python/image.md#per_image_standardization) * [`per_image_whitening`](../../api_docs/python/image.md#per_image_whitening) * [`random_brightness`](../../api_docs/python/image.md#random_brightness) * [`random_contrast`](../../api_docs/python/image.md#random_contrast) @@ -586,6 +587,7 @@ * [`gradients`](../../api_docs/python/train.md#gradients) * [`histogram_summary`](../../api_docs/python/train.md#histogram_summary) * [`image_summary`](../../api_docs/python/train.md#image_summary) + * [`inverse_time_decay`](../../api_docs/python/train.md#inverse_time_decay) * [`LoggingTensorHook`](../../api_docs/python/train.md#LoggingTensorHook) * 
[`LooperThread`](../../api_docs/python/train.md#LooperThread) * [`merge_all_summaries`](../../api_docs/python/train.md#merge_all_summaries) @@ -595,8 +597,11 @@ * [`MonitoredTrainingSession`](../../api_docs/python/train.md#MonitoredTrainingSession) * [`NanLossDuringTrainingError`](../../api_docs/python/train.md#NanLossDuringTrainingError) * [`NanTensorHook`](../../api_docs/python/train.md#NanTensorHook) + * [`natural_exp_decay`](../../api_docs/python/train.md#natural_exp_decay) * [`NewCheckpointReader`](../../api_docs/python/train.md#NewCheckpointReader) * [`Optimizer`](../../api_docs/python/train.md#Optimizer) + * [`piecewise_constant`](../../api_docs/python/train.md#piecewise_constant) + * [`polynomial_decay`](../../api_docs/python/train.md#polynomial_decay) * [`ProximalAdagradOptimizer`](../../api_docs/python/train.md#ProximalAdagradOptimizer) * [`ProximalGradientDescentOptimizer`](../../api_docs/python/train.md#ProximalGradientDescentOptimizer) * [`QueueRunner`](../../api_docs/python/train.md#QueueRunner) @@ -775,6 +780,7 @@ * **[Random variable transformations (contrib)](../../api_docs/python/contrib.distributions.bijector.md)**: * [`Bijector`](../../api_docs/python/contrib.distributions.bijector.md#Bijector) * [`Chain`](../../api_docs/python/contrib.distributions.bijector.md#Chain) + * [`CholeskyOuterProduct`](../../api_docs/python/contrib.distributions.bijector.md#CholeskyOuterProduct) * [`Exp`](../../api_docs/python/contrib.distributions.bijector.md#Exp) * [`Identity`](../../api_docs/python/contrib.distributions.bijector.md#Identity) * [`Inline`](../../api_docs/python/contrib.distributions.bijector.md#Inline) diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md index 71f3563a545..237f6541436 100644 --- a/tensorflow/g3doc/api_docs/python/state_ops.md +++ b/tensorflow/g3doc/api_docs/python/state_ops.md @@ -3158,7 +3158,7 @@ a subgraph. 
- - - -### `tf.train.import_meta_graph(meta_graph_or_file, import_scope=None, **kwargs)` {#import_meta_graph} +### `tf.train.import_meta_graph(meta_graph_or_file, clear_devices=False, import_scope=None, **kwargs)` {#import_meta_graph} Recreates a Graph saved in a `MetaGraphDef` proto. @@ -3215,6 +3215,8 @@ device assignments have not changed. * `meta_graph_or_file`: `MetaGraphDef` protocol buffer or filename (including the path) containing a `MetaGraphDef`. +* `clear_devices`: Whether or not to clear the device field for an `Operation` + or `Tensor` during import. * `import_scope`: Optional `string`. Name scope to add. Only used when initializing from protocol buffer. * `**kwargs`: Optional keyed arguments. diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md index 6c4e08ab3cf..7b367cf77a4 100644 --- a/tensorflow/g3doc/api_docs/python/train.md +++ b/tensorflow/g3doc/api_docs/python/train.md @@ -995,6 +995,249 @@ learning_step = ( * `ValueError`: if `global_step` is not supplied. +- - - + +### `tf.train.inverse_time_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#inverse_time_decay} + +Applies inverse time decay to the initial learning rate. + +When training a model, it is often recommended to lower the learning rate as +the training progresses. This function applies an inverse decay function +to a provided initial learning rate. It requires an `global_step` value to +compute the decayed learning rate. You can just pass a TensorFlow variable +that you increment at each training step. + +The function returns the decayed learning rate. It is computed as: + +```python +decayed_learning_rate = learning_rate / (1 + decay_rate * t) +``` + +Example: decay 1/t with a rate of 0.5: + +```python +... 
+global_step = tf.Variable(0, trainable=False) +learning_rate = 0.1 +k = 0.5 +learning_rate = tf.train.inverse_time_decay(learning_rate, global_step, k) + +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: How often to apply decay. +* `decay_rate`: A Python number. The decay rate. +* `staircase`: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. +* `name`: String. Optional name of the operation. Defaults to + 'InverseTimeDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + + +- - - + +### `tf.train.natural_exp_decay(learning_rate, global_step, decay_steps, decay_rate, staircase=False, name=None)` {#natural_exp_decay} + +Applies natural exponential decay to the initial learning rate. + +When training a model, it is often recommended to lower the learning rate as +the training progresses. This function applies an exponential decay function +to a provided initial learning rate. It requires a `global_step` value to +compute the decayed learning rate. You can just pass a TensorFlow variable +that you increment at each training step. + +The function returns the decayed learning rate. It is computed as: + +```python +decayed_learning_rate = learning_rate * exp(-decay_rate * global_step) +``` + +Example: decay exponentially with a base of 0.96: + +```python +...
+global_step = tf.Variable(0, trainable=False) +learning_rate = 0.1 +k = 0.5 +learning_rate = tf.train.natural_exp_decay(learning_rate, global_step, k) + +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: How often to apply decay. +* `decay_rate`: A Python number. The decay rate. +* `staircase`: Whether to apply decay in a discrete staircase, as opposed to + continuous, fashion. +* `name`: String. Optional name of the operation. Defaults to + 'ExponentialTimeDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + + +- - - + +### `tf.train.piecewise_constant(x, boundaries, values, name=None)` {#piecewise_constant} + +Piecewise constant from boundaries and interval values. + +Example: use a learning rate that's 1.0 for the first 100000 steps, 0.5 + for steps 100001 to 110000, and 0.1 for any additional steps. + +```python +global_step = tf.Variable(0, trainable=False) +boundaries = [100000, 110000] +values = [1.0, 0.5, 0.1] +learning_rate = tf.train.piecewise_constant(global_step, boundaries, values) + +# Later, whenever we perform an optimization step, we increment global_step. +``` + +##### Args: + + +* `x`: A 0-D scalar `Tensor`. Must be one of the following types: `float32`, + `float64`, `uint8`, `int8`, `int16`, `int32`, `int64`. +* `boundaries`: A list of `Tensor`s or `int`s or `float`s with strictly + increasing entries, and with all elements having the same type as `x`.
+* `values`: A list of `Tensor`s or `float`s or `int`s that specifies the values + for the intervals defined by `boundaries`. It should have one more element + than `boundaries`, and all elements should have the same type. +* `name`: A string. Optional name of the operation. Defaults to + 'PiecewiseConstant'. + +##### Returns: + + A 0-D Tensor. Its value is `values[0]` when `x <= boundaries[0]`, + `values[1]` when `x > boundaries[0]` and `x <= boundaries[1]`, ..., + and `values[-1]` when `x > boundaries[-1]`. + +##### Raises: + + +* `ValueError`: if types of `x` and `boundaries` do not match, or types of all + `values` do not match. + + +- - - + +### `tf.train.polynomial_decay(learning_rate, global_step, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name=None)` {#polynomial_decay} + +Applies a polynomial decay to the learning rate. + +It is commonly observed that a monotonically decreasing learning rate, whose +degree of change is carefully chosen, results in a better performing model. +This function applies a polynomial decay function to a provided initial +`learning_rate` to reach an `end_learning_rate` in the given `decay_steps`. + +It requires a `global_step` value to compute the decayed learning rate. You +can just pass a TensorFlow variable that you increment at each training step. + +The function returns the decayed learning rate. It is computed as: + +```python +global_step = min(global_step, decay_steps) +decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + +``` + +If `cycle` is True then a multiple of `decay_steps` is used, the first one +that is bigger than `global_step`. + +```python +decay_steps = decay_steps * ceil(global_step / decay_steps) +decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ (power) + + end_learning_rate + +``` + +Example: decay from 0.1 to 0.01 in 10000 steps using sqrt (i.e.
power=0.5): + +```python +... +global_step = tf.Variable(0, trainable=False) +starter_learning_rate = 0.1 +end_learning_rate = 0.01 +decay_steps = 10000 +learning_rate = tf.train.polynomial_decay(starter_learning_rate, global_step, + decay_steps, end_learning_rate, + power=0.5) +# Passing global_step to minimize() will increment it at each step. +learning_step = ( + tf.train.GradientDescentOptimizer(learning_rate) + .minimize(...my loss..., global_step=global_step) +) +``` + +##### Args: + + +* `learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The initial learning rate. +* `global_step`: A scalar `int32` or `int64` `Tensor` or a Python number. + Global step to use for the decay computation. Must not be negative. +* `decay_steps`: A scalar `int32` or `int64` `Tensor` or a Python number. + Must be positive. See the decay computation above. +* `end_learning_rate`: A scalar `float32` or `float64` `Tensor` or a + Python number. The minimal end learning rate. +* `power`: A scalar `float32` or `float64` `Tensor` or a + Python number. The power of the polynomial. Defaults to sqrt, i.e. 0.5. +* `cycle`: A boolean, whether or not it should cycle beyond decay_steps. +* `name`: String. Optional name of the operation. Defaults to + 'PolynomialDecay'. + +##### Returns: + + A scalar `Tensor` of the same type as `learning_rate`. The decayed + learning rate. + +##### Raises: + + +* `ValueError`: if `global_step` is not supplied. + + ## Moving Averages @@ -4417,7 +4660,7 @@ Initialize CheckpointSaverHook monitor. Steps per second monitor. 
- - - -#### `tf.train.StepCounterHook.__init__(every_n_steps=100, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__} +#### `tf.train.StepCounterHook.__init__(every_n_steps=100, every_n_secs=None, output_dir=None, summary_writer=None)` {#StepCounterHook.__init__} diff --git a/tensorflow/g3doc/how_tos/meta_graph/index.md b/tensorflow/g3doc/how_tos/meta_graph/index.md index a7bce5101cd..7ff89972756 100644 --- a/tensorflow/g3doc/how_tos/meta_graph/index.md +++ b/tensorflow/g3doc/how_tos/meta_graph/index.md @@ -32,24 +32,37 @@ to and from `MetaGraphDef`, the Python class must implement `to_proto()` and For example, ```Python - def to_proto(self): + def to_proto(self, export_scope=None): + """Converts a `Variable` to a `VariableDef` protocol buffer. + Args: + export_scope: Optional `string`. Name scope to remove. + Returns: - A `VariableDef` protocol buffer. + A `VariableDef` protocol buffer, or `None` if the `Variable` is not + in the specified name scope. """ - var_def = variable_pb2.VariableDef() - var_def.variable_name = self._variable.name - var_def.initializer_name = self.initializer.name - var_def.snapshot_name = self._snapshot.name - if self._save_slice_info: - var_def.save_slice_info_def.MergeFrom(self._save_slice_info.to_proto()) - return var_def + if (export_scope is None or + self._variable.name.startswith(export_scope)): + var_def = variable_pb2.VariableDef() + var_def.variable_name = ops.strip_name_scope( + self._variable.name, export_scope) + var_def.initializer_name = ops.strip_name_scope( + self.initializer.name, export_scope) + var_def.snapshot_name = ops.strip_name_scope( + self._snapshot.name, export_scope) + if self._save_slice_info: + var_def.save_slice_info_def.MergeFrom(self._save_slice_info.to_proto( + export_scope=export_scope)) + return var_def + else: + return None @staticmethod - def from_proto(variable_def): + def from_proto(variable_def, import_scope=None): """Returns a `Variable` object created from `variable_def`.""" - 
return Variable(variable_def=variable_def) + return Variable(variable_def=variable_def, import_scope=import_scope) ops.register_proto_function(ops.GraphKeys.VARIABLES, proto_type=variable_pb2.VariableDef, @@ -228,6 +241,40 @@ Here are some of the typical usage models: sess.run(train_op) ``` +* Import a graph with preset devices. + + Sometimes an exported meta graph is from a training environment that the + importer doesn't have. For example, the model might have been trained + on GPUs, or in a distributed environment with replicas. When importing + such models, it's useful to be able to clear the device settings in + the graph so that we can run it on locally available devices. This can + be achieved by calling `import_meta_graph` with the `clear_devices` + option set to `True`. + + ```Python + with tf.Session() as sess: + new_saver = tf.train.import_meta_graph('my-save-dir/my-model-10000.meta', + clear_devices=True) + new_saver.restore(sess, 'my-save-dir/my-model-10000') + ... + ``` + +* Import within the default graph. + + Sometimes you might want to run `export_meta_graph` and `import_meta_graph` + in codelab using the default graph. In that case, you need to reset + the default graph by calling `tf.reset_default_graph()` first before + running import. + + ```Python + meta_graph_def = tf.train.export_meta_graph() + ... + tf.reset_default_graph() + ... + tf.train.import_meta_graph(meta_graph_def) + ... + ``` + * Retrieve Hyper Parameters ```Python diff --git a/tensorflow/g3doc/tutorials/deep_cnn/index.md b/tensorflow/g3doc/tutorials/deep_cnn/index.md index 89ba53ac6fc..a5302df9147 100644 --- a/tensorflow/g3doc/tutorials/deep_cnn/index.md +++ b/tensorflow/g3doc/tutorials/deep_cnn/index.md @@ -122,7 +122,7 @@ The images are processed as follows: * They are cropped to 24 x 24 pixels, centrally for evaluation or [randomly](../../api_docs/python/constant_op.md#random_crop) for training. 
-* They are [approximately whitened](../../api_docs/python/image.md#per_image_whitening) +* They are [approximately whitened](../../api_docs/python/image.md#per_image_standardization) to make the model insensitive to dynamic range. For training, we additionally apply a series of random distortions to diff --git a/tensorflow/g3doc/tutorials/mnist/pros/index.md b/tensorflow/g3doc/tutorials/mnist/pros/index.md index 72792c6fbe0..6237d7e048e 100644 --- a/tensorflow/g3doc/tutorials/mnist/pros/index.md +++ b/tensorflow/g3doc/tutorials/mnist/pros/index.md @@ -292,7 +292,7 @@ def max_pool_2x2(x): ### First Convolutional Layer We can now implement our first layer. It will consist of convolution, followed -by max pooling. The convolutional will compute 32 features for each 5x5 patch. +by max pooling. The convolution will compute 32 features for each 5x5 patch. Its weight tensor will have a shape of `[5, 5, 1, 32]`. The first two dimensions are the patch size, the next is the number of input channels, and the last is the number of output channels. We will also have a bias vector with @@ -312,7 +312,8 @@ x_image = tf.reshape(x, [-1,28,28,1]) ``` We then convolve `x_image` with the weight tensor, add the -bias, apply the ReLU function, and finally max pool. +bias, apply the ReLU function, and finally max pool. The `max_pool_2x2` method will +reduce the image size to 14x14. 
```python h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) diff --git a/tensorflow/go/example_inception_inference_test.go b/tensorflow/go/example_inception_inference_test.go index b58942aefb3..09c70044688 100644 --- a/tensorflow/go/example_inception_inference_test.go +++ b/tensorflow/go/example_inception_inference_test.go @@ -28,6 +28,7 @@ import ( "os" "path/filepath" + "github.com/tensorflow/tensorflow/tensorflow/go/op" tf "github.com/tensorflow/tensorflow/tensorflow/go" ) @@ -53,8 +54,14 @@ func Example() { // This example: // - Loads the serialized representation of the pre-trained model into a Graph // - Creates a Session to execute operations on the Graph - // - Converts an image file to a Tensor to provide as input for Graph execution - // - Exectues the graph and prints out the label with the highest probability + // - Converts an image file to a Tensor to provide as input to a Session run + // - Executes the Session and prints out the label with the highest probability + // + // To convert an image file to a Tensor suitable for input to the Inception model, + // this example: + // - Constructs another TensorFlow graph to normalize the image into a + // form suitable for the model (for example, resizing the image) + // - Creates and executes a Session to obtain a Tensor in this normalized form. modeldir := flag.String("dir", "", "Directory containing the trained model files. The directory will be created and the model downloaded into it if necessary") imagefile := flag.String("image", "", "Path of the image to extract labels for") flag.Parse() @@ -89,7 +96,7 @@ func Example() { // For multiple images, session.Run() can be called in a loop (and // concurrently). Furthermore, images can be batched together since the // model accepts batches of image data as input.
- tensor, err := makeTensorFromImageForInception(*imagefile) + tensor, err := makeTensorFromImage(*imagefile) if err != nil { log.Fatal(err) } @@ -136,54 +143,102 @@ func printBestLabel(probabilities []float32, labelsFile string) { fmt.Printf("BEST MATCH: (%2.0f%% likely) %s\n", probabilities[bestIdx]*100.0, labels[bestIdx]) } -// Given an image stored in filename, returns a Tensor which is suitable for -// providing the image data to the pre-defined model. -func makeTensorFromImageForInception(filename string) (*tf.Tensor, error) { - const ( - // Some constants specific to the pre-trained model at: - // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip - // - // - The model was trained after with images scaled to 224x224 pixels. - // - The colors, represented as R, G, B in 1-byte each were converted to - // float using (value - Mean)/Std. - // - // If using a different pre-trained model, the values will have to be adjusted. - H, W = 224, 224 - Mean = 117 - Std = float32(1) - ) +// Convert the image in filename to a Tensor suitable as input to the Inception model.
+func makeTensorFromImage(filename string) (*tf.Tensor, error) { + // Load the pixels from the file file, err := os.Open(filename) if err != nil { return nil, err } - defer file.Close() img, _, err := image.Decode(file) + file.Close() if err != nil { return nil, err } - sz := img.Bounds().Size() - if sz.X != W || sz.Y != H { - return nil, fmt.Errorf("input image is required to be %dx%d pixels, was %dx%d", W, H, sz.X, sz.Y) - } - // 4-dimensional input: - // - 1st dimension: Batch size (the model takes a batch of images as - // input, here the "batch size" is 1) - // - 2nd dimension: Rows of the image - // - 3rd dimension: Columns of the row - // - 4th dimension: Colors of the pixel as (B, G, R) - // Thus, the shape is [1, 224, 224, 3] - var ret [1][H][W][3]float32 - for y := 0; y < H; y++ { - for x := 0; x < W; x++ { + // Represent the image as [H][W][B,G,R]byte + contents := make([][][3]byte, img.Bounds().Size().Y) + for y := 0; y < len(contents); y++ { + contents[y] = make([][3]byte, img.Bounds().Size().X) + for x := 0; x < len(contents[y]); x++ { px := x + img.Bounds().Min.X py := y + img.Bounds().Min.Y r, g, b, _ := img.At(px, py).RGBA() - ret[0][y][x][0] = float32((int(b>>8) - Mean)) / Std - ret[0][y][x][1] = float32((int(g>>8) - Mean)) / Std - ret[0][y][x][2] = float32((int(r>>8) - Mean)) / Std + // image.Image uses 16-bits for each color. + // We want 8-bits. 
+ contents[y][x][0] = byte(b >> 8) + contents[y][x][1] = byte(g >> 8) + contents[y][x][2] = byte(r >> 8) } } - return tf.NewTensor(ret) + tensor, err := tf.NewTensor(contents) + if err != nil { + return nil, err + } + // Construct a graph to normalize the image + graph, input, output, err := constructGraphToNormalizeImage() + if err != nil { + return nil, err + } + // Execute that graph to normalize this one image + session, err := tf.NewSession(graph, nil) + if err != nil { + return nil, err + } + defer session.Close() + normalized, err := session.Run( + map[tf.Output]*tf.Tensor{input: tensor}, + []tf.Output{output}, + nil) + if err != nil { + return nil, err + } + return normalized[0], nil +} + +// The inception model takes as input the image described by a Tensor in a very +// specific normalized format (a particular image size, shape of the input tensor, +// normalized pixel values etc.). +// +// This function constructs a graph of TensorFlow operations which takes as input +// the raw pixel values of an image in the form of a Tensor of shape [Height, Width, 3] +// and returns a tensor suitable for input to the inception model. +// +// T[y][x] is the (Blue, Green, Red) values of the pixel at position (x, y) in the image, +// with each color value represented as a single byte. +func constructGraphToNormalizeImage() (graph *tf.Graph, input, output tf.Output, err error) { + // Some constants specific to the pre-trained model at: + // https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip + // + // - The model was trained with images scaled to 224x224 pixels. + // - The colors, represented as R, G, B in 1-byte each were converted to + // float using (value - Mean)/Scale. + // + // If using a different pre-trained model, the values will have to be adjusted.
+ const ( + H, W = 224, 224 + Mean = float32(117) + Scale = float32(1) + ) + // - input is a 3D tensor of shape [Height, Width, Colors=3], where + // each pixel is represented as a triplet of 1-byte colors + // - ResizeBilinear (and the inception model) takes a 4D tensor of shape + // [BatchSize, Height, Width, Colors=3], where each pixel is + // represented as a triplet of floats + // - Apply normalization on each pixel and use ExpandDims to make + // this single image be a "batch" of size 1 for ResizeBilinear. + s := op.NewScope() + input = op.Placeholder(s, tf.Uint8) + output = op.Div(s, + op.Sub(s, + op.ResizeBilinear(s, + op.ExpandDims(s, + op.Cast(s, input, tf.Float), + op.Const(s.SubScope("make_batch"), int32(0))), + op.Const(s.SubScope("size"), []int32{H, W})), + op.Const(s.SubScope("mean"), Mean)), + op.Const(s.SubScope("scale"), Scale)) + graph, err = s.Finalize() + return graph, input, output, err } func modelFiles(dir string) (modelfile, labelsfile string, err error) { diff --git a/tensorflow/go/genop/internal/genop.go b/tensorflow/go/genop/internal/genop.go index fdc55f5ebce..5d5aa269929 100644 --- a/tensorflow/go/genop/internal/genop.go +++ b/tensorflow/go/genop/internal/genop.go @@ -244,10 +244,14 @@ func {{.Op.Name}} {{if .OptionalAttrs}}, optional ...{{.Op.Name}}Attr{{end -}} ) -{{- /* Construct outputs: len(OpDef.OutputArg) + 1 (for error) */ -}} +{{- /* Construct outputs: len(OpDef.OutputArg) */ -}} -({{range $i,$a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}} -{{if .Op.OutputArg}}, {{end}}err error) { +{{if .Op.OutputArg -}} +({{range $i,$a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier $a.Name}} {{if IsListArg $a}}[]{{end}}tf.Output{{end -}}) +{{- end }} { + if scope.Err() != nil { + return + } {{if .HasAttrs -}} attrs := map[string]interface{}{ {{- range .RequiredAttrs}}{{printf "%q" .Name}}: {{Identifier .Name}},{{end}}} {{if .OptionalAttrs -}} @@ -262,25 +266,37 @@ func 
{{.Op.Name}} Input: []tf.Input{ {{range .Op.InputArg}}{{if IsListArg .}}tf.OutputList({{Identifier .Name}}){{else}}{{Identifier .Name}}{{end}}, {{end}} }, - {{end}} - {{- if .HasAttrs}}Attrs: attrs,{{end}} + {{- end}} + {{- if .HasAttrs}} + Attrs: attrs, + {{- end}} } - {{if .Op.OutputArg}}op, err :={{else}}_, err ={{end}} scope.Graph().AddOperation(opspec) + {{- if .Op.OutputArg}} {{- if .HasListOutput}} + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } var idx int + var err error {{- range $i, $a := .Op.OutputArg}} {{- if IsListArg $a}} if {{Identifier .Name}}, idx, err = makeOutputList(op, idx, {{printf "%q" .Name}}); err != nil { - return {{range $.Op.OutputArg}}{{Identifier .Name}}, {{end}}err + scope.UpdateErr({{printf "%q" $.Op.Name}}, err) + return } {{- else }} {{Identifier .Name}} = op.Output(idx) - {{- end }} - {{- end }} - return {{range .Op.OutputArg}}{{Identifier .Name}}, {{end}}err + {{- end }}{{- /* if IsListArg */}} + {{- end }}{{- /* range .Op.OutputArg */}} + return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}{{Identifier .Name}}{{end}} {{- else }} - return {{range $i, $a := .Op.OutputArg}}op.Output({{$i}}), {{end}}err - {{- end }} + op := scope.AddOperation(opspec) + return {{range $i, $a := .Op.OutputArg}}{{if $i}}, {{end}}op.Output({{$i}}){{end}} + {{- end }}{{- /* if .HasListOutput */}} + {{- else }} + scope.AddOperation(opspec) + {{- end }}{{- /* if .Op.OutputArg */}} } `)) ) diff --git a/tensorflow/go/genop/internal/genop_test.go b/tensorflow/go/genop/internal/genop_test.go index dade7ce48f7..b3bcd9db052 100644 --- a/tensorflow/go/genop/internal/genop_test.go +++ b/tensorflow/go/genop/internal/genop_test.go @@ -39,12 +39,14 @@ summary: "No. Op." `, wanted: ` // No. Op. 
-func NoOp(scope *Scope) (err error) { +func NoOp(scope *Scope) { + if scope.Err() != nil { + return + } opspec := tf.OpSpec{ Type: "NoOp", } - _, err = scope.Graph().AddOperation(opspec) - return err + scope.AddOperation(opspec) } `, }, @@ -81,15 +83,18 @@ description: "Blah blah", // Returns x + y element-wise. // // Blah blah -func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output, err error) { +func Add(scope *Scope, x tf.Output, y tf.Output) (z tf.Output) { + if scope.Err() != nil { + return + } opspec := tf.OpSpec{ Type: "Add", Input: []tf.Input{ x, y, }, } - op, err := scope.Graph().AddOperation(opspec) - return op.Output(0), err + op := scope.AddOperation(opspec) + return op.Output(0) } `, }, @@ -117,7 +122,10 @@ summary: "Cast x of type SrcT to y of DstT." `, wanted: ` // Cast x of type SrcT to y of DstT. -func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output, err error) { +func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output) { + if scope.Err() != nil { + return + } attrs := map[string]interface{}{"DstT": DstT} opspec := tf.OpSpec{ Type: "Cast", @@ -126,8 +134,8 @@ func Cast(scope *Scope, x tf.Output, DstT tf.DataType) (y tf.Output, err error) }, Attrs: attrs, } - op, err := scope.Graph().AddOperation(opspec) - return op.Output(0), err + op := scope.AddOperation(opspec) + return op.Output(0) } `, }, @@ -218,7 +226,10 @@ func DecodeJpegAcceptableFraction(value float32) DecodeJpegAttr { // contents: 0-D. The JPEG-encoded image. 
// // Returns 3-D with shape [height, width, channels] -func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output, err error) { +func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (image tf.Output) { + if scope.Err() != nil { + return + } attrs := map[string]interface{}{} for _, a := range optional { a(attrs) @@ -230,8 +241,47 @@ func DecodeJpeg(scope *Scope, contents tf.Output, optional ...DecodeJpegAttr) (i }, Attrs: attrs, } - op, err := scope.Graph().AddOperation(opspec) - return op.Output(0), err + op := scope.AddOperation(opspec) + return op.Output(0) +} +`, + }, + { + tag: "MultipleOutputs", + opdef: ` +name: "TwoOutputs" +input_arg: < + name: "input" + type_attr: "T" +> +output_arg < + name: "x" + type_attr: "T" +> +output_arg < + name: "y" + type_attr: "T" +> +attr: < + name: "T" + type: "type" +> +summary: "Op that produces multiple outputs" +`, + wanted: ` +// Op that produces multiple outputs +func TwoOutputs(scope *Scope, input tf.Output) (x tf.Output, y tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "TwoOutputs", + Input: []tf.Input{ + input, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0), op.Output(1) } `, }, @@ -290,7 +340,10 @@ func ShapeNOutType(value tf.DataType) ShapeNAttr { // Returns shape of tensors. // // Some description here. 
-func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output, err error) { +func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []tf.Output) { + if scope.Err() != nil { + return + } attrs := map[string]interface{}{} for _, a := range optional { a(attrs) @@ -302,12 +355,17 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t }, Attrs: attrs, } - op, err := scope.Graph().AddOperation(opspec) - var idx int - if output, idx, err = makeOutputList(op, idx, "output"); err != nil { - return output, err + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return } - return output, err + var idx int + var err error + if output, idx, err = makeOutputList(op, idx, "output"); err != nil { + scope.UpdateErr("ShapeN", err) + return + } + return output } `, }, @@ -325,11 +383,11 @@ func ShapeN(scope *Scope, input []tf.Output, optional ...ShapeNAttr) (output []t } got, err := format.Source(buf.Bytes()) if err != nil { - t.Fatal(err) + t.Fatalf("Unable to format: %v\n%s", err, buf.Bytes()) } want, err := format.Source([]byte(test.wanted)) if err != nil { - t.Fatal(err) + t.Fatalf("Unable to format: %v\n%s", err, test.wanted) } if !bytes.Equal(got, want) { t.Fatalf("Got:\n%s\nWant:\n%s\n", got, want) diff --git a/tensorflow/go/op/op.go b/tensorflow/go/op/op.go index dd79c2076ac..3d820a60e69 100644 --- a/tensorflow/go/op/op.go +++ b/tensorflow/go/op/op.go @@ -28,24 +28,23 @@ import ( ) // Const adds an operation to graph that produces value as output. 
-func Const(scope *Scope, value interface{}) (tf.Output, error) { - if t, ok := value.(*tf.Tensor); ok { - return makeConst(scope, t) +func Const(scope *Scope, value interface{}) (output tf.Output) { + if scope.Err() != nil { + return } - t, err := tf.NewTensor(value) - if err != nil { - return tf.Output{}, err + t, ok := value.(*tf.Tensor) + if !ok { + var err error + if t, err = tf.NewTensor(value); err != nil { + scope.UpdateErr("Const", err) + return + } } - return makeConst(scope, t) -} - -func makeConst(scope *Scope, t *tf.Tensor) (tf.Output, error) { - op, err := scope.Graph().AddOperation(tf.OpSpec{ + return scope.AddOperation(tf.OpSpec{ Name: scope.opName("Const"), Type: "Const", Attrs: map[string]interface{}{ "dtype": t.DataType(), "value": t, - }}) - return op.Output(0), err + }}).Output(0) } diff --git a/tensorflow/go/op/scope.go b/tensorflow/go/op/scope.go index 25ebbae70f6..346c756f563 100644 --- a/tensorflow/go/op/scope.go +++ b/tensorflow/go/op/scope.go @@ -16,33 +16,60 @@ package op import ( "fmt" + "runtime/debug" tf "github.com/tensorflow/tensorflow/tensorflow/go" ) -// Scope encapsulates common properties of operations being added to a Graph. +// Scope encapsulates common operation properties when building a Graph. // -// Scopes allow common properties (such as a name prefix) to be specified -// once for multiple operations being added to a graph. The With* methods -// create derivative scopes that encapsulate the same set of properties -// as the parent Scope, except for the one being changed by the specific -// With* method. +// A Scope object (and its derivates, e.g., obtained from Scope.SubScope) +// act as a builder for graphs. They allow common properties (such as +// a name prefix) to be specified for multiple operations being added +// to the graph. // -// Scopes are NOT safe for concurrent use by multiple goroutines. 
+// A Scope object and all its derivates (e.g., obtained from Scope.SubScope) +// are not safe for concurrent use by multiple goroutines. type Scope struct { graph *tf.Graph namemap map[string]int namespace string + err *scopeErr +} + +// scopeErr is used to share errors between all derivatives of a root scope. +type scopeErr struct { + err error } // NewScope creates a Scope initialized with an empty Graph. func NewScope() *Scope { - return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int)} + return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int), err: new(scopeErr)} } -// Graph returns the Graph which this Scope and its children are -func (s *Scope) Graph() *tf.Graph { - return s.graph +// Finalize returns the Graph on which this scope operates on and renders s +// unusable. If there was an error during graph construction, that error is +// returned instead. +func (s *Scope) Finalize() (*tf.Graph, error) { + if err := s.Err(); err != nil { + return nil, err + } + s.err.err = fmt.Errorf("Scope has been finalized and is no longer usable") + return s.graph, nil +} + +// AddOperation adds the operation to the Graph managed by s. +// +// See Graph.AddOperation. +func (s *Scope) AddOperation(args tf.OpSpec) *tf.Operation { + if s.Err() != nil { + return nil + } + op, err := s.graph.AddOperation(args) + if err != nil { + s.UpdateErr(args.Type, err) + } + return op } // SubScope returns a new Scope which will cause all operations added to the @@ -57,6 +84,25 @@ func (s *Scope) SubScope(namespace string) *Scope { graph: s.graph, namemap: make(map[string]int), namespace: namespace, + err: s.err, + } +} + +// Err returns the error, if any, encountered during the construction +// of the Graph managed by s. +// +// Once Err returns a non-nil error, all future calls will do the same, +// indicating that the scope should be discarded as the graph could not +// be constructed. 
+func (s *Scope) Err() error { + return s.err.err +} + +// UpdateErr is used to notify Scope of any graph construction errors +// while creating the operation op. +func (s *Scope) UpdateErr(op string, err error) { + if s.err.err == nil { + s.err.err = fmt.Errorf("failed to add operation %q: %v (Stacktrace: %s)", op, err, debug.Stack()) } } diff --git a/tensorflow/go/op/scope_test.go b/tensorflow/go/op/scope_test.go index ba0a183bb9c..4fcb1a56d56 100644 --- a/tensorflow/go/op/scope_test.go +++ b/tensorflow/go/op/scope_test.go @@ -22,13 +22,6 @@ import ( ) func TestScopeSubScope(t *testing.T) { - constant := func(s *Scope) string { - c, err := Const(s, int64(1)) - if err != nil { - t.Fatal(err) - } - return c.Op.Name() - } var ( root = NewScope() sub1 = root.SubScope("x") @@ -37,54 +30,89 @@ func TestScopeSubScope(t *testing.T) { sub2a = sub2.SubScope("y") ) testdata := []struct { - got, want string + scope *Scope + name string }{ - {constant(root), "Const"}, - {constant(sub1), "x/Const"}, - {constant(sub1a), "x/y/Const"}, - {constant(sub2), "x_1/Const"}, - {constant(sub2a), "x_1/y/Const"}, + {root, "Const"}, + {sub1, "x/Const"}, + {sub1a, "x/y/Const"}, + {sub2, "x_1/Const"}, + {sub2a, "x_1/y/Const"}, } - for idx, test := range testdata { - if test.got != test.want { - t.Errorf("#%d: Got %q, want %q", idx, test.got, test.want) + for _, test := range testdata { + c := Const(test.scope, int64(1)) + if err := test.scope.Err(); err != nil { + t.Fatalf("%q: %v", test.name, err) + } + if got := c.Op.Name(); got != test.name { + t.Errorf("%q: Got %q", test.name, got) } } +} +func TestScopeSubScopeErrors(t *testing.T) { + var ( + root = NewScope() + sub = root.SubScope("x") + ) + // Error on the root, even after sub has been created should be propagated. + // Force an error by creating a Const which has a type that does not + // translate to the TensorFlow type system. 
+ Const(root, int(1)) + if err := root.Err(); err == nil { + t.Fatal("Expected error") + } + if err := sub.Err(); err == nil { + t.Errorf("Root scope had error [%v], but sub-scope did not", root.Err()) + } +} + +func TestScopeFinalize(t *testing.T) { + var ( + root = NewScope() + sub1 = root.SubScope("x") + sub2 = sub1.SubScope("y") + ) + if _, err := sub1.Finalize(); err != nil { + t.Fatal(err) + } + if err := root.Err(); err == nil { + t.Error("Root scope's Err() should be non-nil once Finalize has been called") + } + if err := sub2.Err(); err == nil { + t.Error("Sub scope's Err() should be non-nil once Finalize has been called") + } } func Example() { // This example creates a Graph that multiplies a constant matrix with // a matrix to be provided during graph execution (via // tensorflow.Session). - scope := NewScope() - var m1, m2, product tf.Output - var err error - // A constant 2x1 matrix - if m1, err = Const(scope, [][]float32{{10}, {20}}); err != nil { - panic(err) - } - // A placeholder for another matrix - if m2, err = Placeholder(scope, tf.Float); err != nil { - panic(err) - } - // product = m1 x transpose(m2) - if product, err = MatMul(scope, m1, m2, MatMulTransposeB(true)); err != nil {// m1 x transpose(m2) - panic(err) + s := NewScope() + input := Placeholder(s, tf.Float) // Matrix to be provided to Session.Run + output := MatMul(s, + Const(s, [][]float32{{10}, {20}}), // Constant 2x1 matrix + input, + MatMulTransposeB(true)) + if s.Err() != nil { + panic(s.Err()) } // Shape of the product: The number of rows is fixed by m1, but the // number of columns will depend on m2, which is unknown. 
- shape, _ := product.Shape() + shape, _ := output.Shape() fmt.Println(shape) // Output: [2 -1] } func ExampleScope_SubScope() { var ( - s = NewScope() - c1, _ = Const(s.SubScope("x"), int64(1)) - c2, _ = Const(s.SubScope("x"), int64(1)) + s = NewScope() + c1 = Const(s.SubScope("x"), int64(1)) + c2 = Const(s.SubScope("x"), int64(1)) ) + if s.Err() != nil { + panic(s.Err()) + } fmt.Println(c1.Op.Name(), c2.Op.Name()) // Output: x/Const x_1/Const } diff --git a/tensorflow/models/image/alexnet/alexnet_benchmark.py b/tensorflow/models/image/alexnet/alexnet_benchmark.py index 18ac4e13292..af13a075b55 100644 --- a/tensorflow/models/image/alexnet/alexnet_benchmark.py +++ b/tensorflow/models/image/alexnet/alexnet_benchmark.py @@ -36,6 +36,7 @@ from __future__ import print_function import argparse from datetime import datetime import math +import sys import time from six.moves import xrange # pylint: disable=redefined-builtin @@ -241,6 +242,5 @@ if __name__ == '__main__': default=100, help='Number of batches to run.' ) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/models/image/imagenet/classify_image.py b/tensorflow/models/image/imagenet/classify_image.py index 3759e88b791..9014ced0267 100644 --- a/tensorflow/models/image/imagenet/classify_image.py +++ b/tensorflow/models/image/imagenet/classify_image.py @@ -223,6 +223,5 @@ if __name__ == '__main__': default=5, help='Display this many predictions.' 
) - FLAGS = parser.parse_args() - - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/models/image/mnist/convolutional.py b/tensorflow/models/image/mnist/convolutional.py index 7630c59c99b..6108139d1dd 100644 --- a/tensorflow/models/image/mnist/convolutional.py +++ b/tensorflow/models/image/mnist/convolutional.py @@ -118,7 +118,7 @@ def error_rate(predictions, labels): predictions.shape[0]) -def main(argv=None): # pylint: disable=unused-argument +def main(_): if FLAGS.self_test: print('Running self-test.') train_data, train_labels = fake_data(256) @@ -326,14 +326,12 @@ if __name__ == '__main__': '--use_fp16', default=False, help='Use half floats instead of full floats if True.', - action='store_true' - ) + action='store_true') parser.add_argument( '--self_test', default=False, action='store_true', - help='True if running a self test.' - ) - FLAGS = parser.parse_args() + help='True if running a self test.') - tf.app.run() + FLAGS, unparsed = parser.parse_known_args() + tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e3313b43352..6bb86a552a8 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1861,6 +1861,7 @@ tf_py_wrap_cc( "//tensorflow/c:tf_status_helper", "//tensorflow/core:lib", "//tensorflow/core/distributed_runtime:server_lib", + "//tensorflow/tools/tfprof/internal:print_model_analysis", "//util/python:python_headers", ] + tf_additional_lib_deps(), ) @@ -1930,7 +1931,6 @@ py_library( # Just used by tests. 
tf_cuda_library( name = "construction_fails_op", - testonly = 1, srcs = ["client/test_construction_fails_op.cc"], deps = [ "//tensorflow/core", @@ -2130,7 +2130,6 @@ py_tests( "summary/event_multiplexer_test.py", "summary/impl/directory_watcher_test.py", "summary/impl/event_file_loader_test.py", - "summary/impl/gcs_file_loader_test.py", "summary/impl/reservoir_test.py", "summary/summary_test.py", "summary/writer/writer_test.py", diff --git a/tensorflow/python/debug/session_debug_test.py b/tensorflow/python/debug/session_debug_test.py index 56f1fcdc6a3..48d7e944844 100644 --- a/tensorflow/python/debug/session_debug_test.py +++ b/tensorflow/python/debug/session_debug_test.py @@ -131,6 +131,72 @@ class SessionDebugTest(test_util.TensorFlowTestCase): self.assertGreaterEqual( dump.get_rel_timestamps("%s/read" % v_name, 0, "DebugIdentity")[0], 0) + def testDifferentWatchesOnDifferentRuns(self): + """Test watching different tensors on different runs of the same graph.""" + + with session.Session() as sess: + u_init_val = np.array([[5.0, 3.0], [-1.0, 0.0]]) + v_init_val = np.array([[2.0], [-1.0]]) + + # Use node names with overlapping namespace (i.e., parent directory) to + # test concurrent, non-racing directory creation. + u_name = "diff_Watch/u" + v_name = "diff_Watch/v" + + u_init = constant_op.constant(u_init_val, shape=[2, 2]) + u = variables.Variable(u_init, name=u_name) + v_init = constant_op.constant(v_init_val, shape=[2, 1]) + v = variables.Variable(v_init, name=v_name) + + w = math_ops.matmul(u, v, name="diff_Watch/matmul") + + u.initializer.run() + v.initializer.run() + + for i in xrange(2): + run_options = config_pb2.RunOptions(output_partition_graphs=True) + + run_dump_root = os.path.join(self._dump_root, "run_%d" % i) + debug_url = "file://%s" % run_dump_root + + if i == 0: + # First debug run: Add debug tensor watch for u. 
+ self._addDebugTensorWatch( + run_options, "%s/read" % u_name, 0, debug_urls=[debug_url]) + else: + # Second debug run: Add debug tensor watch for v. + self._addDebugTensorWatch( + run_options, "%s/read" % v_name, 0, debug_urls=[debug_url]) + + run_metadata = config_pb2.RunMetadata() + + # Invoke Session.run(). + sess.run(w, options=run_options, run_metadata=run_metadata) + + self.assertEqual(self._expected_partition_graph_count, + len(run_metadata.partition_graphs)) + + dump = debug_data.DebugDumpDir( + run_dump_root, partition_graphs=run_metadata.partition_graphs) + + # Each run should have generated only one dumped tensor, not two. + self.assertEqual(1, dump.size) + + if i == 0: + self.assertAllClose([u_init_val], + dump.get_tensors("%s/read" % u_name, 0, + "DebugIdentity")) + self.assertGreaterEqual( + dump.get_rel_timestamps("%s/read" % u_name, 0, + "DebugIdentity")[0], 0) + else: + self.assertAllClose([v_init_val], + dump.get_tensors("%s/read" % v_name, 0, + "DebugIdentity")) + self.assertGreaterEqual( + dump.get_rel_timestamps("%s/read" % v_name, 0, + "DebugIdentity")[0], 0) + def testDumpStringTensorsToFileSystem(self): with session.Session() as sess: str1_init_val = np.array(b"abc") diff --git a/tensorflow/python/framework/cpp_shape_inference.cc b/tensorflow/python/framework/cpp_shape_inference.cc index 0d8703fe8fe..bb5a57e617c 100644 --- a/tensorflow/python/framework/cpp_shape_inference.cc +++ b/tensorflow/python/framework/cpp_shape_inference.cc @@ -73,8 +73,10 @@ Status RunCppShapeInferenceImpl( } // Run shape inference. - tensorflow::shape_inference::InferenceContext c(&node, op_reg_data->op_def, - input_shapes, input_tensors); + // TODO(cwhipkey): pass a value for input_tensors_as_shapes. 
+ tensorflow::shape_inference::InferenceContext c( + &node, op_reg_data->op_def, input_shapes, input_tensors, + {} /* input_tensors_as_shapes */); TF_RETURN_IF_ERROR(c.construction_status()); TF_RETURN_IF_ERROR(c.Run(op_reg_data->shape_inference_fn)); diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 13021d885ba..e7f63d3e80a 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -400,18 +400,26 @@ def import_graph_def(graph_def, input_map=None, return_elements=None, # would cause graphs to fail if imported after correcting. # # This can be removed after 2017/03/08. - if op.type not in ['RandomShuffleQueue', 'PaddingFIFOQueue', - 'FIFOQueue', 'PriorityQueue', 'QueueSize', - 'Stack', 'Barrier', 'BarrierReadySize', - 'BarrierIncompleteSize', 'HashTable', - 'MutableHashTable', - 'MutableHashTableOfTensors', 'Mutex', - 'CuckooTable', 'IndexTable', - 'WholeFileReader', 'TextLineReader', - 'FixedLengthRecordReader', - 'TFRecordReader', 'IdentityReader', - 'RefSwitch', 'RefEnter', 'RefNextIteration', - 'RefMerge', 'RefIdentity']: + if op.type in ['RandomShuffleQueue', 'PaddingFIFOQueue', + 'FIFOQueue', 'PriorityQueue', 'QueueSize', + 'Stack', 'Barrier', 'BarrierReadySize', + 'BarrierIncompleteSize', 'HashTable', + 'MutableHashTable', + 'MutableHashTableOfTensors', 'Mutex', + 'CuckooTable', 'IndexTable', + 'WholeFileReader', 'TextLineReader', + 'FixedLengthRecordReader', + 'TFRecordReader', 'IdentityReader', + 'RefSwitch', 'RefEnter', 'RefNextIteration', + 'RefMerge', 'RefIdentity']: + pass + elif op.type in [ + 'ConditionalAccumulator', 'SparseConditionalAccumulator', + 'Table' + ]: + # This can be removed after 2017/04/24. 
+ pass + else: raise e del op.node_def.attr['_output_shapes'] diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index f6c1db6f2af..e654331271d 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -384,6 +384,32 @@ class ScopedMetaGraphTest(tf.test.TestCase): orig_meta_graph, import_scope="new_hidden1", input_map={"$unbound_inputs_MatMul": tf.constant(4.0, shape=[2, 2])}) + def testClearDevices(self): + graph1 = tf.Graph() + with graph1.as_default(): + with tf.device("/device:CPU:0"): + a = tf.Variable(tf.constant(1.0, shape=[2, 2]), name="a") + with tf.device("/job:ps/replica:0/task:0/gpu:0"): + b = tf.Variable(tf.constant(2.0, shape=[2, 2]), name="b") + with tf.device("/job:localhost/replica:0/task:0/cpu:0"): + tf.matmul(a, b, name="matmul") + + self.assertEqual("/device:CPU:0", str(graph1.as_graph_element("a").device)) + self.assertEqual("/job:ps/replica:0/task:0/device:GPU:0", + str(graph1.as_graph_element("b").device)) + self.assertEqual("/job:localhost/replica:0/task:0/device:CPU:0", + str(graph1.as_graph_element("matmul").device)) + + orig_meta_graph, _ = meta_graph.export_scoped_meta_graph(graph=graph1) + + graph2 = tf.Graph() + with graph2.as_default(): + meta_graph.import_scoped_meta_graph(orig_meta_graph, clear_devices=True) + + self.assertEqual("", str(graph2.as_graph_element("a").device)) + self.assertEqual("", str(graph2.as_graph_element("b").device)) + self.assertEqual("", str(graph2.as_graph_element("matmul").device)) + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 0b028c28390..635d592912f 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4053,6 +4053,7 @@ class GraphKeys(object): READY_FOR_LOCAL_INIT_OP = "ready_for_local_init_op" SUMMARY_OP = "summary_op" GLOBAL_STEP = "global_step" + TRAIN_OP = 
"train_op" # Key for control flow context. COND_CONTEXT = "cond_context" diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 8c0021c3cb0..7e56bb5843b 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -775,22 +775,23 @@ class StridedSliceBenchmark(tf.test.Benchmark): class StridedSliceAssignChecker(object): - def __init__(self, test, x, tensor_type=tf.int32): + def __init__(self, test, x, tensor_type=tf.float32): self.tensor_type = tensor_type self.test = test self.x = tf.cast(tf.constant(x, dtype=tf.float32), dtype=tensor_type) self.x_np = np.array(x) def __setitem__(self, index, value): - with self.test.test_session() as sess: - var = tf.Variable(self.x) - sess.run(tf.initialize_variables([var])) - val = sess.run(var[index].assign( - tf.constant( - value, dtype=self.tensor_type))) - valnp = np.copy(self.x_np) - valnp[index] = np.array(value) - self.test.assertAllEqual(val, valnp) + for use_gpu in [False, True]: + with self.test.test_session(use_gpu=use_gpu) as sess: + var = tf.Variable(self.x) + sess.run(tf.initialize_variables([var])) + val = sess.run(var[index].assign( + tf.constant( + value, dtype=self.tensor_type))) + valnp = np.copy(self.x_np) + valnp[index] = np.array(value) + self.test.assertAllEqual(val, valnp) class SliceAssignTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 14fe95dea66..0ba17208e77 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -322,7 +322,7 @@ class ZerosTest(tf.test.TestCase): class ZerosLikeTest(tf.test.TestCase): def _compareZeros(self, dtype, use_gpu): - with self.test_session(use_gpu=False): + with self.test_session(use_gpu=use_gpu): # Creates a tensor of non-zero values with shape 2 x 3. 
numpy_dtype = dtype.as_numpy_dtype d = tf.constant(np.ones((2, 3), dtype=numpy_dtype), dtype=dtype) @@ -342,7 +342,7 @@ class ZerosLikeTest(tf.test.TestCase): self._compareZeros(dtype, False) def testZerosLikeGPU(self): - for dtype in [tf.float32, tf.float64, tf.int32]: + for dtype in [tf.float32, tf.float64, tf.int32, tf.bool]: self._compareZeros(dtype, True) def testZerosLikePartialShape(self): diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 47e8029a9b7..da5d51b0e19 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -252,6 +252,15 @@ class ControlFlowTest(tf.test.TestCase): result = exit_i.eval() self.assertAllEqual(10, result) + def testDifferentFrame(self): + with self.test_session(): + data = tf.placeholder(tf.float32, shape=[]) + enter_1 = control_flow_ops.enter(data, "foo_1", False) + enter_2 = control_flow_ops.enter(data, "foo_2", False) + res = tf.add(enter_1, enter_2) + with self.assertRaisesOpError("has inputs from different frames"): + res.eval(feed_dict={data: 1.0}) + def testCondBool(self): values = tf.constant(10) fn1 = lambda: tf.add(values, 1) @@ -507,7 +516,7 @@ class ControlFlowTest(tf.test.TestCase): ] self.assertAllEqual(dense_gv, [0.0, 2.0]) - # Microbenchmark: 250,000 iterations/s. + # Microbenchmark: 256,000 iterations/s. 
def testWhile_1(self): with self.test_session(): n = tf.constant(0) diff --git a/tensorflow/python/kernel_tests/decode_raw_op_test.py b/tensorflow/python/kernel_tests/decode_raw_op_test.py index f3cf0643fa0..bb707b32f7e 100644 --- a/tensorflow/python/kernel_tests/decode_raw_op_test.py +++ b/tensorflow/python/kernel_tests/decode_raw_op_test.py @@ -65,10 +65,7 @@ class DecodeRawOpTest(tf.test.TestCase): self.assertEqual([None, None], decode.get_shape().as_list()) expected_result = np.matrix([[1, -2, -3, 4]], dtype=np.float16) - result = decode.eval( - feed_dict={ - in_bytes: [expected_result.tobytes()] - }) + result = decode.eval(feed_dict={in_bytes: [expected_result.tostring()]}) self.assertAllEqual(expected_result, result) diff --git a/tensorflow/python/kernel_tests/functional_ops_test.py b/tensorflow/python/kernel_tests/functional_ops_test.py index e73d61d2617..fe20ec7ebc0 100644 --- a/tensorflow/python/kernel_tests/functional_ops_test.py +++ b/tensorflow/python/kernel_tests/functional_ops_test.py @@ -114,6 +114,13 @@ class FunctionalOpsTest(tf.test.TestCase): r = tf.map_fn(lambda x: tf.mul(tf.add(x, 3), 2), elems) self.assertAllEqual(np.array([(x + 3) * 2 for x in nums]), r.eval()) + def testMapSparseTensor(self): + with self.test_session(): + with self.assertRaises(TypeError): + tf.map_fn(lambda x: x, tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]], + values=tf.constant([0, 1, 2]), + shape=[2, 2])) + def testMap_Scoped(self): with self.test_session() as sess: diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 42201f7ae19..7c591707e99 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -120,9 +120,8 @@ class SoftmaxTest(tf.test.TestCase): def testDouble(self): self._testSoftmax( - np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64), - use_gpu=False) - self._testOverflow(use_gpu=False) + np.array([[1., 1., 1., 
1.], [1., 2., 3., 4.]]).astype(np.float64)) + self._testOverflow() def test1DTesnorAsInput(self): self._testSoftmax( diff --git a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py index c6a11ee4cc9..9f789798b0c 100644 --- a/tensorflow/python/kernel_tests/sparse_matmul_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_matmul_op_test.py @@ -64,6 +64,20 @@ class SparseMatMulTest(tf.test.TestCase): for y_dtype in (tf.float32, tf.bfloat16): self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype) + def testZeroDim(self): + x = np.ones((4, 0)).astype(np.float32) + y = np.ones((0, 3)).astype(np.float32) + for x_dtype in (tf.float32, tf.bfloat16): + for y_dtype in (tf.float32, tf.bfloat16): + self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype) + + def testEmpty(self): + x = np.ones((0, 0)).astype(np.float32) + y = np.ones((0, 0)).astype(np.float32) + for x_dtype in (tf.float32, tf.bfloat16): + for y_dtype in (tf.float32, tf.bfloat16): + self._testCpuMatmul(x, y, x_dtype=x_dtype, y_dtype=y_dtype) + # Tests setting one dimension to be a high value. 
def testLarge(self): r1 = np.random.randint(6000, 20000) diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 1e2db3e565e..5010b79a6a7 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -637,19 +637,19 @@ class VariableScopeTest(tf.test.TestCase): def testGetVarWithDevice(self): g = tf.Graph() - varname_shape = [] + varname_type = [] def device_func(op): if op.type == "Variable": - varname_shape.append((op.name, tf.TensorShape(op.get_attr("shape")))) + varname_type.append((op.name, op.get_attr("dtype"))) return "/gpu:0" with g.as_default(): with tf.device(device_func): - _ = tf.get_variable("x", (100, 200)) # init fn - _ = tf.get_variable("y", initializer=numpy.arange(73)) # init constant - self.assertEqual(varname_shape[0], ("x", tf.TensorShape([100, 200]))) - self.assertEqual(varname_shape[1], ("y", tf.TensorShape([73]))) + _ = tf.get_variable("x", (100, 200)) + _ = tf.get_variable("y", dtype=tf.int64, initializer=numpy.arange(73)) + self.assertEqual(varname_type[0], ("x", tf.float32)) + self.assertEqual(varname_type[1], ("y", tf.int64)) def axis0_into1_partitioner(shape=None, **unused_kwargs): diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 6abce62ecc2..dcb57d7e0c3 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1905,7 +1905,6 @@ def _EditDistanceShape(op): return common_shapes.call_cpp_shape_fn(op, input_tensors_needed=[2, 5]) -# The remaining ops do not change the shape of their inputs. 
@ops.RegisterShape("Quantize") @ops.RegisterShape("Dequantize") def _QuantizeDequantizeShape(op): @@ -1914,6 +1913,45 @@ def _QuantizeDequantizeShape(op): return common_shapes.unchanged_shape(op) +@ops.RegisterShape("FakeQuantWithMinMaxArgs") +def _FakeQuantWithMinMaxArgsShape(op): + """Shape function for FakeQuantWithMinMaxArgs op: preserve the input shape.""" + return [op.inputs[0].get_shape()] + + +@ops.RegisterGradient("FakeQuantWithMinMaxArgs") +def _FakeQuantWithMinMaxArgsGradient(op, grad): + """Gradient for FakeQuantWithMinMaxArgs op.""" + return fake_quant_with_min_max_args_gradient(grad, op.inputs[0]) + + +@ops.RegisterShape("FakeQuantWithMinMaxVars") +def _FakeQuantWithMinMaxVarsShape(op): + """Shape function for FakeQuantWithMinMaxVars op: preserve the input shape.""" + return [op.inputs[0].get_shape()] + + +@ops.RegisterGradient("FakeQuantWithMinMaxVars") +def _FakeQuantWithMinMaxVarsGradient(op, grad): + """Gradient for FakeQuantWithMinMaxVars op.""" + return fake_quant_with_min_max_vars_gradient(grad, op.inputs[0], op.inputs[1], + op.inputs[2]) + + +@ops.RegisterShape("FakeQuantWithMinMaxVarsPerChannel") +def _FakeQuantWithMinMaxVarsPerChannelShape(op): + """Shape function for FakeQuantWithMinMaxVarsPerChannel op: input shape.""" + return [op.inputs[0].get_shape()] + + +@ops.RegisterGradient("FakeQuantWithMinMaxVarsPerChannel") +def _FakeQuantWithMinMaxVarsPerChannelGradient(op, grad): + """Gradient for FakeQuantWithMinMaxVarsPerChannel op.""" + return fake_quant_with_min_max_vars_per_channel_gradient(grad, op.inputs[0], + op.inputs[1], + op.inputs[2]) + + ops.RegisterShape("ExtractImagePatches")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 06d7308b384..d2de88a9ca9 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1455,14 +1455,14 @@ class SparseConditionalAccumulator(ConditionalAccumulatorBase): 
dense_shape=return_val.shape) -ops.RegisterShape("AccumulatorNumAccumulated")(common_shapes.scalar_shape) -ops.RegisterShape("AccumulatorSetGlobalStep")(common_shapes.no_outputs) - -ops.RegisterShape("ConditionalAccumulator")(common_shapes.scalar_shape) - -ops.RegisterShape("AccumulatorApplyGradient")(common_shapes.no_outputs) -ops.RegisterShape("AccumulatorTakeGradient")(common_shapes.unknown_shape) - -ops.RegisterShape("SparseConditionalAccumulator")(common_shapes.scalar_shape) -ops.RegisterShape("SparseAccumulatorApplyGradient")(common_shapes.no_outputs) -ops.RegisterShape("SparseAccumulatorTakeGradient")(common_shapes.unknown_shape) +ops.RegisterShape("AccumulatorNumAccumulated")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("AccumulatorSetGlobalStep")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("ConditionalAccumulator")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("AccumulatorApplyGradient")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("AccumulatorTakeGradient")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("SparseConditionalAccumulator")( + common_shapes.call_cpp_shape_fn) +ops.RegisterShape("SparseAccumulatorApplyGradient")( + common_shapes.call_cpp_shape_fn) +ops.RegisterShape("SparseAccumulatorTakeGradient")( + common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index d765989f497..8ef05b03344 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -234,6 +234,22 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, the `dtype` parameter is not optional: `dtype` must be a type or (possibly nested) tuple of types matching the output of `fn`. + To apply a functional operation to the nonzero elements of a SparseTensor + one of the following methods is recommended. 
First, if the function is + expressible as TensorFlow ops, use + + ```python + result = SparseTensor(input.indices, fn(input.values), input.shape) + ``` + + If, however, the function is not expressible as a TensorFlow op, then use + + ```python + result = SparseTensor(input.indices, map_fn(fn, input.values), input.shape) + ``` + + instead. + Args: fn: The callable to be performed. It accepts one argument, which will have the same (possibly nested) structure as `elems`. Its output @@ -259,7 +275,7 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, Raises: TypeError: if `fn` is not callable or the structure of the output of - `fn` and `dtype` do not match. + `fn` and `dtype` do not match, or if elems is a SparseTensor. ValueError: if the lengths of the output of `fn` and `dtype` do not match. Examples: @@ -285,6 +301,12 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, if not callable(fn): raise TypeError("fn must be callable.") + if isinstance(elems, ops.SparseTensor): + raise TypeError( + "To perform a map on the values of a sparse tensor use either " + " SparseTensor(input.indices, fn(input.values), input.shape) or " + " SparseTensor(input.indices, map_fn(fn, input.values), input.shape)") + input_is_sequence = nest.is_sequence(elems) input_flatten = lambda x: nest.flatten(x) if input_is_sequence else [x] def input_pack(x): diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py index 451b3e5bf09..2836fbabdc0 100644 --- a/tensorflow/python/ops/image_ops.py +++ b/tensorflow/python/ops/image_ops.py @@ -152,7 +152,7 @@ type and representation (RGB or HSV). @@adjust_saturation @@random_saturation -@@per_image_whitening +@@per_image_standardization ## Working with Bounding Boxes @@ -827,7 +827,7 @@ def resize_images(images, return images -def per_image_whitening(image): +def per_image_standardization(image): """Linearly scales `image` to have zero mean and unit norm. 
This op computes `(x - mean) / adjusted_stddev`, where `mean` is the average @@ -837,16 +837,11 @@ def per_image_whitening(image): `stddev` is the standard deviation of all values in `image`. It is capped away from zero to protect against division by 0 when handling uniform images. - Note that this implementation is limited: - - * It only whitens based on the statistics of an individual image. - * It does not take into account the covariance structure. - Args: image: 3-D tensor of shape `[height, width, channels]`. Returns: - The whitened image with same shape as `image`. + The standardized image with same shape as `image`. Raises: ValueError: if the shape of 'image' is incompatible with this function. @@ -873,6 +868,11 @@ def per_image_whitening(image): return image +# TODO(skye): remove once users switch to per_image_standardization() +def per_image_whitening(image): + return per_image_standardization(image) + + def random_brightness(image, max_delta, seed=None): """Adjust the brightness of images by a random factor. @@ -1380,3 +1380,6 @@ ops.RegisterShape('NonMaxSuppression')(common_shapes.call_cpp_shape_fn) __all__ = make_all(__name__) # ResizeMethod is not documented, but is documented in functions that use it. 
__all__.append('ResizeMethod') +# TODO(skye): per_image_whitening() will be removed once all callers switch to +# per_image_standardization() +__all__.append('per_image_whitening') diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 215731a5cb8..1a34634cf28 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2021,3 +2021,5 @@ def reduced_shape(input_shape, axes): ops.RegisterShape("QuantizedMatMul")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("Requantize")(common_shapes.call_cpp_shape_fn) +ops.RegisterShape("RequantizationRange")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index f869301873f..636acc3e2ad 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -116,6 +116,7 @@ from __future__ import print_function from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_state_ops # go/tf-wildcard-import # pylint: disable=wildcard-import @@ -146,6 +147,8 @@ def variable_op(shape, dtype, name="Variable", set_shape=True, container="", Returns: A variable tensor. 
""" + if not set_shape: + shape = tensor_shape.unknown_shape() ret = gen_state_ops._variable(shape=shape, dtype=dtype, name=name, container=container, shared_name=shared_name) # TODO(mrry): Move this to where it is used, so we can get rid of this op diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index b03a49988c4..05f780ccaac 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -20,6 +20,7 @@ from __future__ import print_function from tensorflow.core.framework import variable_pb2 from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops @@ -280,6 +281,7 @@ class Variable(object): "or set. Got %s of type %s" % (collections, type(collections))) if trainable and ops.GraphKeys.TRAINABLE_VARIABLES not in collections: collections = list(collections) + [ops.GraphKeys.TRAINABLE_VARIABLES] + expected_shape = tensor_shape.as_shape(expected_shape) with ops.control_dependencies(None): with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: @@ -287,6 +289,13 @@ class Variable(object): # Get the initial value from a callable function. The real shape of the # variable will be set later, since under the init_from_fn case, the # shape won't be known until after the function is invoked. + # + # NOTE: The current Variable OpKernel does not support + # partially defined shapes, so we only set the shape if it is + # fully defined. For historical reasons, we use the scalar + # shape (`[]`) to represent an unknown or partially known + # shape. A future version of the Variable ops will remove this + # limitation. 
def full_shape_to_list(shape): """Returns shape as a list if shape is fully defined.""" if shape and shape.is_fully_defined(): @@ -302,8 +311,10 @@ class Variable(object): if init_from_fn: expected_shape_list = full_shape_to_list(expected_shape) + set_shape = validate_shape and expected_shape.is_fully_defined() self._variable = state_ops.variable_op( - expected_shape_list, dtype.base_dtype, set_shape=False, name=name) + expected_shape_list, dtype.base_dtype, set_shape=set_shape, + name=name) with ops.colocate_with(self._variable.op): with ops.name_scope("Initializer"): # Colocate the tensors created by the initial_value() function @@ -317,12 +328,14 @@ class Variable(object): self._initial_value = ops.convert_to_tensor( initial_value, name="initial_value", dtype=dtype) assert_expected_shape() + set_shape = (validate_shape + and self._initial_value.get_shape().is_fully_defined()) # In this case, the variable op can't be created until after the # initial_value has been converted to a Tensor with a known type. self._variable = state_ops.variable_op( full_shape_to_list(self._initial_value.get_shape()), self._initial_value.dtype.base_dtype, - set_shape=False, + set_shape=set_shape, name=name) # Manually overrides the variable's shape with the initial value's. @@ -976,13 +989,8 @@ class PartitionedVariable(object): Returns: `Tensor` containing the concatenated value. """ - if self._as_tensor is None: - # Be sure to cache the concatenated tensor to not do extraneous - # computations. 
- with ops.control_dependencies(None): - self._as_tensor = self._concat() - - return self._as_tensor + with ops.control_dependencies(None): + return self._concat() @staticmethod def _TensorConversionFunction(v, dtype=None, name=None, as_ref=False): diff --git a/tensorflow/python/platform/app.py b/tensorflow/python/platform/app.py index b82a6987eca..bd58db7b45d 100644 --- a/tensorflow/python/platform/app.py +++ b/tensorflow/python/platform/app.py @@ -23,10 +23,21 @@ import sys from tensorflow.python.platform import flags -def run(main=None): +def run(main=None, argv=None): + """Runs the program with an optional 'main' function and 'argv' list.""" f = flags.FLAGS + + # Extract the args from the optional `argv` list. + args = argv[1:] if argv else None + + # Parse the known flags from that list, or from the command + # line otherwise. # pylint: disable=protected-access - flags_passthrough = f._parse_flags() + flags_passthrough = f._parse_flags(args=args) # pylint: enable=protected-access + main = main or sys.modules['__main__'].main + + # Call the main function, passing through any arguments + # to the final program. 
sys.exit(main(sys.argv[:1] + flags_passthrough)) diff --git a/tensorflow/python/platform/flags.py b/tensorflow/python/platform/flags.py index 0522f76b9c3..3e417ab3213 100644 --- a/tensorflow/python/platform/flags.py +++ b/tensorflow/python/platform/flags.py @@ -31,8 +31,8 @@ class _FlagValues(object): self.__dict__['__flags'] = {} self.__dict__['__parsed'] = False - def _parse_flags(self): - result, unparsed = _global_parser.parse_known_args() + def _parse_flags(self, args=None): + result, unparsed = _global_parser.parse_known_args(args=args) for flag_name, val in vars(result).items(): self.__dict__['__flags'][flag_name] = val self.__dict__['__parsed'] = True diff --git a/tensorflow/python/platform/flags_test.py b/tensorflow/python/platform/flags_test.py index d2b7da7ad25..0dbaafd1fab 100644 --- a/tensorflow/python/platform/flags_test.py +++ b/tensorflow/python/platform/flags_test.py @@ -12,20 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== - """Tests for our flags implementation.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import argparse import sys import unittest from tensorflow.python.platform import app from tensorflow.python.platform import flags - flags.DEFINE_string("string_foo", "default_val", "HelpString") flags.DEFINE_integer("int_foo", 42, "HelpString") flags.DEFINE_float("float_foo", 42.0, "HelpString") @@ -40,6 +37,7 @@ flags.DEFINE_bool("bool_e", True, "HelpString") FLAGS = flags.FLAGS + class FlagsTest(unittest.TestCase): def testString(self): @@ -82,17 +80,7 @@ class FlagsTest(unittest.TestCase): self.assertEqual(-1.0, FLAGS.float_foo) -def main(argv): - # Test that argparse can parse flags that aren't registered - # with tf.flags. 
- parser = argparse.ArgumentParser() - parser.add_argument("--argparse_val", type=int, default=1000, - help="Test flag") - argparse_flags, _ = parser.parse_known_args(argv) - if argparse_flags.argparse_val != 10: - raise ValueError("argparse flag was not parsed: got %d", - argparse_flags.argparse_val) - +def main(_): # unittest.main() tries to interpret the unknown flags, so use the # direct functions instead. runner = unittest.TextTestRunner() @@ -102,9 +90,9 @@ def main(argv): if __name__ == "__main__": # Test command lines - sys.argv.extend(["--bool_a", "--nobool_negation", - "--bool_c=True", "--bool_d=False", - "--unknown_flag", "--argparse_val=10", - "and_argument"]) + sys.argv.extend([ + "--bool_a", "--nobool_negation", "--bool_c=True", "--bool_d=False", + "and_argument" + ]) app.run() diff --git a/tensorflow/python/saved_model/builder.py b/tensorflow/python/saved_model/builder.py index fcca5aa5e85..43b97cf70c6 100644 --- a/tensorflow/python/saved_model/builder.py +++ b/tensorflow/python/saved_model/builder.py @@ -86,8 +86,12 @@ class SavedModelBuilder(object): constants.SAVED_MODEL_SCHEMA_VERSION) self._export_dir = export_dir - if not file_io.file_exists(export_dir): - file_io.recursive_create_dir(self._export_dir) + if file_io.file_exists(export_dir): + raise AssertionError( + "Export directory already exists. Please specify a different export " + "directory.") + + file_io.recursive_create_dir(self._export_dir) # Boolean to track whether variables and assets corresponding to the # SavedModel have been saved. Specifically, the first meta graph to be added @@ -163,8 +167,12 @@ class SavedModelBuilder(object): asset_destination_filepath = os.path.join( compat.as_bytes(assets_destination_dir), compat.as_bytes(asset_source_filename)) - file_io.copy( - asset_source_filepath, asset_destination_filepath, overwrite=True) + + # Only copy the asset file to the destination if it does not already + # exist. 
This is to ensure that an asset with the same name defined as + # part of multiple graphs is only copied the first time. + if not file_io.file_exists(asset_destination_filepath): + file_io.copy(asset_source_filepath, asset_destination_filepath) tf_logging.info("Assets written to: %s", assets_destination_dir) @@ -271,8 +279,8 @@ class SavedModelBuilder(object): "Variables and assets have not been saved yet. " "Please invoke `add_meta_graph_and_variables()` first.") - # Save asset files, if any. - self._maybe_save_assets(assets_collection) + # Save asset files and write them to disk, if any. + self._save_and_write_assets(assets_collection) # Add legacy init op to the SavedModel. self._maybe_add_legacy_init_op(legacy_init_op) diff --git a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py index 9ba37e42fae..7c25a7ec1ef 100644 --- a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py +++ b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py @@ -97,6 +97,12 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): # Set up the assets collection. assets_filepath = tf.constant(original_assets_filepath) tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath) + filename_tensor = tf.Variable( + original_assets_filename, + name="filename_tensor", + trainable=False, + collections=[]) + assign_filename_op = filename_tensor.assign(original_assets_filename) # Set up the signature for regression with input and output tensor # specification. 
@@ -118,7 +124,8 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): signature_def_map={ signature_constants.REGRESS_METHOD_NAME: signature_def }, - assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)) + assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS), + legacy_init_op=tf.group(assign_filename_op)) builder.save(as_text) diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index ff6e86a2092..a50620e113c 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -38,6 +38,39 @@ def tearDownModule(): class SavedModelTest(tf.test.TestCase): + def _init_and_validate_variable(self, sess, variable_name, variable_value): + v = tf.Variable(variable_value, name=variable_name) + sess.run(tf.initialize_all_variables()) + self.assertEqual(variable_value, v.eval()) + + def _build_asset_collection(self, asset_file_name, asset_file_contents, + asset_file_tensor_name): + asset_filepath = os.path.join( + compat.as_bytes(tf.test.get_temp_dir()), + compat.as_bytes(asset_file_name)) + file_io.write_string_to_file(asset_filepath, asset_file_contents) + asset_file_tensor = tf.constant(asset_filepath, name=asset_file_tensor_name) + tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor) + asset_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS) + return asset_collection + + def _validate_asset_collection(self, export_dir, graph_collection_def, + expected_asset_file_name, + expected_asset_file_contents, + expected_asset_tensor_name): + assets_any = graph_collection_def[constants.ASSETS_KEY].any_list.value + asset = meta_graph_pb2.AssetFileDef() + assets_any[0].Unpack(asset) + assets_path = os.path.join( + compat.as_bytes(export_dir), + compat.as_bytes(constants.ASSETS_DIRECTORY), + compat.as_bytes(expected_asset_file_name)) + actual_asset_contents = file_io.read_file_to_string(assets_path) 
+ self.assertEqual(expected_asset_file_contents, + compat.as_text(actual_asset_contents)) + self.assertEqual(expected_asset_file_name, asset.filename) + self.assertEqual(expected_asset_tensor_name, asset.tensor_info.name) + def testSequence(self): export_dir = os.path.join(tf.test.get_temp_dir(), "test_sequence") builder = saved_model_builder.SavedModelBuilder(export_dir) @@ -50,9 +83,7 @@ class SavedModelTest(tf.test.TestCase): # Expect an assertion error for multiple calls of # add_meta_graph_and_variables() since weights should be saved exactly once. with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(42, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(42, v.eval()) + self._init_and_validate_variable(sess, "v", 42) builder.add_meta_graph_and_variables(sess, ["bar"]) self.assertRaises(AssertionError, builder.add_meta_graph_and_variables, sess, ["baz"]) @@ -65,27 +96,21 @@ class SavedModelTest(tf.test.TestCase): # - add with weights. # - a single tag (from predefined constants). with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(42, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(42, v.eval()) + self._init_and_validate_variable(sess, "v", 42) builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING]) # Graph that updates the single variable. SavedModel invoked to: # - simply add the model (weights are not updated). # - a single tag (from predefined constants). with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(43, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(43, v.eval()) + self._init_and_validate_variable(sess, "v", 43) builder.add_meta_graph([tag_constants.SERVING]) # Graph that updates the single variable. SavedModel is invoked: # - to add the model (weights are not updated). # - multiple custom tags. 
with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(44, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(44, v.eval()) + self._init_and_validate_variable(sess, "v", 44) builder.add_meta_graph(["foo", "bar"]) # Save the SavedModel to disk. @@ -128,29 +153,22 @@ class SavedModelTest(tf.test.TestCase): # Graph with two variables. SavedModel invoked to: # - add with weights. with self.test_session(graph=tf.Graph()) as sess: - v1 = tf.Variable(1, name="v1") - v2 = tf.Variable(2, name="v2") - sess.run(tf.initialize_all_variables()) - self.assertEqual(1, v1.eval()) - self.assertEqual(2, v2.eval()) + self._init_and_validate_variable(sess, "v1", 1) + self._init_and_validate_variable(sess, "v2", 2) builder.add_meta_graph_and_variables(sess, ["foo"]) # Graph with a single variable (subset of the variables from the previous # graph whose weights were saved). SavedModel invoked to: # - simply add the model (weights are not updated). with self.test_session(graph=tf.Graph()) as sess: - v2 = tf.Variable(3, name="v2") - sess.run(tf.initialize_all_variables()) - self.assertEqual(3, v2.eval()) + self._init_and_validate_variable(sess, "v2", 3) builder.add_meta_graph(["bar"]) # Graph with a single variable (disjoint set of variables from the previous # graph whose weights were saved). SavedModel invoked to: # - simply add the model (weights are not updated). with self.test_session(graph=tf.Graph()) as sess: - v3 = tf.Variable(4, name="v3") - sess.run(tf.initialize_all_variables()) - self.assertEqual(4, v3.eval()) + self._init_and_validate_variable(sess, "v3", 4) builder.add_meta_graph(["baz"]) # Save the SavedModel to disk. 
@@ -180,6 +198,29 @@ class SavedModelTest(tf.test.TestCase): self.assertRaises(errors.NotFoundError, loader.load, sess, ["baz"], export_dir) + def testNoOverwrite(self): + export_dir = os.path.join(tf.test.get_temp_dir(), "test_no_overwrite") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + # Graph with a single variable. SavedModel invoked to: + # - add with weights. + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + builder.add_meta_graph_and_variables(sess, ["foo"]) + + # Save the SavedModel to disk in text format. + builder.save(as_text=True) + + # Restore the graph with tag "foo", whose variables were saved. + with self.test_session(graph=tf.Graph()) as sess: + loader.load(sess, ["foo"], export_dir) + self.assertEqual(42, tf.get_collection(tf.GraphKeys.VARIABLES)[0].eval()) + + # An attempt to create another builder with the same export directory should + # result in an assertion error. + self.assertRaises(AssertionError, saved_model_builder.SavedModelBuilder, + export_dir) + def testSaveAsText(self): export_dir = os.path.join(tf.test.get_temp_dir(), "test_astext") builder = saved_model_builder.SavedModelBuilder(export_dir) @@ -187,17 +228,13 @@ class SavedModelTest(tf.test.TestCase): # Graph with a single variable. SavedModel invoked to: # - add with weights. with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(42, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(42, v.eval()) + self._init_and_validate_variable(sess, "v", 42) builder.add_meta_graph_and_variables(sess, ["foo"]) # Graph with the same single variable. SavedModel invoked to: # - simply add the model (weights are not updated). 
with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(43, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(43, v.eval()) + self._init_and_validate_variable(sess, "v", 43) builder.add_meta_graph(["bar"]) # Save the SavedModel to disk in text format. @@ -270,9 +307,7 @@ class SavedModelTest(tf.test.TestCase): # Graph with a single variable and a single entry in the signature def map. # SavedModel is invoked to add with weights. with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(42, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(42, v.eval()) + self._init_and_validate_variable(sess, "v", 42) # Build and populate an empty SignatureDef for testing. foo_signature = utils.build_signature_def(dict(), dict(), "foo") builder.add_meta_graph_and_variables( @@ -281,10 +316,7 @@ class SavedModelTest(tf.test.TestCase): # Graph with the same single variable and multiple entries in the signature # def map. No weights are saved by SavedModel. with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(43, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(43, v.eval()) - + self._init_and_validate_variable(sess, "v", 43) # Build and populate a different SignatureDef for testing. bar_signature = utils.build_signature_def(dict(), dict(), "bar") # Also, build a different SignatureDef corresponding to "foo_key" defined @@ -325,24 +357,17 @@ class SavedModelTest(tf.test.TestCase): builder = saved_model_builder.SavedModelBuilder(export_dir) with self.test_session(graph=tf.Graph()) as sess: - v = tf.Variable(42, name="v") - sess.run(tf.initialize_all_variables()) - self.assertEqual(42, v.eval()) + self._init_and_validate_variable(sess, "v", 42) # Build an asset collection. 
- asset_filepath = os.path.join( - compat.as_bytes(tf.test.get_temp_dir()), - compat.as_bytes("hello42.txt")) - file_io.write_string_to_file(asset_filepath, "foo bar baz") - asset_file_tensor = tf.constant(asset_filepath, name="asset_file_tensor") - tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, asset_file_tensor) - ignored_filepath = os.path.join( compat.as_bytes(tf.test.get_temp_dir()), compat.as_bytes("ignored.txt")) file_io.write_string_to_file(ignored_filepath, "will be ignored") - asset_collection = tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS) + asset_collection = self._build_asset_collection("hello42.txt", + "foo bar baz", + "asset_file_tensor") builder.add_meta_graph_and_variables( sess, ["foo"], assets_collection=asset_collection) @@ -352,21 +377,9 @@ class SavedModelTest(tf.test.TestCase): with self.test_session(graph=tf.Graph()) as sess: foo_graph = loader.load(sess, ["foo"], export_dir) - - # Validate the assets. - collection_def = foo_graph.collection_def - assets_any = collection_def[constants.ASSETS_KEY].any_list.value - self.assertEqual(len(assets_any), 1) - asset = meta_graph_pb2.AssetFileDef() - assets_any[0].Unpack(asset) - assets_path = os.path.join( - compat.as_bytes(export_dir), - compat.as_bytes(constants.ASSETS_DIRECTORY), - compat.as_bytes("hello42.txt")) - asset_contents = file_io.read_file_to_string(assets_path) - self.assertEqual("foo bar baz", compat.as_text(asset_contents)) - self.assertEqual("hello42.txt", asset.filename) - self.assertEqual("asset_file_tensor:0", asset.tensor_info.name) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "hello42.txt", "foo bar baz", + "asset_file_tensor:0") ignored_asset_path = os.path.join( compat.as_bytes(export_dir), compat.as_bytes(constants.ASSETS_DIRECTORY), @@ -407,6 +420,96 @@ class SavedModelTest(tf.test.TestCase): # the legacy_init_op, following a restore. 
self.assertEqual(3, tf.get_collection("v")[2].eval()) + def testMultipleAssets(self): + export_dir = os.path.join(tf.test.get_temp_dir(), "test_multiple_assets") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + # Build an asset collection specific to `foo` graph. + asset_collection = self._build_asset_collection("foo.txt", "content_foo", + "asset_file_tensor") + + # Add the asset collection as part of the graph with tag "foo". + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + # Build an asset collection specific to `bar` graph. + asset_collection = self._build_asset_collection("bar.txt", "content_bar", + "asset_file_tensor") + + # Add the asset collection as part of the graph with tag "bar". + builder.add_meta_graph(["bar"], assets_collection=asset_collection) + + # Save the SavedModel to disk. + builder.save() + + # Check assets restored for graph with tag "foo". + with self.test_session(graph=tf.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "foo.txt", "content_foo", + "asset_file_tensor:0") + + # Check assets restored for graph with tag "bar". 
+ with self.test_session(graph=tf.Graph()) as sess: + bar_graph = loader.load(sess, ["bar"], export_dir) + self._validate_asset_collection(export_dir, bar_graph.collection_def, + "bar.txt", "content_bar", + "asset_file_tensor:0") + + def testDuplicateAssets(self): + export_dir = os.path.join(tf.test.get_temp_dir(), "test_duplicate_assets") + builder = saved_model_builder.SavedModelBuilder(export_dir) + + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + # Build an asset collection with `foo.txt` that has `foo` specific + # content. + asset_collection = self._build_asset_collection("foo.txt", "content_foo", + "asset_file_tensor") + + # Add the asset collection as part of the graph with tag "foo". + builder.add_meta_graph_and_variables( + sess, ["foo"], assets_collection=asset_collection) + + with self.test_session(graph=tf.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + # Build an asset collection with `foo.txt` that has `bar` specific + # content. + asset_collection = self._build_asset_collection("foo.txt", "content_bar", + "asset_file_tensor") + + # Add the asset collection as part of the graph with tag "bar". + builder.add_meta_graph(["bar"], assets_collection=asset_collection) + + # Save the SavedModel to disk. + builder.save() + + # Check assets restored for graph with tag "foo". + with self.test_session(graph=tf.Graph()) as sess: + foo_graph = loader.load(sess, ["foo"], export_dir) + self._validate_asset_collection(export_dir, foo_graph.collection_def, + "foo.txt", "content_foo", + "asset_file_tensor:0") + + # Check assets restored for graph with tag "bar". + with self.test_session(graph=tf.Graph()) as sess: + bar_graph = loader.load(sess, ["bar"], export_dir) + + # Validate the assets for `bar` graph. 
`foo.txt` should contain the + # original contents corresponding to `foo` graph since an asset with the + # same name across multiple graphs is only stored the first time + self._validate_asset_collection(export_dir, bar_graph.collection_def, + "foo.txt", "content_foo", + "asset_file_tensor:0") + def testOp(self): export_dir = os.path.join(tf.test.get_temp_dir(), "test_op") builder = saved_model_builder.SavedModelBuilder(export_dir) diff --git a/tensorflow/python/summary/event_accumulator.py b/tensorflow/python/summary/event_accumulator.py index a4bc93344cd..063f100b94f 100644 --- a/tensorflow/python/summary/event_accumulator.py +++ b/tensorflow/python/summary/event_accumulator.py @@ -31,7 +31,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import summary from tensorflow.python.summary.impl import directory_watcher -from tensorflow.python.summary.impl import io_wrapper +from tensorflow.python.summary.impl import event_file_loader from tensorflow.python.summary.impl import reservoir from tensorflow.python.util import compat @@ -664,10 +664,10 @@ def _GetPurgeMessage(most_recent_step, most_recent_wall_time, event_step, def _GeneratorFromPath(path): """Create an event generator for file or directory at given path string.""" if IsTensorFlowEventsFile(path): - return io_wrapper.CreateFileLoader(path) + return event_file_loader.EventFileLoader(path) else: - return directory_watcher.DirectoryWatcher(path, io_wrapper.CreateFileLoader, - IsTensorFlowEventsFile) + return directory_watcher.DirectoryWatcher( + path, event_file_loader.EventFileLoader, IsTensorFlowEventsFile) def _ParseFileVersion(file_version): diff --git a/tensorflow/python/summary/event_multiplexer.py b/tensorflow/python/summary/event_multiplexer.py index 85de6350d27..d3a14804d34 100644 --- a/tensorflow/python/summary/event_multiplexer.py +++ b/tensorflow/python/summary/event_multiplexer.py @@ -23,6 +23,7 @@ 
import threading import six +from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import event_accumulator from tensorflow.python.summary.impl import directory_watcher @@ -386,7 +387,7 @@ class EventMultiplexer(object): def GetLogdirSubdirectories(path): """Returns subdirectories with event files on path.""" - if io_wrapper.Exists(path) and not io_wrapper.IsDirectory(path): + if gfile.Exists(path) and not gfile.IsDirectory(path): raise ValueError('GetLogdirSubdirectories: path exists and is not a ' 'directory, %s' % path) diff --git a/tensorflow/python/summary/impl/directory_watcher.py b/tensorflow/python/summary/impl/directory_watcher.py index 56a08b11eaf..799e01a8366 100644 --- a/tensorflow/python/summary/impl/directory_watcher.py +++ b/tensorflow/python/summary/impl/directory_watcher.py @@ -21,8 +21,8 @@ from __future__ import print_function import bisect from tensorflow.python.framework import errors +from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.summary.impl import gcs from tensorflow.python.summary.impl import io_wrapper @@ -88,7 +88,7 @@ class DirectoryWatcher(object): for event in self._LoadInternal(): yield event except errors.OpError: - if not io_wrapper.Exists(self._directory): + if not gfile.Exists(self._directory): raise DirectoryDeletedError( 'Directory %s has been permanently deleted' % self._directory) @@ -178,10 +178,10 @@ class DirectoryWatcher(object): path: The full path of the file to watch. """ old_path = self._path - if old_path and not gcs.IsGCSPath(old_path): + if old_path and not io_wrapper.IsGCSPath(old_path): try: # We're done with the path, so store its size. 
- size = io_wrapper.Size(old_path) + size = gfile.Stat(old_path).length logging.debug('Setting latest size of %s to %d', old_path, size) self._finalized_sizes[old_path] = size except errors.OpError as e: @@ -210,7 +210,7 @@ class DirectoryWatcher(object): # Don't bother checking if the paths are GCS (which we can't check) or if # we've already detected an OOO write. - if not gcs.IsGCSPath(paths[0]) and not self._ooo_writes_detected: + if not io_wrapper.IsGCSPath(paths[0]) and not self._ooo_writes_detected: # Check the previous _OOO_WRITE_CHECK_COUNT paths for out of order writes. current_path_index = bisect.bisect_left(paths, self._path) ooo_check_start = max(0, current_path_index - self._OOO_WRITE_CHECK_COUNT) @@ -230,7 +230,7 @@ class DirectoryWatcher(object): def _HasOOOWrite(self, path): """Returns whether the path has had an out-of-order write.""" # Check the sizes of each path before the current one. - size = io_wrapper.Size(path) + size = gfile.Stat(path).length old_size = self._finalized_sizes.get(path, None) if size != old_size: if old_size is None: diff --git a/tensorflow/python/summary/impl/directory_watcher_test.py b/tensorflow/python/summary/impl/directory_watcher_test.py index b4e5f03daec..b6ecc158493 100644 --- a/tensorflow/python/summary/impl/directory_watcher_test.py +++ b/tensorflow/python/summary/impl/directory_watcher_test.py @@ -23,6 +23,7 @@ import os import shutil from tensorflow.python.framework import test_util +from tensorflow.python.platform import gfile from tensorflow.python.platform import googletest from tensorflow.python.summary.impl import directory_watcher from tensorflow.python.summary.impl import io_wrapper @@ -193,10 +194,12 @@ class DirectoryWatcherTest(test_util.TensorFlowTestCase): FakeFactory.has_been_called = False - for stub_name in ['ListDirectoryAbsolute', 'ListRecursively', 'IsDirectory', - 'Exists', 'Size']: + for stub_name in ['ListDirectoryAbsolute', 'ListRecursively']: self.stubs.Set(io_wrapper, stub_name, 
FakeFactory(getattr(io_wrapper, stub_name))) + for stub_name in ['IsDirectory', 'Exists', 'Stat']: + self.stubs.Set(gfile, stub_name, + FakeFactory(getattr(gfile, stub_name))) with self.assertRaises((IOError, OSError)): self._LoadAllEvents() diff --git a/tensorflow/python/summary/impl/gcs.py b/tensorflow/python/summary/impl/gcs.py deleted file mode 100644 index cf2c61067f6..00000000000 --- a/tensorflow/python/summary/impl/gcs.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functions for communicating with Google Cloud Storage.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import subprocess - -from tensorflow.python.platform import tf_logging as logging - -# All GCS paths should start with this. -PATH_PREFIX = 'gs://' - -# TODO(phurst): We should use the GCS Python API. - - -def CopyContents(gcs_path, byte_offset, local_file): - """Copies the contents of gcs_path from byte_offset onwards to local_file. - - Args: - gcs_path: The path to the GCS object. - byte_offset: The byte offset to start appending from. - local_file: The file object to write into. - - Raises: - ValueError: If offset is negative or gcs_path is not a valid GCS path. - CalledProcessError: If the gsutil command failed. 
- """ - if byte_offset < 0: - raise ValueError('byte_offset must not be negative') - command = ['gsutil', 'cat', '-r', '%d-' % byte_offset, gcs_path] - subprocess.check_call(command, stdout=local_file) - local_file.flush() - - -def ListDirectory(directory): - """Lists all files in the given directory.""" - command = ['gsutil', 'ls', directory] - return subprocess.check_output(command).splitlines() - - -def ListRecursively(top): - """Walks a directory tree, yielding (dir_path, file_paths) tuples. - - For each top |top| and its subdirectories, yields a tuple containing the path - to the directory and the path to each of the contained files. Note that - unlike os.Walk()/gfile.Walk(), this does not list subdirectories and the file - paths are all absolute. - - Args: - top: A path to a GCS directory. - Returns: - A list of (dir_path, file_paths) tuples. - - """ - if top.endswith('/'): - wildcard = top + '**' - else: - wildcard = top + '/**' - tuples = [] - try: - file_paths = ListDirectory(wildcard) - except subprocess.CalledProcessError as e: - logging.info('%s, assuming it means no files were found', e) - return [] - for file_path in file_paths: - dir_path = os.path.dirname(file_path) - if tuples and tuples[-1][0] == dir_path: - tuples[-1][1].append(file_path) - else: - tuples.append((dir_path, [file_path])) - return tuples - - -def IsDirectory(path): - """Returns true if path exists and is a directory.""" - path = path.rstrip('/') - try: - ls = ListDirectory(path) - except subprocess.CalledProcessError: - # Doesn't exist. - return False - if len(ls) == 1: - # Either it's a file (which ls-es as itself) or it's a dir with one file. 
- return ls[0] != path - else: - return True - - -def Exists(path): - """Returns true if path exists.""" - try: - ListDirectory(path) - return True - except subprocess.CalledProcessError: - return False - - -def IsGCSPath(path): - return path.startswith(PATH_PREFIX) - - -def CheckIsSupported(): - """Raises an OSError if the system isn't set up for Google Cloud Storage. - - Raises: - OSError: If the system hasn't been set up so that TensorBoard can access - Google Cloud Storage. The error's message contains installation - instructions. - """ - try: - subprocess.check_output(['gsutil', 'version']) - except OSError as e: - logging.error('Error while checking for gsutil: %s', e) - raise OSError( - 'Unable to execute the gsutil binary, which is required for Google ' - 'Cloud Storage support. You can find installation instructions at ' - 'https://goo.gl/sST520') diff --git a/tensorflow/python/summary/impl/gcs_file_loader.py b/tensorflow/python/summary/impl/gcs_file_loader.py deleted file mode 100644 index c46534dbb52..00000000000 --- a/tensorflow/python/summary/impl/gcs_file_loader.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Loads events from a file stored on Google Cloud Storage.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tempfile - -from tensorflow.core.util import event_pb2 -from tensorflow.python import pywrap_tensorflow -from tensorflow.python.framework import errors -from tensorflow.python.platform import app -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.summary.impl import gcs -from tensorflow.python.util import compat - - -class GCSFileLoader(object): - """A GCSFileLoader loads Event protos from a path to GCS storage. - - The GCSFileLoader keeps track of the offset in the file, copies the contents - of the file to local disk, reads it, and then immediately deletes the file. - """ - - def __init__(self, gcs_path): - if not gcs.IsGCSPath(gcs_path): - raise ValueError('A GCS path is required') - self._gcs_path = gcs_path - self._gcs_offset = 0 - - def Load(self): - # Create a temp file to hold the contents that we haven't seen yet. 
- with tempfile.NamedTemporaryFile(prefix='tf-gcs-') as temp_file: - name = temp_file.name - logging.debug('Temp file created at %s', name) - gcs.CopyContents(self._gcs_path, self._gcs_offset, temp_file) - with errors.raise_exception_on_not_ok_status() as status: - reader = pywrap_tensorflow.PyRecordReader_New( - compat.as_bytes(name), 0, compat.as_bytes(''), status) - while reader.GetNext(): - event = event_pb2.Event() - event.ParseFromString(reader.record()) - yield event - logging.debug('No more events in %s', name) - self._gcs_offset += reader.offset() - - -def main(argv): - if len(argv) != 2: - print('Usage: gcs_file_loader ') - return 1 - loader = GCSFileLoader(argv[1]) - for event in loader.Load(): - print(event) - - -if __name__ == '__main__': - app.run() diff --git a/tensorflow/python/summary/impl/gcs_file_loader_test.py b/tensorflow/python/summary/impl/gcs_file_loader_test.py deleted file mode 100644 index d35f3df4fc5..00000000000 --- a/tensorflow/python/summary/impl/gcs_file_loader_test.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from tensorflow.python.platform import googletest -from tensorflow.python.summary.impl import gcs -from tensorflow.python.summary.impl import gcs_file_loader - - -class GCSFileLoaderTest(tf.test.TestCase): - - def setUp(self): - self._append_contents_call_count = 0 - # A record containing a simple event. - self._stubs = googletest.StubOutForTesting() - self._stubs.Set(gcs, 'CopyContents', self._MockCopyContents) - - def tearDown(self): - self._stubs.CleanUp() - - def testLoad(self): - loader = gcs_file_loader.GCSFileLoader('gs://some-fake-url') - events = list(loader.Load()) - self.assertEqual(len(events), 1) - self.assertEqual(events[0].file_version, 'brain.Event:1') - events = list(loader.Load()) - self.assertEqual(len(events), 1) - self.assertEqual(events[0].file_version, 'brain.Event:2') - events = list(loader.Load()) - self.assertEqual(len(events), 0) - self.assertEqual(self._append_contents_call_count, 3) - - # A couple of simple records. 
- MOCK_RECORDS = [ - b'\x18\x00\x00\x00\x00\x00\x00\x00\xa3\x7fK"\t\x00\x00\xc0%\xddu' - b'\xd5A\x1a\rbrain.Event:1\xec\xf32\x8d', - b'\x18\x00\x00\x00\x00\x00\x00\x00\xa3\x7fK"\t\x00\x00\x00\'\xe6' - b'\xb3\xd5A\x1a\rbrain.Event:2jM\x0b\x15' - ] - - def _MockCopyContents(self, gcs_path, offset, local_file): - if self._append_contents_call_count == 0: - self.assertEqual(offset, 0) - elif self._append_contents_call_count == 1: - self.assertEqual(offset, len(self.MOCK_RECORDS[0])) - else: - self.assertEqual(offset, - len(self.MOCK_RECORDS[0]) + len(self.MOCK_RECORDS[1])) - - if self._append_contents_call_count < len(self.MOCK_RECORDS): - local_file.write(self.MOCK_RECORDS[self._append_contents_call_count]) - local_file.flush() - self._append_contents_call_count += 1 - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/python/summary/impl/io_wrapper.py b/tensorflow/python/summary/impl/io_wrapper.py index f7138833d6b..258fe8c804f 100644 --- a/tensorflow/python/summary/impl/io_wrapper.py +++ b/tensorflow/python/summary/impl/io_wrapper.py @@ -12,13 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Functions that wrap both gfile and gcs. - -This module is *not* intended to be a general-purpose IO wrapper library; it -only implements the operations that are necessary for loading event files. The -functions either dispatch to the gcs library or to gfile, depending on whether -the path is a GCS 'pseudo-path' (i.e., it satisfies gcs.IsGCSPath) or not. 
-""" +"""IO helper functions.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -26,32 +20,16 @@ from __future__ import print_function import os from tensorflow.python.platform import gfile -from tensorflow.python.summary.impl import event_file_loader -from tensorflow.python.summary.impl import gcs -from tensorflow.python.summary.impl import gcs_file_loader -def CreateFileLoader(path): - """Creates a file loader for the given path. - - Args: - path: A string representing either a normal path or a GCS - Returns: - An object with a Load() method that yields event_pb2.Event protos. - """ - if gcs.IsGCSPath(path): - return gcs_file_loader.GCSFileLoader(path) - else: - return event_file_loader.EventFileLoader(path) +def IsGCSPath(path): + return path.startswith("gs://") def ListDirectoryAbsolute(directory): """Yields all files in the given directory. The paths are absolute.""" - if gcs.IsGCSPath(directory): - return gcs.ListDirectory(directory) - else: - return (os.path.join(directory, path) - for path in gfile.ListDirectory(directory)) + return (os.path.join(directory, path) + for path in gfile.ListDirectory(directory)) def ListRecursively(top): @@ -69,33 +47,6 @@ def ListRecursively(top): Yields: A list of (dir_path, file_paths) tuples. """ - if gcs.IsGCSPath(top): - for x in gcs.ListRecursively(top): - yield x - else: - for dir_path, _, filenames in gfile.Walk(top): - yield (dir_path, (os.path.join(dir_path, filename) - for filename in filenames)) - - -def IsDirectory(path): - """Returns true if path exists and is a directory.""" - if gcs.IsGCSPath(path): - return gcs.IsDirectory(path) - else: - return gfile.IsDirectory(path) - - -def Exists(path): - if gcs.IsGCSPath(path): - return gcs.Exists(path) - else: - return gfile.Exists(path) - - -def Size(path): - """Returns the number of bytes in the given file. 
Doesn't work on GCS.""" - if gcs.IsGCSPath(path): - raise NotImplementedError("io_wrapper.Size doesn't support GCS paths") - else: - return gfile.Open(path).size() + for dir_path, _, filenames in gfile.Walk(top): + yield (dir_path, (os.path.join(dir_path, filename) + for filename in filenames)) diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index 5dbde1c5477..a6b348cc991 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -33,6 +33,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import re as _re + import six from google.protobuf import json_format as _json_format @@ -56,16 +58,22 @@ def _collect(val, collections, default_collections): _ops.add_to_collection(key, val) +_INVALID_TAG_CHARACTERS = _re.compile(r'[^-/\w\.]') + def _clean_tag(name): # In the past, the first argument to summary ops was a tag, which allowed - # spaces. Since now we pass in the name, spaces are disallowed; to ease the - # transition and support backwards compatbility, we will convert the spaces - # to underscores (and also warn about it). - if name is not None and ' ' in name: - _logging.warning( - 'Summary tag name %s contains spaces; replacing with underscores.' % - name) - name = name.replace(' ', '_') + # arbitrary characters. Now we are changing the first argument to be the node + # name. This has a number of advantages (users of summary ops now can + # take advantage of the tf name scope system) but risks breaking existing + # usage, because a much smaller set of characters are allowed in node names. + # This function replaces all illegal characters with _s, and logs a warning. + if name is not None: + new_name = _INVALID_TAG_CHARACTERS.sub('_', name) + if new_name != name: + _logging.warning( + 'Summary tag name %s has illegal chars; replacing with underscores.' 
% + name) + name = new_name return name diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py index bd819bbdfed..8acdcb0906b 100644 --- a/tensorflow/python/summary/summary_test.py +++ b/tensorflow/python/summary/summary_test.py @@ -85,6 +85,9 @@ class ScalarSummaryTest(tf.test.TestCase): s = tf.summary.scalar('name with spaces', c) self.assertEqual(s.op.name, 'name_with_spaces') + s2 = tf.summary.scalar('name with many $#illegal^: characters!', c) + self.assertEqual(s2.op.name, 'name_with_many___illegal___characters_') + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index da930f2bdb9..d986a7b4263 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -88,8 +88,28 @@ class _SecondOrStepTimer(object): return False def update_last_triggered_step(self, step): - self._last_triggered_time = time.time() + """Update the last triggered time and step number. + + Args: + step: The current step. + + Returns: + A pair `(elapsed_time, elapsed_steps)`, where `elapsed_time` is the number + of seconds between the current trigger and the last one (a float), and + `elapsed_steps` is the number of steps between the current trigger and + the last one. Both values will be set to `None` on the first trigger. 
+ """ + current_time = time.time() + if self._last_triggered_time is None: + elapsed_secs = None + elapsed_steps = None + else: + elapsed_secs = current_time - self._last_triggered_time + elapsed_steps = step - self._last_triggered_step + + self._last_triggered_time = current_time self._last_triggered_step = step + return (elapsed_secs, elapsed_steps) def last_triggered_step(self): return self._last_triggered_step @@ -272,16 +292,24 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): class StepCounterHook(session_run_hook.SessionRunHook): """Steps per second monitor.""" - def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None): + def __init__(self, + every_n_steps=100, + every_n_secs=None, + output_dir=None, + summary_writer=None): self._summary_tag = "global_step/sec" - self._every_n_steps = every_n_steps + + if (every_n_steps is None) == (every_n_secs is None): + raise ValueError( + "exactly one of every_n_steps and every_n_secs should be provided.") + self._timer = _SecondOrStepTimer(every_steps=every_n_steps, + every_secs=every_n_secs) + self._summary_writer = summary_writer if summary_writer is None and output_dir: self._summary_writer = SummaryWriterCache.get(output_dir) def begin(self): - self._last_reported_time = None - self._last_reported_step = None self._global_step_tensor = training_util.get_global_step() if self._global_step_tensor is None: raise RuntimeError( @@ -294,22 +322,16 @@ class StepCounterHook(session_run_hook.SessionRunHook): _ = run_context global_step = run_values.results - current_time = time.time() - if self._last_reported_time is None: - self._last_reported_step = global_step - self._last_reported_time = current_time - else: - if global_step >= self._every_n_steps + self._last_reported_step: - added_steps = global_step - self._last_reported_step - elapsed_time = current_time - self._last_reported_time - steps_per_sec = added_steps / elapsed_time + if self._timer.should_trigger_for_step(global_step): + 
elapsed_time, elapsed_steps = self._timer.update_last_triggered_step( + global_step) + if elapsed_time is not None: + steps_per_sec = elapsed_steps / elapsed_time if self._summary_writer is not None: summary = Summary(value=[Summary.Value( tag=self._summary_tag, simple_value=steps_per_sec)]) self._summary_writer.add_summary(summary, global_step) logging.info("%s: %g", self._summary_tag, steps_per_sec) - self._last_reported_step = global_step - self._last_reported_time = current_time class NanLossDuringTrainingError(RuntimeError): diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index 77be27a4ff3..fbf0394c5a4 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -62,6 +62,21 @@ class SecondOrStepTimerTest(tf.test.TestCase): self.assertFalse(timer.should_trigger_for_step(3)) self.assertTrue(timer.should_trigger_for_step(4)) + def test_update_last_triggered_step(self): + timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=1) + + elapsed_secs, elapsed_steps = timer.update_last_triggered_step(1) + self.assertEqual(None, elapsed_secs) + self.assertEqual(None, elapsed_steps) + + elapsed_secs, elapsed_steps = timer.update_last_triggered_step(5) + self.assertLess(0, elapsed_secs) + self.assertEqual(4, elapsed_steps) + + elapsed_secs, elapsed_steps = timer.update_last_triggered_step(7) + self.assertLess(0, elapsed_secs) + self.assertEqual(2, elapsed_steps) + class StopAtStepTest(tf.test.TestCase): @@ -297,7 +312,7 @@ class StepCounterHookTest(tf.test.TestCase): def tearDown(self): shutil.rmtree(self.log_dir, ignore_errors=True) - def test_step_counter(self): + def test_step_counter_every_n_steps(self): with tf.Graph().as_default() as g, tf.Session() as sess: global_step = tf.contrib.framework.get_or_create_global_step() train_op = tf.assign_add(global_step, 1) @@ -316,11 +331,41 @@ class 
StepCounterHookTest(tf.test.TestCase): expected_logdir=self.log_dir, expected_graph=g, expected_summaries={}) + self.assertItemsEqual([11, 21], summary_writer.summaries.keys()) for step in [11, 21]: summary_value = summary_writer.summaries[step][0].value[0] - self.assertTrue(summary_value.tag, 'global_step/sec') - # check at least 10 steps per sec is recorded. - self.assertGreater(summary_value.simple_value, 10) + self.assertEqual('global_step/sec', summary_value.tag) + self.assertGreater(summary_value.simple_value, 0) + + def test_step_counter_every_n_secs(self): + with tf.Graph().as_default() as g, tf.Session() as sess: + global_step = tf.contrib.framework.get_or_create_global_step() + train_op = tf.assign_add(global_step, 1) + summary_writer = testing.FakeSummaryWriter(self.log_dir, g) + hook = tf.train.StepCounterHook( + summary_writer=summary_writer, every_n_steps=None, every_n_secs=0.1) + + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + mon_sess.run(train_op) + time.sleep(0.2) + mon_sess.run(train_op) + time.sleep(0.2) + mon_sess.run(train_op) + hook.end(sess) + + summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_graph=g, + expected_summaries={}) + self.assertTrue(summary_writer.summaries, 'No summaries were created.') + self.assertItemsEqual([2, 3], summary_writer.summaries.keys()) + for summary in summary_writer.summaries.values(): + summary_value = summary[0].value[0] + self.assertEqual('global_step/sec', summary_value.tag) + self.assertGreater(summary_value.simple_value, 0) class SummarySaverHookTest(tf.test.TestCase): diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py index 2bdfb211608..c976f19775a 100644 --- a/tensorflow/python/training/input.py +++ b/tensorflow/python/training/input.py @@ -647,7 +647,6 @@ def batch(tensors, batch_size, num_threads=1, capacity=32, # TODO(josh11b,mrry): Switch to BatchQueue 
once it is written. queue = _which_queue(dynamic_pad)( capacity=capacity, dtypes=types, shapes=shapes, shared_name=shared_name) - print("Enqueueing: ", enqueue_many, tensor_list, shapes) _enqueue(queue, tensor_list, num_threads, enqueue_many) summary.scalar("queue/%s/fraction_of_%d_full" % (queue.name, capacity), math_ops.cast(queue.size(), dtypes.float32) * diff --git a/tensorflow/python/training/input_test.py b/tensorflow/python/training/input_test.py index 8cc15849398..07b8ac3ccf2 100644 --- a/tensorflow/python/training/input_test.py +++ b/tensorflow/python/training/input_test.py @@ -701,37 +701,37 @@ class BatchTest(tf.test.TestCase): def testBatchedSparseTensorInferredShape(self): sparse = tf.SparseTensor(indices=[[0]], values=[1.0], shape=[1]) - self.assertAllEqual(sparse.shape.get_shape().as_list(), [1]) + self.assertAllEqual((1,), sparse.shape.get_shape().as_list()) batched = tf.train.batch([sparse], batch_size=2) - self.assertAllEqual(batched.shape.get_shape().as_list(), [2]) + self.assertAllEqual((2,), batched.shape.get_shape().as_list()) def testBatchedSparseTensorInferredShapeEnqueueMany(self): sparse = tf.SparseTensor(indices=[[0]], values=[1.0], shape=[1]) - self.assertAllEqual(sparse.shape.get_shape().as_list(), [1]) + self.assertAllEqual((1,), sparse.shape.get_shape().as_list()) batched = tf.train.batch([sparse], batch_size=2, enqueue_many=True) - self.assertAllEqual(batched.shape.get_shape().as_list(), [1]) + self.assertAllEqual((1,), batched.shape.get_shape().as_list()) def testBatchedSparseTensorInferredShapeUnknownRank(self): sparse = tf.SparseTensor( indices=tf.placeholder(tf.int64), values=tf.placeholder(tf.float32), shape=tf.placeholder(tf.int64)) - self.assertIs(sparse.shape.get_shape().num_elements(), None) + self.assertIs(None, sparse.shape.get_shape().num_elements()) batched = tf.train.batch([sparse], batch_size=2) - self.assertIs(batched.shape.get_shape().num_elements(), None) + self.assertIs(None, 
batched.shape.get_shape().num_elements()) def testBatchedSparseTensorInferredShapeUnknownRankEnqueueMany(self): sparse = tf.SparseTensor( indices=tf.placeholder(tf.int64), values=tf.placeholder(tf.float32), shape=tf.placeholder(tf.int64)) - self.assertIs(sparse.shape.get_shape().num_elements(), None) + self.assertIs(None, sparse.shape.get_shape().num_elements()) batched = tf.train.batch([sparse], batch_size=2, enqueue_many=True) - self.assertIs(batched.shape.get_shape().num_elements(), None) + self.assertIs(None, batched.shape.get_shape().num_elements()) def testSingleElementDict(self): x = tf.train.batch({"c": [12, 12]}, batch_size=8) - self.assertEqual([8, 2], x["c"].get_shape().as_list()) + self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) class BatchJoinTest(tf.test.TestCase): @@ -771,6 +771,17 @@ class BatchJoinTest(tf.test.TestCase): [ninety_nine, sparse_ninety_nine, "b"]], batch_size=batch_size) batched_fetch = batched + + # Shapes. + self.assertEqual(3, len(batched_fetch)) + self.assertAllEqual((batch_size,), batched_fetch[0].get_shape().as_list()) + self.assertAllEqual( + (None, 2), batched_fetch[1].indices.get_shape().as_list()) + self.assertAllEqual( + (None,), batched_fetch[1].values.get_shape().as_list()) + self.assertAllEqual((2,), batched_fetch[1].shape.get_shape().as_list()) + self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -782,9 +793,9 @@ class BatchJoinTest(tf.test.TestCase): num_batches = (num_a + num_b) // batch_size for i in range(num_batches): results = sess.run(batched_fetch) - tf.logging.info("Batch %d: %s", i, results[0]) - self.assertEqual(len(results[0]), batch_size) - self.assertEqual(len(results[2]), batch_size) + self.assertEqual(3, len(results)) + self.assertEqual(batch_size, len(results[0])) + self.assertEqual(batch_size, len(results[2])) self.assertAllEqual(results[0], 
results[1].values) self.assertAllEqual( results[1].indices, @@ -846,6 +857,12 @@ class BatchJoinTest(tf.test.TestCase): [[counter, a], [ninety_nine, b]], batch_size=batch_size, dynamic_pad=True) + + # Shapes. + self.assertEqual(2, len(batched)) + self.assertAllEqual((batch_size,), batched[0].get_shape().as_list()) + self.assertAllEqual((batch_size, None), batched[1].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -858,7 +875,7 @@ class BatchJoinTest(tf.test.TestCase): num_batches = (num_a + num_b) // batch_size for i in range(num_batches): results = sess.run(batched) - tf.logging.info("Batch %d: %s", i, results[0]) + self.assertEqual(2, len(results)) self.assertEqual(len(results[0]), batch_size) self.assertEqual(len(results[1]), batch_size) for s in results[1]: @@ -920,6 +937,14 @@ class BatchJoinTest(tf.test.TestCase): batch_size=batch_size, allow_smaller_final_batch=True) + # Shapes. + self.assertEqual(3, len(batched)) + self.assertAllEqual((None,), batched[0].get_shape().as_list()) + self.assertAllEqual((None, 2), batched[1].indices.get_shape().as_list()) + self.assertAllEqual((None,), batched[1].values.get_shape().as_list()) + self.assertAllEqual((2,), batched[1].shape.get_shape().as_list()) + self.assertAllEqual((None,), batched[2].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -1003,6 +1028,12 @@ class BatchJoinTest(tf.test.TestCase): batch_size=batch_size, dynamic_pad=True, allow_smaller_final_batch=True) + + # Shapes. 
+ self.assertEqual(2, len(batched)) + self.assertAllEqual((None,), batched[0].get_shape().as_list()) + self.assertAllEqual((None, None), batched[1].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -1075,6 +1106,11 @@ class BatchJoinTest(tf.test.TestCase): [[counter, "string"]], batch_size=batch_size, shared_name="SHARED_NAME_XYZ", name="Q") + # Shapes. + self.assertEqual(2, len(batched)) + self.assertAllEqual((batch_size,), batched[0].get_shape().as_list()) + self.assertAllEqual((batch_size,), batched[1].get_shape().as_list()) + self.assertProtoEquals( "s: 'SHARED_NAME_XYZ'", batched[0].op.inputs[0].op.node_def.attr["shared_name"]) @@ -1087,7 +1123,7 @@ class BatchJoinTest(tf.test.TestCase): def testSingleElementDict(self): x = tf.train.batch_join([{"c": [12, 12]}], batch_size=8) - self.assertEqual([8, 2], x["c"].get_shape().as_list()) + self.assertAllEqual((8, 2), x["c"].get_shape().as_list()) class ShuffleBatchTest(tf.test.TestCase): @@ -1356,6 +1392,16 @@ class ShuffleBatchJoinTest(tf.test.TestCase): min_after_dequeue=16, seed=223607) batched_fetch = batched + # Shapes. 
+ self.assertEqual(3, len(batched_fetch)) + self.assertAllEqual((batch_size,), batched_fetch[0].get_shape().as_list()) + self.assertAllEqual( + (None, 2), batched_fetch[1].indices.get_shape().as_list()) + self.assertAllEqual( + (None,), batched_fetch[1].values.get_shape().as_list()) + self.assertAllEqual((2,), batched_fetch[1].shape.get_shape().as_list()) + self.assertAllEqual((batch_size,), batched_fetch[2].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -1367,7 +1413,7 @@ class ShuffleBatchJoinTest(tf.test.TestCase): num_batches = (num_a + num_b) // batch_size for i in range(num_batches): results = sess.run(batched_fetch) - tf.logging.info("Batch %d: %s", i, results[0]) + self.assertEqual(3, len(results)) self.assertEqual(len(results[0]), batch_size) self.assertEqual(len(results[2]), batch_size) self.assertAllEqual(results[0], results[1].values) @@ -1436,6 +1482,14 @@ class ShuffleBatchJoinTest(tf.test.TestCase): batch_size=batch_size, capacity=32, min_after_dequeue=16, seed=223607, allow_smaller_final_batch=True) + # Shapes. + self.assertEqual(3, len(batched)) + self.assertAllEqual((None,), batched[0].get_shape().as_list()) + self.assertAllEqual((None, 2), batched[1].indices.get_shape().as_list()) + self.assertAllEqual((None,), batched[1].values.get_shape().as_list()) + self.assertAllEqual((2,), batched[1].shape.get_shape().as_list()) + self.assertAllEqual((None,), batched[2].get_shape().as_list()) + tf.initialize_all_variables().run() tf.initialize_local_variables().run() threads = tf.train.start_queue_runners() @@ -1518,6 +1572,11 @@ class ShuffleBatchJoinTest(tf.test.TestCase): min_after_dequeue=10, shared_name="SHARED_NAME_XYZ", name="Q") + # Shapes. 
+ self.assertEqual(2, len(batched)) + self.assertAllEqual((batch_size,), batched[0].get_shape().as_list()) + self.assertAllEqual((batch_size,), batched[1].get_shape().as_list()) + self.assertProtoEquals( "s: 'SHARED_NAME_XYZ'", batched[0].op.inputs[0].op.node_def.attr["shared_name"]) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 0a08a5b5aac..9a331e69a79 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1000,6 +1000,7 @@ class Saver(object): self.build() if self.saver_def: self._check_saver_def() + self._write_version = self.saver_def.version def build(self): """Builds saver_def.""" @@ -1461,8 +1462,8 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None): return None -def import_meta_graph(meta_graph_or_file, import_scope=None, - **kwargs): +def import_meta_graph(meta_graph_or_file, clear_devices=False, + import_scope=None, **kwargs): """Recreates a Graph saved in a `MetaGraphDef` proto. This function takes a `MetaGraphDef` protocol buffer as input. If @@ -1516,6 +1517,8 @@ def import_meta_graph(meta_graph_or_file, import_scope=None, Args: meta_graph_or_file: `MetaGraphDef` protocol buffer or filename (including the path) containing a `MetaGraphDef`. + clear_devices: Whether or not to clear the device field for an `Operation` + or `Tensor` during import. import_scope: Optional `string`. Name scope to add. Only used when initializing from protocol buffer. **kwargs: Optional keyed arguments. 
@@ -1532,6 +1535,7 @@ def import_meta_graph(meta_graph_or_file, import_scope=None, meta_graph_def = meta_graph_or_file meta_graph.import_scoped_meta_graph(meta_graph_def, + clear_devices=clear_devices, import_scope=import_scope, **kwargs) if meta_graph_def.HasField("saver_def"): diff --git a/tensorflow/python/training/saver_large_variable_test.py b/tensorflow/python/training/saver_large_variable_test.py index 40f0a47e430..1e6d9e0c770 100644 --- a/tensorflow/python/training/saver_large_variable_test.py +++ b/tensorflow/python/training/saver_large_variable_test.py @@ -37,7 +37,8 @@ class SaverLargeVariableTest(tf.test.TestCase): with tf.device("/cpu:0"): var = tf.Variable( tf.constant(False, shape=[2, 1024, 1024, 1024], dtype=tf.bool)) - save = tf.train.Saver({var.op.name: var}) + save = tf.train.Saver({var.op.name: var}, + write_version=tf.train.SaverDef.V1) var.initializer.run() with self.assertRaisesRegexp( tf.errors.InvalidArgumentError, diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 987b7164d65..23bd61c384e 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -1590,12 +1590,40 @@ class MetaGraphTest(tf.test.TestCase): new_saver = tf.train.import_meta_graph( filename + ".meta", graph=graph, import_scope="new_model") new_saver.restore(sess, filename) - tf.train.write_graph(graph, "/tmp", "new_graph.pbtxt", as_text=True) - label = [0] * 10 - label[4] = 4 sess.run(["new_model/optimize"], {"new_model/image:0": np.random.random([1, 784]), - "new_model/label:0": np.reshape(label, [1, 10])}) + "new_model/label:0": + np.random.random_integers(10, size=[1, 10])}) + + def testClearDevices(self): + # Test that we import a graph without its devices and run successfully. 
+ with tf.Graph().as_default(): + with tf.device("/job:ps/replica:0/task:0/device:GPU:0"): + image = tf.placeholder(tf.float32, [None, 784], name="image") + label = tf.placeholder(tf.float32, [None, 10], name="label") + weights = tf.Variable(tf.random_uniform([784, 10]), name="weights") + bias = tf.Variable(tf.zeros([10]), name="bias") + logit = tf.nn.relu(tf.matmul(image, weights) + bias) + tf.nn.softmax(logit, name="prediction") + cost = tf.nn.softmax_cross_entropy_with_logits(logit, label) + tf.train.AdamOptimizer().minimize(cost, name="optimize") + meta_graph_def = tf.train.export_meta_graph() + + with tf.Session(graph=tf.Graph()) as sess: + tf.train.import_meta_graph( + meta_graph_def, clear_devices=False, import_scope="new_model") + with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, + "Cannot assign a device to node"): + sess.run(tf.initialize_all_variables()) + + with tf.Session(graph=tf.Graph()) as sess: + tf.train.import_meta_graph( + meta_graph_def, clear_devices=True, import_scope="new_model") + sess.run(tf.initialize_all_variables()) + sess.run(["new_model/optimize"], + {"new_model/image:0": np.random.random([1, 784]), + "new_model/label:0": + np.random.random_integers(10, size=[1, 10])}) class CheckpointReaderTest(tf.test.TestCase): diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index a8484c4ae17..1a11eb86f8d 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -67,6 +67,10 @@ gradients. 
## Decaying the learning rate @@exponential_decay +@@inverse_time_decay +@@natural_exp_decay +@@piecewise_constant +@@polynomial_decay ## Moving Averages diff --git a/tensorflow/tensorboard/TAG b/tensorflow/tensorboard/TAG index f5c89552bd3..bb95160cb6e 100644 --- a/tensorflow/tensorboard/TAG +++ b/tensorflow/tensorboard/TAG @@ -1 +1 @@ -32 +33 diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py index 6c2f51e14a2..f590b5e02f4 100644 --- a/tensorflow/tensorboard/backend/server.py +++ b/tensorflow/tensorboard/backend/server.py @@ -32,7 +32,7 @@ from six.moves import socketserver from tensorflow.python.platform import tf_logging as logging from tensorflow.python.summary import event_accumulator -from tensorflow.python.summary.impl import gcs +from tensorflow.python.summary.impl import io_wrapper from tensorflow.tensorboard.backend import handler # How many elements to store per tag, by tag type @@ -69,7 +69,8 @@ def ParseEventFilesSpec(logdir): return files for specification in logdir.split(','): # If it's a gcs or hdfs path, don't split on colon - if gcs.IsGCSPath(specification) or specification.startswith('hdfs://'): + if (io_wrapper.IsGCSPath(specification) or + specification.startswith('hdfs://')): run_name = None path = specification # If the spec looks like /foo:bar/baz, then we assume it's a path with a @@ -80,7 +81,7 @@ def ParseEventFilesSpec(logdir): else: run_name = None path = specification - if not (gcs.IsGCSPath(path) or path.startswith('hdfs://')): + if not (io_wrapper.IsGCSPath(path) or path.startswith('hdfs://')): path = os.path.realpath(path) files[path] = run_name return files @@ -120,14 +121,6 @@ def StartMultiplexerReloadingThread(multiplexer, path_to_run, load_interval): """ # We don't call multiplexer.Reload() here because that would make # AddRunsFromDirectory block until the runs have all loaded. 
- for path in path_to_run.keys(): - if gcs.IsGCSPath(path): - gcs.CheckIsSupported() - logging.info( - 'Assuming %s is intended to be a Google Cloud Storage path because ' - 'it starts with %s. If it isn\'t, prefix it with \'/.\' (i.e., use ' - '/.%s instead)', path, gcs.PATH_PREFIX, path) - def _ReloadForever(): while True: ReloadMultiplexer(multiplexer, path_to_run) diff --git a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html index e934e8a9181..dadad81a343 100644 --- a/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html +++ b/tensorflow/tensorboard/components/tf_dashboard_common/tf-multi-checkbox.html @@ -43,7 +43,8 @@ handle these situations gracefully. id="runs-regex" no-label-float label="Write a regex to filter runs" - value="{{regexInput}}" + value="[[regexInput]]" + on-bind-value-changed="_debouncedRegexChange" >
+ - diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html index eec1afe9e66..7e9c20294a2 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.html @@ -86,7 +86,7 @@ paper-dropdown-menu paper-item { color: black; display: flex; font-weight: 500; - height: 50px; + height: 59px; padding-left: 20px; } diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts index 068035148c5..342144c245a 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-data-panel.ts @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -import {ColorOption, ColumnStats} from './data'; -import {CheckpointInfo, DataProvider, parseRawMetadata, parseRawTensors} from './data-loader'; +import {ColorOption, ColumnStats, MetadataInfo} from './data'; +import {CheckpointInfo, DataProvider, parseRawMetadata, parseRawTensors} from './data-provider'; import {Projector} from './vz-projector'; import {ColorLegendRenderInfo, ColorLegendThreshold} from './vz-projector-legend'; // tslint:disable-next-line:no-unused-variable @@ -75,8 +75,8 @@ export class DataPanel extends DataPanelPolymer { // Get all the runs. this.dataProvider.retrieveRuns(runs => { this.runNames = runs; - // If there is only 1 run, choose that one by default. - if (this.runNames.length === 1) { + // Choose the first run by default. 
+ if (this.runNames.length > 0) { this.selectedRun = runs[0]; } }); @@ -86,23 +86,23 @@ export class DataPanel extends DataPanelPolymer { return isSeparator ? 'separator' : null; } - updateMetadataUI(columnStats: ColumnStats[], metadataFile: string) { + metadataChanged(metadata: MetadataInfo, metadataFile: string) { + this.updateMetadataUI(metadata.stats, metadataFile); + } + + private updateMetadataUI(columnStats: ColumnStats[], metadataFile: string) { this.dom.select('#metadata-file') .text(metadataFile) .attr('title', metadataFile); // Label by options. let labelIndex = -1; - if (columnStats.length > 1) { - this.labelOptions = columnStats.map((stats, i) => { - // Make the default label by the first non-numeric column. - if (!stats.isNumeric && labelIndex === -1) { - labelIndex = i; - } - return stats.name; - }); - } else { - this.labelOptions = ['label']; - } + this.labelOptions = columnStats.map((stats, i) => { + // Make the default label by the first non-numeric column. + if (!stats.isNumeric && labelIndex === -1) { + labelIndex = i; + } + return stats.name; + }); this.selectedLabelOption = this.labelOptions[Math.max(0, labelIndex)]; // Color by options. 
@@ -170,11 +170,10 @@ export class DataPanel extends DataPanelPolymer { if (metadataFile) { this.dataProvider.retrieveMetadata( this.selectedRun, this.selectedTensor, metadata => { - this.projector.updateDataSet(ds, metadata); - this.updateMetadataUI(metadata.stats, metadataFile); + this.projector.updateDataSet(ds, metadata, metadataFile); }); } else { - this.projector.updateDataSet(ds, null); + this.projector.updateDataSet(ds); } }); this.projector.setSelectedTensor( @@ -208,7 +207,13 @@ export class DataPanel extends DataPanelPolymer { .text(this.checkpointInfo.checkpointFile) .attr('title', this.checkpointInfo.checkpointFile); this.dataProvider.getDefaultTensor(this.selectedRun, defaultTensor => { - this.selectedTensor = defaultTensor; + if (this.selectedTensor === defaultTensor) { + // Explicitly call the observer. Polymer won't call it if the previous + // string matches the current string. + this._selectedTensorChanged(); + } else { + this.selectedTensor = defaultTensor; + } }); }); } @@ -254,14 +259,13 @@ export class DataPanel extends DataPanelPolymer { this.dom.select('#checkpoint-file') .text(fileName) .attr('title', fileName); - this.projector.updateDataSet(ds, null); + this.projector.updateDataSet(ds); }); } private metadataWasReadFromFile(rawContents: string, fileName: string) { parseRawMetadata(rawContents, metadata => { - this.projector.updateDataSet(this.projector.currentDataSet, metadata); - this.updateMetadataUI(metadata.stats, fileName); + this.projector.updateDataSet(this.projector.dataSet, metadata, fileName); }); } diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts b/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts index 35630412606..6270185dd4a 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-input.ts @@ -27,51 +27,53 @@ export interface InputChangedListener { /** Input control with custom 
capabilities (e.g. regex). */ export class ProjectorInput extends PolymerClass { private dom: d3.Selection; - private inputChangedListeners: InputChangedListener[]; + private textChangedListeners: InputChangedListener[]; private paperInput: HTMLInputElement; + private inRegexModeButton: HTMLButtonElement; private inRegexMode: boolean; /** Message that will be displayed at the bottom of the input control. */ message: string; - /** Placeholder text for the input control. */ - label: string; /** Subscribe to be called everytime the input changes. */ - onInputChanged(listener: InputChangedListener) { - this.inputChangedListeners.push(listener); + registerInputChangedListener(listener: InputChangedListener) { + this.textChangedListeners.push(listener); } ready() { this.inRegexMode = false; - this.inputChangedListeners = []; + this.textChangedListeners = []; this.dom = d3.select(this); this.paperInput = this.querySelector('paper-input') as HTMLInputElement; - let paperButton = this.querySelector('paper-button') as HTMLButtonElement; + this.inRegexModeButton = + this.querySelector('paper-button') as HTMLButtonElement; this.paperInput.setAttribute('error-message', 'Invalid regex'); this.paperInput.addEventListener('input', () => { - this.inputChanged(); + this.onTextChanged(); }); this.paperInput.addEventListener('keydown', event => { event.stopPropagation(); }); - // Setup the regex mode button. 
- paperButton.addEventListener('click', () => { - this.inRegexMode = (paperButton as any).active; - this.showHideSlashes(); - this.inputChanged(); - }); - this.showHideSlashes(); - this.inputChanged(); + this.inRegexModeButton.addEventListener( + 'click', () => this.onClickRegexModeButton()); + this.updateRegexModeDisplaySlashes(); + this.onTextChanged(); + } + + private onClickRegexModeButton() { + this.inRegexMode = (this.inRegexModeButton as any).active; + this.updateRegexModeDisplaySlashes(); + this.onTextChanged(); } private notifyInputChanged(value: string, inRegexMode: boolean) { - this.inputChangedListeners.forEach(l => l(value, inRegexMode)); + this.textChangedListeners.forEach(l => l(value, inRegexMode)); } - private inputChanged() { + private onTextChanged() { try { if (this.inRegexMode) { new RegExp(this.paperInput.value); @@ -86,7 +88,7 @@ export class ProjectorInput extends PolymerClass { this.notifyInputChanged(this.paperInput.value, this.inRegexMode); } - private showHideSlashes() { + private updateRegexModeDisplaySlashes() { d3.select(this.paperInput) .selectAll('.slash') .style('display', this.inRegexMode ? null : 'none'); @@ -99,6 +101,12 @@ export class ProjectorInput extends PolymerClass { getInRegexMode(): boolean { return this.inRegexMode; } + + set(value: string, inRegexMode: boolean) { + (this.inRegexModeButton as any).active = inRegexMode; + this.paperInput.value = value; + this.onClickRegexModeButton(); + } } document.registerElement(ProjectorInput.prototype.is, ProjectorInput); diff --git a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html index cd888369ea0..7554c322cef 100644 --- a/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html +++ b/tensorflow/tensorboard/components/vz_projector/vz-projector-inspector-panel.html @@ -54,6 +54,19 @@ limitations under the License. 
margin-right: 0; } +.nn { + display: flex; + flex-direction: column; +} + +.nn > * { + padding: 0 20px; +} + +.nn-list { + overflow-y: auto; +} + .nn-list .neighbor { font-size: 12px; margin-bottom: 8px; @@ -154,6 +167,10 @@ limitations under the License. margin-right: 10px; } +.matches-list { + padding: 0 20px; +} + .matches-list .row { border-bottom: 1px solid #ddd; cursor: pointer; @@ -164,8 +181,8 @@ limitations under the License. } .results { - overflow-y: auto; - padding: 0 20px; + display: flex; + flex-direction: column; }