diff --git a/configure b/configure index 426071e48d0..08078d29d5c 100755 --- a/configure +++ b/configure @@ -109,7 +109,7 @@ fi ## Find swig path if [ -z "$SWIG_PATH" ]; then - SWIG_PATH=`type -p swig 2> /dev/null` + SWIG_PATH=`type -p swig 2> /dev/null || true` fi if [[ ! -e "$SWIG_PATH" ]]; then echo "Can't find swig. Ensure swig is in \$PATH or set \$SWIG_PATH." diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD index e0cdf06938c..bb2a6063b5c 100644 --- a/tensorflow/cc/saved_model/BUILD +++ b/tensorflow/cc/saved_model/BUILD @@ -28,7 +28,6 @@ cc_library( deps = [ ":constants", "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:tensorflow", @@ -45,7 +44,9 @@ tf_cc_test( deps = [ ":constants", ":loader", + ":signature_constants", "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", "//tensorflow/core:test_main", "//tensorflow/core:testlib", diff --git a/tensorflow/cc/saved_model/constants.h b/tensorflow/cc/saved_model/constants.h index fba1dca5345..b97f6c84faf 100644 --- a/tensorflow/cc/saved_model/constants.h +++ b/tensorflow/cc/saved_model/constants.h @@ -18,6 +18,12 @@ limitations under the License. namespace tensorflow { +// SavedModel assets directory. +constexpr char kSavedModelAssetsDirectory[] = "assets"; + +// SavedModel assets key for graph collection-def. +constexpr char kSavedModelAssetsKey[] = "saved_model_assets"; + // SavedModel proto filename. constexpr char kSavedModelFilenamePb[] = "saved_model.pb"; diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc index b3366dec4a3..fc21266518f 100644 --- a/tensorflow/cc/saved_model/loader_test.cc +++ b/tensorflow/cc/saved_model/loader_test.cc @@ -16,6 +16,9 @@ limitations under the License. #include "tensorflow/cc/saved_model/loader.h" #include "tensorflow/cc/saved_model/constants.h" +#include "tensorflow/cc/saved_model/signature_constants.h" +#include "tensorflow/core/example/example.pb.h" +#include "tensorflow/core/example/feature.pb.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -34,17 +37,35 @@ class LoaderTest : public ::testing::Test { protected: LoaderTest() {} - void CheckSavedModelBundle(const SavedModelBundle& bundle) { - // Validate the half plus two behavior. - Tensor input = test::AsTensor({0, 1, 2, 3}, TensorShape({4, 1})); + string MakeSerializedExample(float x) { + tensorflow::Example example; + auto* feature_map = example.mutable_features()->mutable_feature(); + (*feature_map)["x"].mutable_float_list()->add_value(x); + return example.SerializeAsString(); + } + + void CheckSavedModelBundle(const string& export_dir, + const SavedModelBundle& bundle) { + const string asset_path = + io::JoinPath(export_dir, kSavedModelAssetsDirectory, "foo.txt"); + EXPECT_TRUE(Env::Default()->FileExists(asset_path)); // Retrieve the regression signature from meta graph def. 
const auto signature_def_map = bundle.meta_graph_def.signature_def(); - const auto signature_def = signature_def_map.at("regression"); + const auto signature_def = signature_def_map.at(kRegressMethodName); - const string input_name = signature_def.inputs().at("input").name(); - const string output_name = signature_def.outputs().at("output").name(); + const string input_name = signature_def.inputs().at(kRegressInputs).name(); + const string output_name = + signature_def.outputs().at(kRegressOutputs).name(); + std::vector serialized_examples; + for (float x : {0, 1, 2, 3}) { + serialized_examples.push_back(MakeSerializedExample(x)); + } + + // Validate the half plus two behavior. + Tensor input = + test::AsTensor(serialized_examples, TensorShape({4})); std::vector outputs; TF_ASSERT_OK(bundle.session->Run({{input_name, input}}, {output_name}, {}, &outputs)); @@ -65,11 +86,11 @@ TEST_F(LoaderTest, ResourceLeakTest) { RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); for (int i = 0; i < 100; ++i) { TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } } @@ -79,10 +100,10 @@ TEST_F(LoaderTest, TagMatch) { RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } TEST_F(LoaderTest, NoTagMatch) { @@ -91,7 +112,7 @@ TEST_F(LoaderTest, NoTagMatch) { SessionOptions session_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); Status st = LoadSavedModel(session_options, run_options, export_dir, {"missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); @@ -107,7 +128,7 @@ TEST_F(LoaderTest, NoTagMatchMultiple) { SessionOptions session_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); Status st = LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe, "missing-tag"}, &bundle); EXPECT_FALSE(st.ok()); @@ -126,19 +147,19 @@ TEST_F(LoaderTest, PbtxtFormat) { io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPbTxt); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } -TEST_F(LoaderTest, ShardedVariables) { +TEST_F(LoaderTest, SingleShardVariables) { SavedModelBundle bundle; SessionOptions session_options; RunOptions run_options; const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); TF_ASSERT_OK(LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagServe}, &bundle)); - CheckSavedModelBundle(bundle); + CheckSavedModelBundle(export_dir, bundle); } TEST_F(LoaderTest, InvalidExportPath) { @@ -156,7 +177,7 @@ TEST_F(LoaderTest, InvalidExportPath) { TEST_F(LoaderTest, MaybeSavedModelDirectory) { // Valid SavedModel directory. 
const string export_dir = - io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataPb); + io::JoinPath(testing::TensorFlowSrcRoot(), kTestDataSharded); EXPECT_TRUE(MaybeSavedModelDirectory(export_dir)); // Directory that does not exist. diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb index 5a2dd4dd841..d0f0853aa87 100644 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb and b/tensorflow/cc/saved_model/testdata/half_plus_two/saved_model.pb differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint deleted file mode 100644 index 88f46487280..00000000000 --- a/tensorflow/cc/saved_model/testdata/half_plus_two/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables" diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt index 30c2c25a197..2e714d262db 100644 --- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/saved_model.pbtxt @@ -140,6 +140,88 @@ meta_graphs { op { name: "NoOp" } + op { + name: "ParseExample" + input_arg { + name: "serialized" + type: DT_STRING + } + input_arg { + name: "names" + type: DT_STRING + } + input_arg { + name: "sparse_keys" + type: DT_STRING + number_attr: "Nsparse" + } + input_arg { + name: "dense_keys" + type: DT_STRING + number_attr: "Ndense" + } + input_arg { + name: "dense_defaults" + type_list_attr: "Tdense" + } + output_arg { + name: "sparse_indices" + type: DT_INT64 + number_attr: "Nsparse" + } + output_arg { + name: "sparse_values" + type_list_attr: "sparse_types" + } + output_arg { + name: "sparse_shapes" + type: DT_INT64 + number_attr: "Nsparse" + } + output_arg { + name: "dense_values" + type_list_attr: "Tdense" + } + attr { + name: "Nsparse" + type: "int" + has_minimum: true + } + attr { + name: "Ndense" + type: "int" + has_minimum: true + } + attr { + name: "sparse_types" + type: "list(type)" + has_minimum: true + allowed_values { + list { + type: DT_FLOAT + type: DT_INT64 + type: DT_STRING + } + } + } + attr { + name: "Tdense" + type: "list(type)" + has_minimum: true + allowed_values { + list { + type: DT_FLOAT + type: DT_INT64 + type: DT_STRING + } + } + } + attr { + name: "dense_shapes" + type: "list(shape)" + has_minimum: true + } + } op { name: "Placeholder" output_arg { @@ -160,33 +242,28 @@ meta_graphs { } } op { - name: "RestoreSlice" + name: "RestoreV2" input_arg { - name: "file_pattern" + name: 
"prefix" type: DT_STRING } input_arg { - name: "tensor_name" + name: "tensor_names" type: DT_STRING } input_arg { - name: "shape_and_slice" + name: "shape_and_slices" type: DT_STRING } output_arg { - name: "tensor" - type_attr: "dt" + name: "tensors" + type_list_attr: "dtypes" } attr { - name: "dt" - type: "type" - } - attr { - name: "preferred_shard" - type: "int" - default_value { - i: -1 - } + name: "dtypes" + type: "list(type)" + has_minimum: true + minimum: 1 } } op { @@ -214,6 +291,40 @@ meta_graphs { minimum: 1 } } + op { + name: "ShardedFilename" + input_arg { + name: "basename" + type: DT_STRING + } + input_arg { + name: "shard" + type: DT_INT32 + } + input_arg { + name: "num_shards" + type: DT_INT32 + } + output_arg { + name: "filename" + type: DT_STRING + } + } + op { + name: "ShardedFilespec" + input_arg { + name: "basename" + type: DT_STRING + } + input_arg { + name: "num_shards" + type: DT_INT32 + } + output_arg { + name: "filename" + type: DT_STRING + } + } op { name: "Variable" output_arg { @@ -524,7 +635,7 @@ meta_graphs { } } node { - name: "x" + name: "tf_example" op: "Placeholder" attr { key: "_output_shapes" @@ -539,7 +650,7 @@ meta_graphs { attr { key: "dtype" value { - type: DT_FLOAT + type: DT_STRING } } attr { @@ -550,6 +661,190 @@ meta_graphs { } } } + node { + name: "ParseExample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + } + node { + name: "ParseExample/ParseExample/names" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } + } + node { + name: "ParseExample/ParseExample/dense_keys_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "x" + } + } + } + } + node { + name: "ParseExample/ParseExample" + op: "ParseExample" + input: "tf_example" + input: "ParseExample/ParseExample/names" + input: "ParseExample/ParseExample/dense_keys_0" + input: "ParseExample/Const" + attr { + key: "Ndense" + value { + i: 1 + } + } + attr { + key: "Nsparse" + value { + i: 0 + } + } + attr { + key: "Tdense" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "sparse_types" + value { + list { + } + } + } + } + node { + name: "x" + op: "Identity" + input: "ParseExample/ParseExample" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + } node { name: "Mul" op: "Mul" @@ -566,7 +861,12 @@ meta_graphs { value { list { shape { - unknown_rank: true + dim { + size: -1 + } + dim { + size: 1 + } } } } @@ -588,7 +888,38 @@ meta_graphs { value { list { shape { - unknown_rank: true + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + } + node { + name: "Identity" + op: "Identity" + input: 
"y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } } } } @@ -630,6 +961,82 @@ meta_graphs { } } } + node { + name: "save/num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + } + node { + name: "save/ShardedFilename/shard" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + } + node { + name: "save/ShardedFilename" + op: "ShardedFilename" + input: "save/Const" + input: "save/ShardedFilename/shard" + input: "save/num_shards" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } node { name: "save/save/tensor_names" op: "Const" @@ -707,7 +1114,7 @@ meta_graphs { node { name: "save/save" op: "SaveSlices" - input: "save/Const" + input: "save/ShardedFilename" input: "save/save/tensor_names" input: "save/save/shapes_and_slices" input: "a" @@ -725,7 +1132,7 @@ meta_graphs { node { name: "save/control_dependency" op: "Identity" - input: "save/Const" + input: "save/ShardedFilename" input: "^save/save" attr { key: "T" @@ -737,7 +1144,7 @@ meta_graphs { key: "_class" value { list { - s: "loc:@save/Const" + s: "loc:@save/ShardedFilename" } } } @@ -752,13 +1159,32 @@ meta_graphs { } } node { - name: "save/restore_slice/tensor_name" + name: "save/ShardedFilespec" + op: "ShardedFilespec" + input: "save/Const" + input: "save/num_shards" + input: "^save/control_dependency" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node { + name: "save/RestoreV2/tensor_names" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -775,6 +1201,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "a" } @@ -782,13 +1211,16 @@ meta_graphs { } } node { - name: "save/restore_slice/shape_and_slice" + name: "save/RestoreV2/shape_and_slices" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -805,6 +1237,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "" } @@ -812,11 +1247,11 @@ meta_graphs { } } node { - name: "save/restore_slice" - op: "RestoreSlice" + name: "save/RestoreV2" + op: "RestoreV2" input: "save/Const" - input: "save/restore_slice/tensor_name" - input: "save/restore_slice/shape_and_slice" + input: "save/RestoreV2/tensor_names" + input: "save/RestoreV2/shape_and_slices" attr { key: "_output_shapes" value { @@ -828,15 +1263,11 @@ meta_graphs { } } attr { - key: "dt" + key: "dtypes" value { - type: DT_FLOAT - } - } - attr { - key: "preferred_shard" - value { - i: -1 + list { + type: DT_FLOAT + } } } } @@ -844,7 +1275,7 @@ meta_graphs { name: "save/Assign" op: "Assign" input: "a" - input: "save/restore_slice" + input: "save/RestoreV2" attr { key: "T" value { @@ -882,13 +1313,16 @@ meta_graphs { } } node { - name: "save/restore_slice_1/tensor_name" + name: "save/RestoreV2_1/tensor_names" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -905,6 +1339,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape 
{ + dim { + size: 1 + } } string_val: "b" } @@ -912,13 +1349,16 @@ meta_graphs { } } node { - name: "save/restore_slice_1/shape_and_slice" + name: "save/RestoreV2_1/shape_and_slices" op: "Const" attr { key: "_output_shapes" value { list { shape { + dim { + size: 1 + } } } } @@ -935,6 +1375,9 @@ meta_graphs { tensor { dtype: DT_STRING tensor_shape { + dim { + size: 1 + } } string_val: "" } @@ -942,11 +1385,11 @@ meta_graphs { } } node { - name: "save/restore_slice_1" - op: "RestoreSlice" + name: "save/RestoreV2_1" + op: "RestoreV2" input: "save/Const" - input: "save/restore_slice_1/tensor_name" - input: "save/restore_slice_1/shape_and_slice" + input: "save/RestoreV2_1/tensor_names" + input: "save/RestoreV2_1/shape_and_slices" attr { key: "_output_shapes" value { @@ -958,15 +1401,11 @@ meta_graphs { } } attr { - key: "dt" + key: "dtypes" value { - type: DT_FLOAT - } - } - attr { - key: "preferred_shard" - value { - i: -1 + list { + type: DT_FLOAT + } } } } @@ -974,7 +1413,7 @@ meta_graphs { name: "save/Assign_1" op: "Assign" input: "b" - input: "save/restore_slice_1" + input: "save/RestoreV2_1" attr { key: "T" value { @@ -1012,20 +1451,26 @@ meta_graphs { } } node { - name: "save/restore_all" + name: "save/restore_shard" op: "NoOp" input: "^save/Assign" input: "^save/Assign_1" } + node { + name: "save/restore_all" + op: "NoOp" + input: "^save/restore_shard" + } versions { producer: 15 } } saver_def { filename_tensor_name: "save/Const:0" - save_tensor_name: "save/control_dependency:0" + save_tensor_name: "save/ShardedFilespec:0" restore_op_name: "save/restore_all" max_to_keep: 5 + sharded: true keep_checkpoint_every_n_hours: 10000.0 version: V1 } @@ -1048,21 +1493,21 @@ meta_graphs { } } signature_def { - key: "regression" + key: "tensorflow/serving/regress" value { inputs { - key: "input" + key: "inputs" value { - name: "x:0" + name: "tf_example:0" } } outputs { - key: "output" + key: "outputs" value { - name: "y:0" + name: "Identity:0" } } - method_name: "regression" + method_name: "tensorflow/serving/regress" } } } diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint deleted file mode 100644 index 76c6cefbbbd..00000000000 --- a/tensorflow/cc/saved_model/testdata/half_plus_two_pbtxt/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two_pbtxt/variables/variables-?????-of-00001" diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt new file mode 100644 index 00000000000..f9ff0366880 --- /dev/null +++ b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/assets/foo.txt @@ -0,0 +1 @@ +asset-file-contents \ No newline at end of file diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb index 0a87f3306f5..d0f0853aa87 100644 Binary files a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb and b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/saved_model.pb differ diff --git a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint b/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint deleted file mode 100644 index 1065013315f..00000000000 --- 
a/tensorflow/cc/saved_model/testdata/half_plus_two_sharded/variables/checkpoint +++ /dev/null @@ -1,2 +0,0 @@ -model_checkpoint_path: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001" -all_model_checkpoint_paths: "/tmp/saved_model/half_plus_two/variables/variables-?????-of-00001" diff --git a/tensorflow/contrib/distributions/__init__.py b/tensorflow/contrib/distributions/__init__.py index 53e0af10636..36bc4072382 100644 --- a/tensorflow/contrib/distributions/__init__.py +++ b/tensorflow/contrib/distributions/__init__.py @@ -97,6 +97,7 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import,line-too-long,g-importing-member +from tensorflow.contrib.distributions.python.ops import bijector from tensorflow.contrib.distributions.python.ops.bernoulli import * from tensorflow.contrib.distributions.python.ops.beta import * from tensorflow.contrib.distributions.python.ops.binomial import * diff --git a/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py b/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py index c4eea35dc12..e1a8a6d6025 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/beta_test.py @@ -245,6 +245,23 @@ class BetaTest(tf.test.TestCase): stats.beta.var(a, b), atol=1e-1) + # Test that sampling with the same seed twice gives the same results. + def testBetaSampleMultipleTimes(self): + with self.test_session(): + a_val = 1. + b_val = 2. + n_val = 100 + + tf.set_random_seed(654321) + beta1 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta1") + samples1 = beta1.sample_n(n_val, seed=123456).eval() + + tf.set_random_seed(654321) + beta2 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta2") + samples2 = beta2.sample_n(n_val, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testBetaSampleMultidimensional(self): with self.test_session(): a = np.random.rand(3, 2, 2).astype(np.float32) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py index fe4ac931719..d05f3cfe316 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijector_test.py @@ -23,9 +23,20 @@ import math import numpy as np import tensorflow as tf -from tensorflow.contrib.distributions.python.ops.bijector import _Exp -from tensorflow.contrib.distributions.python.ops.bijector import _Identity -from tensorflow.contrib.distributions.python.ops.bijector import _ShiftAndScale +bijectors = tf.contrib.distributions.bijector +rng = np.random.RandomState(42) + + +class BaseBijectorTest(tf.test.TestCase): + """Tests properties of the Bijector base-class.""" + + def testBijector(self): + with self.test_session(): + with self.assertRaisesRegexp( + TypeError, + ("Can't instantiate abstract class Bijector " + "with abstract methods __init__")): + bijectors.Bijector() class IdentityBijectorTest(tf.test.TestCase): @@ -33,7 +44,7 @@ class IdentityBijectorTest(tf.test.TestCase): def testBijector(self): with self.test_session(): - bijector = _Identity() + bijector = bijectors.Identity() self.assertEqual("Identity", bijector.name) x = [[[0.], [1.]]] @@ -50,7 +61,7 @@ class ExpBijectorTest(tf.test.TestCase): def testBijector(self): with self.test_session(): - bijector = _Exp(event_ndims=1) + bijector = bijectors.Exp(event_ndims=1) self.assertEqual("Exp", 
bijector.name) x = [[[1.], [2.]]] @@ -63,14 +74,39 @@ class ExpBijectorTest(tf.test.TestCase): self.assertAllClose([[0., -math.log(2.)]], jac.eval()) -class _ShiftAndScaleBijectorTest(tf.test.TestCase): +class InlineBijectorTest(tf.test.TestCase): + """Tests the correctness of the inline constructed bijector.""" + + def testBijector(self): + with self.test_session(): + exp = bijectors.Exp(event_ndims=1) + inline = bijectors.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(x), reduction_indices=-1)), + name="Exp") + + self.assertEqual(exp.name, inline.name) + x = [[[1., 2.], + [3., 4.], + [5., 6.]]] + self.assertAllClose(exp.forward(x).eval(), inline.forward(x).eval()) + self.assertAllClose(exp.inverse(x).eval(), inline.inverse(x).eval()) + self.assertAllClose(exp.inverse_log_det_jacobian(x).eval(), + inline.inverse_log_det_jacobian(x).eval()) + + +class ScaleAndShiftBijectorTest(tf.test.TestCase): + """Tests the correctness of the Y = scale * x + loc transformation.""" def testProperties(self): with self.test_session(): mu = -1. sigma = 2. - bijector = _ShiftAndScale(loc=mu, scale=sigma) - self.assertEqual("ShiftAndScale", bijector.name) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) + self.assertEqual("ScaleAndShift", bijector.name) def testNoBatchScalar(self): with self.test_session() as sess: @@ -85,7 +121,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = -1. sigma = 2. # Scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar" x = [1., 2, 3] # Three scalar samples (no batches). @@ -107,7 +144,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = -1. sigma = 2. # Scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(0, bijector.shaper.event_ndims.eval()) # "is scalar" x = [[1., 2, 3], @@ -134,7 +172,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1.] sigma = [1.] # One batch, scalar. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -158,7 +197,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1., -1] sigma = [1., 1] # Univariate, two batches. - bijector = _ShiftAndScale(loc=mu, scale=sigma) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -182,7 +222,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [1., -1] sigma = np.eye(2, dtype=np.float32) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector" x = [1., 1] @@ -205,7 +246,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = 1. 
sigma = np.eye(2, dtype=np.float32) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual(0, bijector.shaper.batch_ndims.eval()) # "no batches" self.assertEqual(1, bijector.shaper.event_ndims.eval()) # "is vector" x = [1., 1] @@ -231,7 +273,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value, event_ndims: event_ndims_value} - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=event_ndims) self.assertEqual(0, sess.run(bijector.shaper.batch_ndims, feed_dict)) self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict)) self.assertAllClose([[2., 0]], sess.run(bijector.forward(x), feed_dict)) @@ -252,7 +295,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): for run in (static_run, dynamic_run): mu = [[1., -1]] sigma = np.array([np.eye(2, dtype=np.float32)]) - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=1) self.assertEqual( 1, bijector.shaper.batch_ndims.eval()) # "one batch dim" self.assertEqual( @@ -276,7 +320,8 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): feed_dict = {x: x_value, mu: mu_value, sigma: sigma_value, event_ndims: event_ndims_value} - bijector = _ShiftAndScale(loc=mu, scale=sigma, event_ndims=event_ndims) + bijector = bijectors.ScaleAndShift( + loc=mu, scale=sigma, event_ndims=event_ndims) self.assertEqual(1, sess.run(bijector.shaper.batch_ndims, feed_dict)) self.assertEqual(1, sess.run(bijector.shaper.event_ndims, feed_dict)) self.assertAllClose([[[2., 0]]], sess.run(bijector.forward(x), feed_dict)) @@ -285,5 +330,65 @@ class _ShiftAndScaleBijectorTest(tf.test.TestCase): [0.], sess.run(bijector.inverse_log_det_jacobian(x), feed_dict)) +class SoftplusBijectorTest(tf.test.TestCase): + """Tests the correctness of the Y = g(X) = Log[1 + exp(X)] transformation.""" + + def _softplus(self, x): + return np.log(1 + np.exp(x)) + + def _softplus_inverse(self, y): + return np.log(np.exp(y) - 1) + + def _softplus_ildj_before_reduction(self, y): + """Inverse log det jacobian, before being reduced.""" + return -np.log(1 - np.exp(-y)) + + def testBijectorForwardInverseEventDimsZero(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=0) + self.assertEqual("Softplus", bijector.name) + x = 2 * rng.randn(2, 10) + y = self._softplus(x) + + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval()) + + def testBijectorLogDetJacobianEventDimsZero(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=0) + y = 2 * rng.rand(2, 10) + # No reduction needed if event_dims = 0. 
+ ildj = self._softplus_ildj_before_reduction(y) + + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval()) + + def testBijectorForwardInverseEventDimsOne(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=1) + self.assertEqual("Softplus", bijector.name) + x = 2 * rng.randn(2, 10) + y = self._softplus(x) + + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + x, bijector.inverse_and_inverse_log_det_jacobian(y)[0].eval()) + + def testBijectorLogDetJacobianEventDimsOne(self): + with self.test_session(): + bijector = bijectors.Softplus(event_ndims=1) + y = 2 * rng.rand(2, 10) + ildj_before = self._softplus_ildj_before_reduction(y) + ildj = np.sum(ildj_before, axis=1) + + self.assertAllClose(ildj, bijector.inverse_log_det_jacobian(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_and_inverse_log_det_jacobian(y)[1].eval()) + + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py index f871f1961c8..af15f36522a 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/mixture_test.py @@ -334,6 +334,32 @@ class MixtureTest(tf.test.TestCase): which_dist_samples = dist_sample_values[c][:size_c] self.assertAllClose(which_dist_samples, sample_values[which_c]) + # Test that sampling with the same seed twice gives the same results. + def testSampleMultipleTimes(self): + # 5 component mixture. + logits = [-10.0, -5.0, 0.0, 5.0, 10.0] + mus = [-5.0, 0.0, 5.0, 4.0, 20.0] + sigmas = [0.1, 5.0, 3.0, 0.2, 4.0] + + with self.test_session(): + n = 100 + + tf.set_random_seed(654321) + components = [distributions_py.Normal( + mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)] + cat = distributions_py.Categorical(logits, dtype=tf.int32, name="cat1") + dist1 = distributions_py.Mixture(cat, components, name="mixture1") + samples1 = dist1.sample_n(n, seed=123456).eval() + + tf.set_random_seed(654321) + components2 = [distributions_py.Normal( + mu=mu, sigma=sigma) for mu, sigma in zip(mus, sigmas)] + cat2 = distributions_py.Categorical(logits, dtype=tf.int32, name="cat2") + dist2 = distributions_py.Mixture(cat2, components2, name="mixture2") + samples2 = dist2.sample_n(n, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testSampleScalarBatchMultivariate(self): with self.test_session() as sess: num_components = 3 diff --git a/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py b/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py index bf0d6f94900..c78ca2d6439 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/student_t_test.py @@ -108,20 +108,54 @@ class StudentTTest(tf.test.TestCase): df_v = 4.0 mu_v = 3.0 sigma_v = np.sqrt(10.0) - n = tf.constant(100000) + n = tf.constant(200000) student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma) - samples = student.sample_n(n, seed=137) + samples = student.sample_n(n) sample_values = samples.eval() - n = 100000 - self.assertEqual(sample_values.shape, (n,)) - self.assertAllClose(sample_values.mean(), mu_v, atol=1e-2) + n_val = 200000 + self.assertEqual(sample_values.shape, 
(n_val,)) + self.assertAllClose(sample_values.mean(), mu_v, rtol=1e-2, atol=0) self.assertAllClose(sample_values.var(), sigma_v**2 * df_v / (df_v - 2), - atol=.25) + rtol=1e-2, atol=0) self._checkKLApprox(df_v, mu_v, sigma_v, sample_values) - def _testStudentSampleMultiDimensional(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. + # Test that sampling with the same seed twice gives the same results. + def testStudentSampleMultipleTimes(self): + with self.test_session(): + df = tf.constant(4.0) + mu = tf.constant(3.0) + sigma = tf.constant(math.sqrt(10.0)) + df_v = 4.0 + mu_v = 3.0 + sigma_v = np.sqrt(10.0) + n = tf.constant(100) + + tf.set_random_seed(654321) + student = tf.contrib.distributions.StudentT( + df=df, mu=mu, sigma=sigma, name="student_t1") + samples1 = student.sample_n(n, seed=123456).eval() + + tf.set_random_seed(654321) + student2 = tf.contrib.distributions.StudentT( + df=df, mu=mu, sigma=sigma, name="student_t2") + samples2 = student2.sample_n(n, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + + def testStudentSampleSmallDfNoNan(self): + with self.test_session(): + df_v = [1e-1, 1e-5, 1e-10, 1e-20] + df = tf.constant(df_v) + n = tf.constant(200000) + student = tf.contrib.distributions.StudentT(df=df, mu=1.0, sigma=1.0) + samples = student.sample_n(n) + sample_values = samples.eval() + n_val = 200000 + self.assertEqual(sample_values.shape, (n_val, 4)) + self.assertTrue(np.all(np.logical_not(np.isnan(sample_values)))) + + def testStudentSampleMultiDimensional(self): with self.test_session(): batch_size = 7 df = tf.constant([[3.0, 7.0]] * batch_size) @@ -130,20 +164,22 @@ class StudentTTest(tf.test.TestCase): df_v = [3.0, 7.0] mu_v = [3.0, -3.0] sigma_v = [np.sqrt(10.0), np.sqrt(15.0)] - n = tf.constant(100000) + n = tf.constant(200000) student = tf.contrib.distributions.StudentT(df=df, mu=mu, sigma=sigma) samples = student.sample_n(n) sample_values = samples.eval() - self.assertEqual(samples.get_shape(), (100000, batch_size, 2)) - self.assertAllClose(sample_values[:, 0, 0].mean(), mu_v[0], atol=.15) + self.assertEqual(samples.get_shape(), (200000, batch_size, 2)) + self.assertAllClose( + sample_values[:, 0, 0].mean(), mu_v[0], rtol=1e-2, atol=0) self.assertAllClose(sample_values[:, 0, 0].var(), sigma_v[0]**2 * df_v[0] / (df_v[0] - 2), - atol=1) + rtol=1e-1, atol=0) self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 0]) - self.assertAllClose(sample_values[:, 0, 1].mean(), mu_v[1], atol=.01) + self.assertAllClose( + sample_values[:, 0, 1].mean(), mu_v[1], rtol=1e-2, atol=0) self.assertAllClose(sample_values[:, 0, 1].var(), sigma_v[1]**2 * df_v[1] / (df_v[1] - 2), - atol=.25) + rtol=1e-1, atol=0) self._checkKLApprox(df_v[0], mu_v[0], sigma_v[0], sample_values[:, 0, 1]) def _checkKLApprox(self, df, mu, sigma, samples): @@ -337,8 +373,7 @@ class StudentTTest(tf.test.TestCase): mode = student.mode().eval() self.assertAllClose([-1., 0, 1], mode) - def _testPdfOfSample(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. + def testPdfOfSample(self): with self.test_session() as sess: student = tf.contrib.distributions.StudentT(df=3., mu=np.pi, sigma=1.) num = 20000 @@ -357,8 +392,7 @@ class StudentTTest(tf.test.TestCase): # Verify integral over sample*pdf ~= 1. self._assertIntegral(sample_vals, pdf_vals) - def _testPdfOfSampleMultiDims(self): - # DISABLED: Please enable this test once b/issues/30149644 is resolved. 
+ def testPdfOfSampleMultiDims(self): with self.test_session() as sess: student = tf.contrib.distributions.StudentT(df=[7., 11.], mu=[[5.], [6.]], diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py index c97473cf4f0..ba6a998d466 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py @@ -33,12 +33,8 @@ class TransformedDistributionTest(tf.test.TestCase): # Note: the Jacobian callable only works for this example; more generally # you may or may not need a reduce_sum. log_normal = tf.contrib.distributions.TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.exp(x), - inverse=lambda y: tf.log(y), - log_det_jacobian=(lambda x: x)) + base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma), + bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0)) sp_dist = stats.lognorm(s=sigma, scale=np.exp(mu)) # sample @@ -67,12 +63,8 @@ class TransformedDistributionTest(tf.test.TestCase): mu = 3.0 sigma = 0.02 log_normal = tf.contrib.distributions.TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.exp(x), - inverse=None, - log_det_jacobian=(lambda x: tf.reduce_sum(x))) + base_distribution=tf.contrib.distributions.Normal(mu=mu, sigma=sigma), + bijector=tf.contrib.distributions.bijector.Exp(event_ndims=0)) sample = log_normal.sample_n(1) sample_val, log_pdf_val = sess.run([sample, log_normal.log_pdf(sample)]) @@ -82,10 +74,6 @@ class TransformedDistributionTest(tf.test.TestCase): log_pdf_val, atol=1e-2) - with self.assertRaisesRegexp(ValueError, - "was not returned from `sample`"): - log_normal.log_pdf(tf.constant(3.0)) - if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py index 521b0d4b2dd..8a0cf5ae1d9 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/wishart_test.py @@ -149,6 +149,30 @@ class WishartCholeskyTest(tf.test.TestCase): variance_estimate, rtol=0.05) + # Test that sampling with the same seed twice gives the same results. + def testSampleMultipleTimes(self): + with self.test_session(): + df = 4. 
+ n_val = 100 + + tf.set_random_seed(654321) + chol_w1 = distributions.WishartCholesky( + df=df, + scale=chol(make_pd(1., 3)), + cholesky_input_output_matrices=False, + name="wishart1") + samples1 = chol_w1.sample_n(n_val, seed=123456).eval() + + tf.set_random_seed(654321) + chol_w2 = distributions.WishartCholesky( + df=df, + scale=chol(make_pd(1., 3)), + cholesky_input_output_matrices=False, + name="wishart2") + samples2 = chol_w2.sample_n(n_val, seed=123456).eval() + + self.assertAllClose(samples1, samples2) + def testProb(self): with self.test_session(): # Generate some positive definite (pd) matrices and their Cholesky diff --git a/tensorflow/contrib/distributions/python/ops/beta.py b/tensorflow/contrib/distributions/python/ops/beta.py index 7f77254a644..684d6ec56b2 100644 --- a/tensorflow/contrib/distributions/python/ops/beta.py +++ b/tensorflow/contrib/distributions/python/ops/beta.py @@ -197,7 +197,8 @@ class Beta(distribution.Distribution): gamma1_sample = random_ops.random_gamma( [n,], a, dtype=self.dtype, seed=seed) gamma2_sample = random_ops.random_gamma( - [n,], b, dtype=self.dtype, seed=seed) + [n,], b, dtype=self.dtype, + seed=distribution_util.gen_new_seed(seed, "beta")) beta_sample = gamma1_sample / (gamma1_sample + gamma2_sample) return beta_sample diff --git a/tensorflow/contrib/distributions/python/ops/bijector.py b/tensorflow/contrib/distributions/python/ops/bijector.py index 9f69d3cb21d..d658e02802c 100644 --- a/tensorflow/contrib/distributions/python/ops/bijector.py +++ b/tensorflow/contrib/distributions/python/ops/bijector.py @@ -12,12 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""An API for reversible (bijective) transformations of random variables.""" +r"""Bijector Ops. + +An API for reversible (bijective) transformations of random variables. + +## Background + +Differentiable, bijective transformations of continuous random variables alter +the calculations made in the cumulative/probability distribution functions and +sample function. This module provides a standard interface for making these +manipulations. + +For more details and examples, see the `Bijector` docstring. + +To apply a `Bijector`, use `distributions.TransformedDistribution`. + +## Bijectors + +@@Bijector +@@Identity +@@Inline +@@Exp +@@ScaleAndShift + +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc import contextlib +import six from tensorflow.contrib.distributions.python.ops.shape import _DistributionShape from tensorflow.python.framework import constant_op @@ -26,40 +51,43 @@ from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops -class _Bijector(object): - """An interface for transforming a `Distribution` `Tensor`. - - Recall that a `Distribution` `Tensor` has dimensions which have `sample`, - `batch`, and `event` semantics. (See `DistributionShape` for more details.) +@six.add_metaclass(abc.ABCMeta) +class Bijector(object): + """Interface for transforming a `Distribution` via `TransformedDistribution`. A `Bijector` implements a bijective, differentiable function by transforming an input `Tensor`. The output `Tensor` shape is constrained by the input `sample`, `batch`, and `event` shape. 
A `Bijector` is characterized by three operations: - (1) Forward Evaluation - Useful for turning one random outcome into another random outcome from a - different distribution. + 1. Forward Evaluation - (2) Inverse Evaluation - Useful for "reversing" a transformation to compute one probability in - terms of another. + Useful for turning one random outcome into another random outcome from a + different distribution. - (3) (log o det o Jacobian o inverse)(x) - "The log of the determinant of the matrix of all first-order partial - derivatives of the inverse function." - Useful for inverting a transformation to compute one probability in terms - of another. Geometrically, the det(Jacobian) is the volume of the - transformation and is used to scale the probability. + 2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + + 3. (log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. By convention, transformations of random variables are named in terms of the forward transformation. The forward transformation creates samples, the inverse is useful for computing probabilities. Example Use: - Basic properties: + + - Basic properties: ```python x = ... # A tensor. @@ -69,7 +97,7 @@ class _Bijector(object): x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). ``` - Computing a log-likelihood: + - Computing a log-likelihood: ```python def transformed_log_pdf(bijector, log_pdf, x): @@ -77,7 +105,7 @@ class _Bijector(object): log_pdf(bijector.inverse(x))) ``` - Transforming a random outcome: + - Transforming a random outcome: ```python def transformed_sample(bijector, x): @@ -85,7 +113,8 @@ class _Bijector(object): ``` Example transformations: - "Exponential" + + - "Exponential" ``` Y = g(X) = exp(X) @@ -102,7 +131,7 @@ class _Bijector(object): = (1 / y) Normal(log(y); 0, 1) ``` - "ShiftAndScale" + - "ScaleAndShift" ``` Y = g(X) = sqrtSigma * X + mu @@ -122,7 +151,8 @@ class _Bijector(object): Example of why a `Bijector` needs to understand sample, batch, event partitioning: - Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + + - Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, and event (S, B, E) shape semantics. Suppose the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. @@ -132,24 +162,25 @@ class _Bijector(object): over the event dimensions. Subclass Requirements: - Subclasses are expected to implement `_forward` and one or both of: + + - Subclasses are expected to implement `_forward` and one or both of: - `_inverse`, `_inverse_log_det_jacobian`, - `_inverse_and_inverse_log_det_jacobian`. - If computation can be shared among `_inverse` and + - If computation can be shared among `_inverse` and `_inverse_log_det_jacobian` it is preferable to implement `_inverse_and_inverse_log_det_jacobian`. This usually reduces graph-construction overhead because a `Distribution`'s implementation of `log_prob` will need to evaluate both the inverse Jacobian as well as the inverse function. 
- If an additional use case needs just `inverse` or just + - If an additional use case needs just `inverse` or just `inverse_log_det_jacobian` then he or she may also wish to implement these functions to avoid computing the `inverse_log_det_jacobian` or the `inverse`, respectively. """ - # TODO(b/30476956): Try to remove constructor dependence on ndims. + @abc.abstractmethod def __init__(self, batch_ndims=None, event_ndims=None, @@ -236,6 +267,9 @@ class _Bijector(object): """Returns the string name of this `Bijector`.""" return self._name + def _forward(self, x): + raise NotImplementedError("forward is not implemented.") + def forward(self, x, name="forward"): """Returns the forward `Bijector` evaluation, i.e., X = g(Y). @@ -249,13 +283,16 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if `_forward` is not implemented. + NotImplementedError: if `_forward` is not implemented. """ with self._name_scope(name, [x]): x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) return self._forward(x) + def _inverse(self, x): + raise NotImplementedError("inverse is not implemented") + def inverse(self, x, name="inverse"): """Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). @@ -269,7 +306,7 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse` nor + NotImplementedError: if neither `_inverse` nor `_inverse_and_inverse_log_det_jacobian` are implemented. """ with self._name_scope(name, [x]): @@ -277,11 +314,14 @@ class _Bijector(object): self._maybe_assert_dtype(x) try: return self._inverse(x) - except AttributeError: + except NotImplementedError: # Since _inverse was not implemented, try to see if it's implemented # by the _inverse_and_inverse_log_det_jacobian member. return self._inverse_and_inverse_log_det_jacobian(x)[0] + def _inverse_log_det_jacobian(self, x): + raise NotImplementedError("inverse_log_det_jacobian is not implemented") + def inverse_log_det_jacobian(self, x, name="inverse_log_det_jacobian"): """Returns the (log o det o Jacobian o inverse)(x). @@ -300,7 +340,7 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse_log_det_jacobian` nor + NotImplementedError: if neither `_inverse_log_det_jacobian` nor `_inverse_and_inverse_log_det_jacobian` are implemented. """ with self._name_scope(name, [x]): @@ -308,11 +348,15 @@ class _Bijector(object): self._maybe_assert_dtype(x) try: return self._inverse_log_det_jacobian(x) - except AttributeError: + except NotImplementedError: # Since _inverse_log_det_jacobian was not implemented, try to see if # it's implemented by the _inverse_and_inverse_log_det_jacobian member. return self._inverse_and_inverse_log_det_jacobian(x)[1] + def _inverse_and_inverse_log_det_jacobian(self, x): + raise NotImplementedError( + "inverse_and_inverse_log_det_jacobian is not implemented.") + def inverse_and_inverse_log_det_jacobian( self, x, name="inverse_and_inverse_log_det_jacobian"): """Returns both the inverse evaluation and inverse_log_det_jacobian. @@ -332,15 +376,15 @@ class _Bijector(object): Raises: TypeError: if `self.dtype` is specified and `x.dtype` is not `self.dtype`. - AttributeError: if neither `_inverse_and_inverse_log_det_jacobian` nor - {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
+ NotImplementedError: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. """ with self._name_scope(name, [x]): x = ops.convert_to_tensor(x, name="x") self._maybe_assert_dtype(x) try: return self._inverse_and_inverse_log_det_jacobian(x) - except AttributeError: + except NotImplementedError: # Since _inverse_and_inverse_log_det_jacobian was not implemented, try # to see if we can separately use _inverse and # _inverse_log_det_jacobian members. @@ -361,7 +405,7 @@ class _Bijector(object): (self.dtype, x.dtype)) -class _Identity(_Bijector): +class Identity(Bijector): """Bijector which computes Y = g(X) = X. Example Use: @@ -378,7 +422,7 @@ class _Identity(_Bijector): """ def __init__(self, validate_args=False, name="Identity"): - super(_Identity, self).__init__( + super(Identity, self).__init__( batch_ndims=0, event_ndims=0, is_constant_jacobian=True, @@ -396,7 +440,59 @@ class _Identity(_Bijector): return constant_op.constant(0., dtype=x.dtype) -class _Exp(_Bijector): +class Inline(Bijector): + # pylint: disable=line-too-long + """Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + + Example Use: + + ```python + exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") + ``` + + The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. + """ + # pylint: enable=line-too-long + + def __init__(self, forward_fn, inverse_fn, inverse_log_det_jacobian_fn, + is_constant_jacobian=False, name="Inline"): + """Creates a `Bijector` from callables. + + Args: + forward_fn: Python callable implementing the forward transformation. + inverse_fn: Python callable implementing the inverse transformation. + inverse_log_det_jacobian_fn: Python callable implementing the + inverse_log_det_jacobian transformation. + is_constant_jacobian: `Boolean` indicating that the Jacobian is constant + for all input arguments. + name: `String`, name given to ops managed by this object. + """ + super(Inline, self).__init__( + batch_ndims=0, + event_ndims=0, + is_constant_jacobian=is_constant_jacobian, + validate_args=False, + name=name) + self._forward_fn = forward_fn + self._inverse_fn = inverse_fn + self._inverse_log_det_jacobian_fn = inverse_log_det_jacobian_fn + + def _forward(self, x): + return self._forward_fn(x) + + def _inverse(self, y): + return self._inverse_fn(y) + + def _inverse_log_det_jacobian(self, y): + return self._inverse_log_det_jacobian_fn(y) + + +class Exp(Bijector): """Bijector which computes Y = g(X) = exp(X). Example Use: @@ -417,12 +513,11 @@ class _Exp(_Bijector): over the event space. """ - # TODO(b/30476956): Try to remove constructor dependence on ndims. def __init__(self, event_ndims=0, validate_args=False, name="Exp"): - super(_Exp, self).__init__( + super(Exp, self).__init__( batch_ndims=0, event_ndims=event_ndims, validate_args=validate_args, @@ -448,7 +543,7 @@ class _Exp(_Bijector): return y, -math_ops.reduce_sum(y, reduction_indices=event_dims) -class _ShiftAndScale(_Bijector): +class ScaleAndShift(Bijector): """Bijector which computes Y = g(X; loc, scale) = scale * X + loc. Example Use: @@ -457,35 +552,35 @@ class _ShiftAndScale(_Bijector): # No batch, scalar. mu = 0 # shape=[] sigma = 1 # shape=[] - b = ShiftAndScale(loc=mu, scale=sigma) + b = ScaleAndShift(loc=mu, scale=sigma) # b.shaper.batch_ndims == 0 # b.shaper.event_ndims == 0 # One batch, scalar. mu = ... 
# shape=[b], b>0 sigma = ... # shape=[b], b>0 - b = ShiftAndScale(loc=mu, scale=sigma) + b = ScaleAndShift(loc=mu, scale=sigma) # b.shaper.batch_ndims == 1 # b.shaper.event_ndims == 0 # No batch, multivariate. mu = ... # shape=[d], d>0 sigma = ... # shape=[d, d], d>0 - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) # b.shaper.batch_ndims == 0 # b.shaper.event_ndims == 1 # (B1*B2*...*Bb)-batch, multivariate. mu = ... # shape=[B1,...,Bb, d], b>0, d>0 sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) # b.shaper.batch_ndims == b # b.shaper.event_ndims == 1 # Mu is broadcast: mu = 1 sigma = [I, I] # I is a 3x3 identity matrix. - b = ShiftAndScale(loc=mu, scale=sigma, event_ndims=1) + b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) x = numpy.ones(S + sigma.shape) b.forward(x) # == x + 1 ``` @@ -497,7 +592,7 @@ class _ShiftAndScale(_Bijector): scale, event_ndims=0, validate_args=False, - name="ShiftAndScale"): + name="ScaleAndShift"): self._parameters = {} self._name = name with self._name_scope("init", values=[loc, scale, event_ndims]): @@ -512,7 +607,7 @@ class _ShiftAndScale(_Bijector): raise TypeError("%s.dtype=%s does not match %s" % (event_ndims.name, event_ndims.dtype, dtypes.int32)) self._scale, batch_ndims = self._process_scale(self.scale, event_ndims) - super(_ShiftAndScale, self).__init__( + super(ScaleAndShift, self).__init__( batch_ndims=batch_ndims, event_ndims=event_ndims, parameters={"loc": self.loc, "scale": self.scale}, @@ -590,3 +685,77 @@ class _ShiftAndScale(_Bijector): return -math_ops.reduce_sum( math_ops.log(array_ops.matrix_diag_part(self.scale)), reduction_indices=[-1]) + + +class Softplus(Bijector): + """Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + + The softplus `Bijector` has the following two useful properties: + + * The domain is the positive real numbers + * `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. + """ + + def __init__(self, + event_ndims=0, + validate_args=False, + name="Softplus"): + super(Softplus, self).__init__( + batch_ndims=0, + event_ndims=event_ndims, + validate_args=validate_args, + name=name) + + def _forward(self, x): + return nn_ops.softplus(x) + + def _inverse(self, x): + # The most stable inverse of softplus is not the most direct one. + # y = softplus(x) = Log[1 + exp{x}], (which means y > 0). + # ==> exp{y} = 1 + exp{x} + # ==> x = Log[exp{y} - 1] + # = Log[(exp{y} - 1) / exp{y}] + Log[exp{y}] + # = Log[(1 - exp{-y}) / 1] + Log[exp{y}] + # = Log[1 - exp{-y}] + y + # Recalling y > 0, you see that this is more stable than Log[exp{y} - 1]. + return x + math_ops.log(1. - math_ops.exp(-x)) + + def _inverse_log_det_jacobian(self, x): + # Stable inverse log det jacobian. + # Y = Log[1 + exp{X}] ==> X = Log[exp{Y} - 1] + # ==> dX/dY = exp{Y} / (exp{Y} - 1) + # = 1 / (1 - exp{-Y}), + # which is the most stable for Y > 0. 
+ if self.shaper is None: + raise ValueError("Jacobian cannot be computed with unknown event_ndims") + _, _, event_dims = self.shaper.get_dims(x) + return -math_ops.reduce_sum( + math_ops.log(1. - math_ops.exp(-x)), reduction_indices=event_dims) + + def _inverse_and_inverse_log_det_jacobian(self, x): + if self.shaper is None: + raise ValueError("Jacobian cannot be computed with unknown event_ndims") + _, _, event_dims = self.shaper.get_dims(x) + log_one_minus_exp_neg = math_ops.log(1. - math_ops.exp(-x)) + y = x + log_one_minus_exp_neg + ildj = -math_ops.reduce_sum( + log_one_minus_exp_neg, reduction_indices=event_dims) + return y, ildj diff --git a/tensorflow/contrib/distributions/python/ops/distribution_util.py b/tensorflow/contrib/distributions/python/ops/distribution_util.py index 1838c3a9ea1..bbb7a84f27a 100644 --- a/tensorflow/contrib/distributions/python/ops/distribution_util.py +++ b/tensorflow/contrib/distributions/python/ops/distribution_util.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import functools +import hashlib import sys import numpy as np @@ -197,8 +198,8 @@ def log_combinations(n, counts, name="log_combinations"): # The sum should be along the last dimension of counts. This is the # "distribution" dimension. Here n a priori represents the sum of counts. with ops.name_scope(name, values=[n, counts]): - n = array_ops.identity(n, name="n") - counts = array_ops.identity(counts, name="counts") + n = ops.convert_to_tensor(n, name="n") + counts = ops.convert_to_tensor(counts, name="counts") total_permutations = math_ops.lgamma(n + 1) counts_factorial = math_ops.lgamma(counts + 1) redundant_permutations = math_ops.reduce_sum(counts_factorial, @@ -397,6 +398,14 @@ def pick_vector(cond, [math_ops.select(cond, n, -1)]) +def gen_new_seed(seed, salt): + """Generate a new seed, from the given seed and salt.""" + if seed: + string = (str(seed) + salt).encode("utf-8") + return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF + return None + + def override_docstring_if_empty(fn, doc_str): """Override the `doc_str` argument to `fn.__doc__`. diff --git a/tensorflow/contrib/distributions/python/ops/mixture.py b/tensorflow/contrib/distributions/python/ops/mixture.py index bd6f920c2cc..9827df6d10b 100644 --- a/tensorflow/contrib/distributions/python/ops/mixture.py +++ b/tensorflow/contrib/distributions/python/ops/mixture.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import categorical from tensorflow.contrib.distributions.python.ops import distribution +from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util @@ -295,8 +296,10 @@ class Mixture(distribution.Distribution): partitions=cat_samples, num_partitions=self.num_components) samples_class = [None for _ in range(self.num_components)] + for c in range(self.num_components): n_class = array_ops.size(partitioned_samples_indices[c]) + seed = distribution_util.gen_new_seed(seed, "mixture") samples_class_c = self.components[c].sample_n(n_class, seed=seed) # Pull out the correct batch entries from each index. 
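The `gen_new_seed` helper added above, and its use in the `Mixture` sampling hunk that follows it, is the seed-derivation pattern this change threads through the distributions code: hash the caller's seed together with a per-call-site salt so that each internal sampling op gets a distinct but still reproducible stream. A minimal, self-contained sketch of that behavior (the helper body is copied from the `distribution_util.py` hunk above; the printed usage is only illustrative):

```python
import hashlib


def gen_new_seed(seed, salt):
  """Generate a new seed, from the given seed and salt."""
  if seed:
    string = (str(seed) + salt).encode("utf-8")
    return int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF
  return None

# Each call site salts the user's seed differently ("mixture", "student_t",
# "wishart", ...), so the auxiliary draws do not reuse the same stream while
# remaining reproducible for a fixed seed.
print(gen_new_seed(42, "mixture"))    # deterministic non-negative 31-bit value
print(gen_new_seed(42, "student_t"))  # different salt -> different derived seed
print(gen_new_seed(None, "wishart"))  # falsy seed (None or 0) -> None, i.e. unseeded
```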
diff --git a/tensorflow/contrib/distributions/python/ops/student_t.py b/tensorflow/contrib/distributions/python/ops/student_t.py index d038100799c..06350482af0 100644 --- a/tensorflow/contrib/distributions/python/ops/student_t.py +++ b/tensorflow/contrib/distributions/python/ops/student_t.py @@ -177,22 +177,17 @@ class StudentT(distribution.Distribution): return tensor_shape.scalar() def _sample_n(self, n, seed=None): - # We use 2 uniform random floats to generate polar random variates. - # http://dl.acm.org/citation.cfm?id=179631 - # Theorem 2. Let G, H be iid variates, uniformly distributed on [0,1]. - # Let theta = 2*pi*H, let R = sqrt(df*(G^(-2/df) - 1)) for df > 0. - # Let X = R*cos(theta), and let Y = R*sin(theta). - # Then X ~ t_df and Y ~ t_df. - # The variates X and Y are not independent. - shape = array_ops.concat(0, ([2, n], self.batch_shape())) - uniform = random_ops.random_uniform(shape=shape, - dtype=self.dtype, - seed=seed) - samples_g, samples_h = array_ops.unpack(uniform, num=2) - theta = (2. * math.pi) * samples_h - r = math_ops.sqrt(self.df * - (math_ops.pow(samples_g, -2 / self.df) - 1)) - samples = r * math_ops.cos(theta) + # The sampling method comes from the well known fact that if X ~ Normal(0, + # 1), and Z ~ Chi2(df), then X / sqrt(Z / df) ~ StudentT(df). + shape = array_ops.concat(0, ([n], self.batch_shape())) + normal_sample = random_ops.random_normal( + shape, dtype=self.dtype, seed=seed) + half = constant_op.constant(0.5, self.dtype) + df = self.df * array_ops.ones(self.batch_shape(), dtype=self.dtype) + gamma_sample = random_ops.random_gamma( + [n,], half * df, beta=half, dtype=self.dtype, + seed=distribution_util.gen_new_seed(seed, salt="student_t")) + samples = normal_sample / math_ops.sqrt(gamma_sample / df) return samples * self.sigma + self.mu def _log_prob(self, x): diff --git a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py index 20a7a2bf048..f43f1c0421e 100644 --- a/tensorflow/contrib/distributions/python/ops/transformed_distribution.py +++ b/tensorflow/contrib/distributions/python/ops/transformed_distribution.py @@ -26,107 +26,139 @@ from tensorflow.python.ops import math_ops class TransformedDistribution(distribution.Distribution): """A Transformed Distribution. - A Transformed Distribution models `p(y)` given a base distribution `p(x)`, - an invertible transform, `y = f(x)`, and the determinant of the Jacobian of - `f(x)`. + A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and + a deterministic, invertible, differentiable transform, `Y = g(X)`. The + transform is typically an instance of the `Bijector` class and the base + distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. - #### Mathematical details + Write `P(Y=y)` for cumulative density function of random variable (rv) `Y` and + `p` for its derivative wrt to `Y`. Assume that `Y=g(X)` where `g` is + continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). 
- * `p(x)` - probability distribution for random variable X - * `p(y)` - probability distribution for random variable Y - * `f` - transform - * `g` - inverse transform, `g(f(x)) = x` - * `J(x)` - Jacobian of f(x) + A `TransformedDistribution` alters the input/outputs of a `Distribution` + associated with rv `X` in the following ways: - A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. - * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(y) + + distribution.log_prob(bijector.inverse(y))) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_cdf(bijector.inverse(y)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python - logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1. - tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" - ) + ds = tf.contrib.distributions + log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") + ``` + + A `LogNormal` made from callables: + + ```python + ds = tf.contrib.distributions + log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1))), + name="LogNormalTransformedDistribution") + ``` + + Another example constructing a Normal from a StandardNormal: + + ```python + ds = tf.contrib.distributions + normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` """ def __init__(self, - base_dist_cls, - transform, - inverse, - log_det_jacobian, - name="TransformedDistribution", - **base_dist_args): + base_distribution, + bijector, + name="TransformedDistribution"): """Construct a Transformed Distribution. Args: - base_dist_cls: the base distribution class to transform. Must be a - subclass of `Distribution`. - transform: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. - inverse: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. - log_det_jacobian: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. + base_distribution: The base distribution to transform. Typically an + instance of `Distribution`. + bijector: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. 
name: The name for the distribution. - **base_dist_args: kwargs to pass on to dist_cls on construction. - - Raises: - TypeError: if `base_dist_cls` is not a subclass of - `Distribution`. """ - with ops.name_scope(name, values=base_dist_args.values()) as ns: - self._base_dist = base_dist_cls(**base_dist_args) - self._transform = transform - self._inverse = inverse - self._log_det_jacobian = log_det_jacobian + with ops.name_scope(name) as ns: + self._base_distribution = base_distribution + self._bijector = bijector self._inverse_cache = {} super(TransformedDistribution, self).__init__( - dtype=self._base_dist.dtype, - parameters={"base_dist_cls": base_dist_cls, - "transform": transform, - "inverse": inverse, - "log_det_jacobian": log_det_jacobian, - "base_dist_args": base_dist_args}, - is_continuous=self._base_dist.is_continuous, - is_reparameterized=self._base_dist.is_reparameterized, - validate_args=self._base_dist.validate_args, - allow_nan_stats=self._base_dist.allow_nan_stats, + dtype=self._base_distribution.dtype, + parameters={"base_distribution": base_distribution, + "bijector": bijector}, + is_continuous=self._base_distribution.is_continuous, + is_reparameterized=self._base_distribution.is_reparameterized, + validate_args=self._base_distribution.validate_args, + allow_nan_stats=self._base_distribution.allow_nan_stats, name=ns) @property def base_distribution(self): """Base distribution, p(x).""" - return self._base_dist + return self._base_distribution @property - def transform(self): + def bijector(self): """Function transforming x => y.""" - return self._transform - - @property - def inverse(self): - """Inverse function of transform, y => x.""" - return self._inverse - - @property - def log_det_jacobian(self): - """Function computing the log determinant of the Jacobian of transform.""" - return self._log_det_jacobian + return self._bijector def _batch_shape(self): return self.base_distribution.batch_shape() @@ -142,29 +174,27 @@ class TransformedDistribution(distribution.Distribution): @distribution_util.AppendDocstring( """Samples from the base distribution and then passes through - the transform.""") + the bijector's forward transform.""") def _sample_n(self, n, seed=None): - samples = self.base_distribution.sample_n(n=n, seed=seed) - with ops.name_scope("transform"): - transformed = self.transform(samples) - self._inverse_cache[transformed] = samples - return transformed + raw_samples = self.base_distribution.sample_n(n=n, seed=seed) + samples = self.bijector.forward(raw_samples) + self._inverse_cache[samples] = raw_samples + return samples @distribution_util.AppendDocstring( - """Implements `(log o p o g)(y) - (log o det o J o g)(y)`, - where `g` is the inverse of `transform`. + """Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, + where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`.""") def _log_prob(self, y): x = self._inverse_possibly_from_cache(y) - with ops.name_scope("log_det_jacobian"): - log_det_jacobian = self.log_det_jacobian(x) - return self.base_distribution.log_prob(x) - log_det_jacobian + inverse_log_det_jacobian = self.bijector.inverse_log_det_jacobian(y) + return self.base_distribution.log_prob(x) + inverse_log_det_jacobian @distribution_util.AppendDocstring( - """Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of - `transform`. 
+ """Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the + inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`.""") @@ -172,8 +202,6 @@ class TransformedDistribution(distribution.Distribution): return math_ops.exp(self._log_prob(y)) def _log_cdf(self, y): - # If Y = f(X), - # P[Y <= y] = P[f(X) <= y] = P[X <= f^{-1}(y)] x = self._inverse_possibly_from_cache(y) return self.base_distribution.log_cdf(x) @@ -192,12 +220,7 @@ class TransformedDistribution(distribution.Distribution): def _inverse_possibly_from_cache(self, y): """Return `self._inverse(y)`, possibly using cached value.""" y = ops.convert_to_tensor(y, name="y") - with ops.name_scope("inverse"): - if y in self._inverse_cache: - x = self._inverse_cache[y] - elif self.inverse: - x = self.inverse(y) - else: - raise ValueError("No inverse function exists and input `y` was not " - "returned from `sample`.") - return x + if y in self._inverse_cache: + return self._inverse_cache[y] + else: + return self.bijector.inverse(y) diff --git a/tensorflow/contrib/distributions/python/ops/wishart.py b/tensorflow/contrib/distributions/python/ops/wishart.py index 92d952a1d24..29fa19d6fd1 100644 --- a/tensorflow/contrib/distributions/python/ops/wishart.py +++ b/tensorflow/contrib/distributions/python/ops/wishart.py @@ -22,6 +22,7 @@ import math import numpy as np from tensorflow.contrib.distributions.python.ops import distribution +from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.contrib.distributions.python.ops import operator_pd_cholesky from tensorflow.contrib.distributions.python.ops import operator_pd_full from tensorflow.contrib.framework.python.framework import tensor_util as contrib_tensor_util @@ -211,7 +212,8 @@ class _WishartOperatorPD(distribution.Distribution): 0.5 * self.df, self.dimension), beta=0.5, dtype=self.dtype, - seed=seed) + seed=distribution_util.gen_new_seed( + seed, "wishart")) # Complexity: O(nbk^2) x = array_ops.matrix_band_part(x, -1, 0) # Tri-lower. diff --git a/tensorflow/contrib/framework/python/framework/decorator_utils.py b/tensorflow/contrib/framework/python/framework/decorator_utils.py index e8d6dbe249e..155003498ce 100644 --- a/tensorflow/contrib/framework/python/framework/decorator_utils.py +++ b/tensorflow/contrib/framework/python/framework/decorator_utils.py @@ -56,6 +56,7 @@ def add_notice_to_docstring( def validate_callable(func, decorator_name): if not hasattr(func, '__call__'): raise ValueError( - '%s is not a function. If this is a property, ' - 'apply @%s before @property:\n\n@property\n@%s\ndef method(...)' % ( + '%s is not a function. 
If this is a property, make sure' + ' @property appears before @%s in your source code:' + '\n\n@property\n@%s\ndef method(...)' % ( func, decorator_name, decorator_name)) diff --git a/tensorflow/contrib/framework/python/framework/deprecation_test.py b/tensorflow/contrib/framework/python/framework/deprecation_test.py index 409758b8531..c5422f47316 100644 --- a/tensorflow/contrib/framework/python/framework/deprecation_test.py +++ b/tensorflow/contrib/framework/python/framework/deprecation_test.py @@ -245,11 +245,10 @@ class DeprecationTest(tf.test.TestCase): self.assertRegexpMatches(args[0], r"deprecated and will be removed after") self._assert_subset(set([date, instructions]), set(args[1:])) - @tf.test.mock.patch.object(logging, "warning", autospec=True) - def test_prop_wrong_order(self, mock_warning): - + def test_prop_wrong_order(self): with self.assertRaisesRegexp( - ValueError, "apply @deprecated before @property"): + ValueError, + "make sure @property appears before @deprecated in your source code"): # pylint: disable=unused-variable class _Object(object): @@ -357,8 +356,7 @@ class DeprecatedArgsTest(tf.test.TestCase): with self.assertRaisesRegexp(ValueError, "argument"): deprecation.deprecated_args(date, instructions) - @tf.test.mock.patch.object(logging, "warning", autospec=True) - def test_deprecated_missing_args(self, mock_warning): + def test_deprecated_missing_args(self): date = "2016-07-04" instructions = "This is how you update..." diff --git a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc index a81ccff4310..1f1f1a8ca25 100644 --- a/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc +++ b/tensorflow/contrib/layers/kernels/sparse_feature_cross_kernel.cc @@ -68,6 +68,7 @@ class SparseTensorColumn : public ColumnInterface { return feature_counts_[batch]; } + // InternalType is int64 only when using HashCrosser. int64 DoFeature(int64 batch, int64 n, int64 not_used) const { const int64 start = feature_start_indices_[batch]; if (DT_STRING == values_.dtype()) @@ -75,6 +76,7 @@ class SparseTensorColumn : public ColumnInterface { return values_.vec().data()[start + n]; } + // InternalType is string or StringPiece when using StringCrosser. string DoFeature(int64 batch, int64 n, string not_used) const { const int64 start = feature_start_indices_[batch]; if (DT_STRING == values_.dtype()) @@ -103,12 +105,14 @@ class DenseTensorColumn : public ColumnInterface { int64 FeatureCount(int64 batch) const override { return tensor_.dim_size(1); } + // InternalType is int64 only when using HashCrosser. int64 DoFeature(int64 batch, int64 n, int64 not_used) const { if (DT_STRING == tensor_.dtype()) return Fingerprint64(tensor_.matrix()(batch, n)); return tensor_.matrix()(batch, n); } + // Internal type is string or StringPiece when using StringCrosser. 
string DoFeature(int64 batch, int64 n, string not_used) const { if (DT_STRING == tensor_.dtype()) return tensor_.matrix()(batch, n); return std::to_string(tensor_.matrix()(batch, n)); @@ -158,7 +162,7 @@ class StringCrosser { public: StringCrosser(const std::vector< std::unique_ptr>>& columns, - const int64 not_used) + const int64 num_buckets_unused, const uint64 hash_key_unused) : columns_(columns) {} string Generate(const int64 batch_index, @@ -178,32 +182,62 @@ class StringCrosser { const std::vector>>& columns_; }; -// Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h -const int64 kInitialHashSeed = 0xDECAFCAFFE; - -int64 HashCombine(int64 a, int64 b) { - return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)); -} - // Generates the sparse crosses as nested hash to avoid string manipulations. class HashCrosser { public: HashCrosser( const std::vector>>& columns, - const int64 num_buckets) + const int64 num_buckets, const uint64 hash_key_unused) : columns_(columns), num_buckets_(num_buckets) {} int64 Generate(const int64 batch_index, const std::vector& permutation) const { + // Seed is chosen based on third_party/tensorflow/core/lib/hash/hash.h + static const int64 kInitialHashSeed = 0xDECAFCAFFE; + uint64 hashed_output = kInitialHashSeed; - for (int i = 0; i < permutation.size(); i++) { + for (size_t i = 0; i < permutation.size(); ++i) { int64 hash_i = columns_[i]->Feature(batch_index, permutation[i]); hashed_output = HashCombine(hashed_output, hash_i); } if (num_buckets_ > 0) { return hashed_output % num_buckets_; } else { - // To perevent negative output we take module to max int64. + // To prevent negative output we take modulo to max int64. + return hashed_output % std::numeric_limits::max(); + } + } + + private: + static int64 HashCombine(int64 a, int64 b) { + return a ^ (b + 0x9e3779b97f4a7800 + (a << 10) + (a >> 4)); + } + + const std::vector>>& columns_; + const int64 num_buckets_; +}; + +// Generates the sparse crosses as nested hash to avoid string manipulations. +class HashCrosserV2 { + public: + HashCrosserV2( + const std::vector>>& columns, + const int64 num_buckets, const uint64 hash_key) + : columns_(columns), num_buckets_(num_buckets), hash_key_(hash_key) {} + + int64 Generate(const int64 batch_index, + const std::vector& permutation) const { + // Do the fingerprint concatenation on uint64. + uint64 hashed_output = hash_key_; + for (size_t i = 0; i < permutation.size(); ++i) { + uint64 hash_i = columns_[i]->Feature(batch_index, permutation[i]); + hashed_output = FingerprintCat64(hashed_output, hash_i); + } + // The return value is int64 based on the number of buckets. + if (num_buckets_ > 0) { + return hashed_output % num_buckets_; + } else { + // To prevent negative output we take modulo to max int64. return hashed_output % std::numeric_limits::max(); } } @@ -211,6 +245,7 @@ class HashCrosser { private: const std::vector>>& columns_; const int64 num_buckets_; + const uint64 hash_key_; }; // ProductIterator generates cartesian products based on indices. 
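Why the kernel grows a keyed `HashCrosserV2` alongside the legacy `HashCrosser` is easiest to see by replaying the old combine step outside TensorFlow. The sketch below re-implements `HashCombine` with plain unsigned 64-bit arithmetic, which is a simplification (the kernel mixes signed `int64` and `uint64`, so exact hash values can differ); it only demonstrates the structural weakness that the V2 crosser and its `hash_key` are meant to remove, and that the collision test added later in this change pins down:

```python
MASK64 = (1 << 64) - 1
SEED = 0xDECAFCAFFE          # kInitialHashSeed from the kernel above
MAGIC = 0x9e3779b97f4a7800   # additive constant used by HashCombine


def hash_combine(a, b):
  # Unsigned re-implementation of the legacy HashCombine; illustrative only.
  return (a ^ ((b + MAGIC + ((a << 10) & MASK64) + (a >> 4)) & MASK64)) & MASK64


def legacy_cross(values, num_buckets):
  h = SEED
  for v in values:
    h = hash_combine(h, v)
  return h % num_buckets

# Two rows whose last feature differs by exactly num_buckets (359 vs. 359 + 1024)
# land in the same bucket for every value of the other feature: with the rest of
# the chain fixed, the low 10 bits of the combined hash depend only on the low
# 10 bits of the final value, so reducing modulo 1024 cannot separate them.
for x in range(10):
  assert legacy_cross([x, 359], 1024) == legacy_cross([x, 359 + 1024], 1024)

# HashCrosserV2 instead threads a caller-supplied hash_key through FingerprintCat64,
# which remixes all 64 bits at every step and avoids this kind of collision.
```

At the Python level the key is exposed through the new optional `hash_key` argument of `sparse_feature_cross` later in this change, with `SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY` as the value used by the new tests.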
@@ -262,28 +297,41 @@ class ProductIterator { std::vector next_permutation_; }; -template +template struct CrossTraits; -template -struct CrossTraits { +template +struct CrossTraits { typedef StringCrosser Crosser; typedef OutputUpdater Updater; }; template <> -struct CrossTraits { +struct CrossTraits { typedef HashCrosser Crosser; typedef OutputUpdater Updater; }; + +template <> +struct CrossTraits { + typedef HashCrosserV2 Crosser; + typedef OutputUpdater Updater; +}; } // namespace -template +template class SparseFeatureCrossOp : public OpKernel { public: explicit SparseFeatureCrossOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("num_buckets", &num_buckets_)); + if (VERSION_2) { + // Read signed_hash_key_ as int64 since uint64 attributes are not + // supported by REGISTER_OP. + int64 signed_hash_key_; + OP_REQUIRES_OK(context, context->GetAttr("hash_key", &signed_hash_key_)); + hash_key_ = static_cast(signed_hash_key_); + } } void Compute(OpKernelContext* context) override { @@ -303,8 +351,8 @@ class SparseFeatureCrossOp : public OpKernel { GenerateColumnsFromInput(indices_list_in, values_list_in, shapes_list_in, dense_list_in); - typename CrossTraits::Crosser crosser( - columns, num_buckets_); + typename CrossTraits::Crosser + crosser(columns, num_buckets_, hash_key_); Tensor* indices_out; Tensor* values_out; Tensor* shape_out; @@ -313,8 +361,8 @@ class SparseFeatureCrossOp : public OpKernel { CreateOutputTensors(columns, batch_size, context, &indices_out, &values_out, &shape_out, &output_start_indices); - typename CrossTraits::Updater updater( - output_start_indices, indices_out, values_out); + typename CrossTraits::Updater + updater(output_start_indices, indices_out, values_out); auto do_work = [this, &columns, crosser, updater](int64 begin, int64 end) { for (int b = begin; b < end; b++) { ProductIterator product_iterator(columns, b); @@ -459,7 +507,7 @@ class SparseFeatureCrossOp : public OpKernel { return columns; } - // Extrats data about the features and populates feature data. + // Extracts data about the features and populates feature data. void ExtractFeatureData( const OpInputList& indices_list_in, int64 batch_size, std::vector>* feature_counts, @@ -536,30 +584,57 @@ class SparseFeatureCrossOp : public OpKernel { return cross_count; } int64 num_buckets_; + uint64 hash_key_; }; REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); REGISTER_KERNEL_BUILDER(Name("SparseFeatureCross") .Device(DEVICE_CPU) .TypeConstraint("out_type") .TypeConstraint("internal_type"), - SparseFeatureCrossOp); + SparseFeatureCrossOp); + +// The following builders enable FingerprintCat64 concatenation for the +// crosses features. 
+REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); + +REGISTER_KERNEL_BUILDER(Name("SparseFeatureCrossV2") + .Device(DEVICE_CPU) + .TypeConstraint("out_type") + .TypeConstraint("internal_type"), + SparseFeatureCrossOp); } // namespace tensorflow diff --git a/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc b/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc index e854292f9da..f73ea5e2c9e 100644 --- a/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc +++ b/tensorflow/contrib/layers/ops/sparse_feature_cross_op.cc @@ -68,9 +68,87 @@ then the output will be if hashed_output=true then the output will be shape = [2, 2] - [0, 0]: Hash64("f", Hash64("d", Hash64("a"))) - [1, 0]: Hash64("g", Hash64("e", Hash64("b"))) - [1, 1]: Hash64("g", Hash64("e", Hash64("c"))) + [0, 0]: HashCombine( + Fingerprint64("f"), HashCombine( + Fingerprint64("d"), Fingerprint64("a"))) + [1, 0]: HashCombine( + Fingerprint64("g"), HashCombine( + Fingerprint64("e"), Fingerprint64("b"))) + [1, 1]: HashCombine( + Fingerprint64("g"), HashCombine( + Fingerprint64("e"), Fingerprint64("c"))) + +indices: 2-D. Indices of each input `SparseTensor`. +values: 1-D. values of each `SparseTensor`. +shapes: 1-D. Shapes of each `SparseTensor`. +dense: 2-D. Columns represented by dense `Tensor`. +output_indices: 2-D. Indices of the concatenated `SparseTensor`. +output_values: 1-D. Non-empty values of the concatenated or hashed + `SparseTensor`. +output_shape: 1-D. Shape of the concatenated `SparseTensor`. +)doc"); + +REGISTER_OP("SparseFeatureCrossV2") + .Input("indices: N * int64") + .Input("values: sparse_types") + .Input("shapes: N * int64") + .Input("dense: dense_types") + .Output("output_indices: int64") + .Output("output_values: out_type") + .Output("output_shape: int64") + .Attr("N: int >= 0") + .Attr("hashed_output: bool") + .Attr("num_buckets: int >= 0") + .Attr("hash_key: int") + .Attr("sparse_types: list({int64, string}) >= 0") + .Attr("dense_types: list({int64, string}) >= 0") + .Attr("out_type: {int64, string}") + .Attr("internal_type: {int64, string}") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->Matrix(c->UnknownDim(), 2)); + c->set_output(1, c->Vector(c->UnknownDim())); + c->set_output(2, c->Vector(2)); + return Status::OK(); + }) + .Doc(R"doc( +Generates sparse cross form a list of sparse tensors. + +The op takes two lists, one of 2D `SparseTensor` and one of 2D `Tensor`, each +representing features of one feature column. It outputs a 2D `SparseTensor` with +the batchwise crosses of these features. 
+ +For example, if the inputs are + + inputs[0]: SparseTensor with shape = [2, 2] + [0, 0]: "a" + [1, 0]: "b" + [1, 1]: "c" + + inputs[1]: SparseTensor with shape = [2, 1] + [0, 0]: "d" + [1, 0]: "e" + + inputs[2]: Tensor [["f"], ["g"]] + +then the output will be + + shape = [2, 2] + [0, 0]: "a_X_d_X_f" + [1, 0]: "b_X_e_X_g" + [1, 1]: "c_X_e_X_g" + +if hashed_output=true then the output will be + + shape = [2, 2] + [0, 0]: FingerprintCat64( + Fingerprint64("f"), FingerprintCat64( + Fingerprint64("d"), Fingerprint64("a"))) + [1, 0]: FingerprintCat64( + Fingerprint64("g"), FingerprintCat64( + Fingerprint64("e"), Fingerprint64("b"))) + [1, 1]: FingerprintCat64( + Fingerprint64("g"), FingerprintCat64( + Fingerprint64("e"), Fingerprint64("c"))) indices: 2-D. Indices of each input `SparseTensor`. values: 1-D. values of each `SparseTensor`. diff --git a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py index 1d39435ded9..3bdfc3e81bd 100644 --- a/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py +++ b/tensorflow/contrib/layers/python/kernel_tests/sparse_feature_cross_op_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy import tensorflow as tf @@ -253,10 +254,13 @@ class SparseCrossOpTest(tf.test.TestCase): Cross for the corresponding batch should be empty. """ op = tf.contrib.layers.sparse_feature_cross([ - self._sparse_tensor( - [['batch1-FC1-F1', 'batch1-FC1-F2']], 2), self._sparse_tensor( - [['batch1-FC2-F1'], ['batch2-FC2-F1']], 2), self._sparse_tensor( - [['batch1-FC3-F1', 'batch1-FC3-F2']], 2) + self._sparse_tensor([ + ['batch1-FC1-F1', 'batch1-FC1-F2'] + ], 2), self._sparse_tensor([ + ['batch1-FC2-F1'], ['batch2-FC2-F1'] + ], 2), self._sparse_tensor([ + ['batch1-FC3-F1', 'batch1-FC3-F2'] + ], 2) ]) expected_out = self._sparse_tensor([[ 'batch1-FC1-F1_X_batch1-FC2-F1_X_batch1-FC3-F1', @@ -298,6 +302,26 @@ class SparseCrossOpTest(tf.test.TestCase): with self.test_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + def test_hashed_output_zero_bucket_v2(self): + """Tests a simple scenario. + """ + op = tf.contrib.layers.sparse_feature_cross( + [ + self._sparse_tensor([ + ['batch1-FC1-F1'] + ]), self._sparse_tensor([ + ['batch1-FC2-F1'] + ]), self._sparse_tensor([ + ['batch1-FC3-F1'] + ]) + ], + hashed_output=True, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + # Check actual hashed output to prevent unintentional hashing changes. + expected_out = self._sparse_tensor([[1971693436396284976]]) + with self.test_session() as sess: + self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + # TODO(sibyl-Aix6ihai): Add benchmark to compare Hashed vs Non-hashed. def test_hashed_output(self): """Tests a simple scenario. @@ -319,6 +343,56 @@ class SparseCrossOpTest(tf.test.TestCase): with self.test_session() as sess: self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + def test_hashed_output_v2(self): + """Tests a simple scenario. 
+ """ + op = tf.contrib.layers.sparse_feature_cross( + [ + self._sparse_tensor([ + ['batch1-FC1-F1'] + ]), self._sparse_tensor([ + ['batch1-FC2-F1'] + ]), self._sparse_tensor([ + ['batch1-FC3-F1'] + ]) + ], + hashed_output=True, + num_buckets=100, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + # Check actual hashed output to prevent unintentional hashing changes. + expected_out = self._sparse_tensor([[83]]) + with self.test_session() as sess: + self._assert_sparse_tensor_equals(expected_out, sess.run(op)) + + def test_hashed_output_v1_has_collision(self): + """Tests the old version of the fingerprint concatenation has collisions. + """ + # The last 10 bits of 359 and 1024+359 are identical. + # As a result, all the crosses collide. + t1 = tf.constant([[359], [359 + 1024]]) + t2 = tf.constant([list(range(10)), list(range(10))]) + cross = tf.contrib.layers.sparse_feature_cross( + [t2, t1], hashed_output=True, num_buckets=1024) + cross_dense = tf.sparse_tensor_to_dense(cross) + with tf.Session(): + values = cross_dense.eval() + self.assertTrue(numpy.equal(values[0], values[1]).all()) + + def test_hashed_output_v2_has_no_collision(self): + """Tests the new version of the fingerprint concatenation has no collisions. + """ + # Although the last 10 bits of 359 and 1024+359 are identical. + # As a result, all the crosses shouldn't collide. + t1 = tf.constant([[359], [359 + 1024]]) + t2 = tf.constant([list(range(10)), list(range(10))]) + cross = tf.contrib.layers.sparse_feature_cross( + [t2, t1], hashed_output=True, num_buckets=1024, + hash_key=tf.contrib.layers.SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY) + cross_dense = tf.sparse_tensor_to_dense(cross) + with tf.Session(): + values = cross_dense.eval() + self.assertTrue(numpy.not_equal(values[0], values[1]).all()) + def test_hashed_3x1x2(self): """Tests 3x1x2 permutation with hashed output. """ diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index f81c20bdc76..6515f52ac33 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -170,7 +170,8 @@ def _prune_invalid_ids(sparse_ids, sparse_weights): return sparse_ids, sparse_weights -def hashed_embedding_lookup(params, values, dimension, name=None): +def hashed_embedding_lookup(params, values, dimension, name=None, + hash_key=None): """Looks up embeddings using parameter hashing for each value in `values`. The i-th embedding component of a value v in `values` is found by retrieving @@ -200,6 +201,9 @@ def hashed_embedding_lookup(params, values, dimension, name=None): values: `Tensor` of values to be embedded. dimension: Embedding dimension name: An optional name for this op. + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). 
Returns: A tensor with shape [d0, ..., dn, dimension] @@ -243,7 +247,8 @@ def hashed_embedding_lookup(params, values, dimension, name=None): tensors_to_cross = [array_ops.tile(array_ops.expand_dims( math_ops.range(0, dimension), 0), array_ops.shape(values)), values] ids = sparse_feature_cross_op.sparse_feature_cross( - tensors_to_cross, hashed_output=True, num_buckets=num_params) + tensors_to_cross, hashed_output=True, num_buckets=num_params, + hash_key=hash_key) ids = sparse_ops.sparse_tensor_to_dense(ids) # No need to validate the indices since we have checked the params @@ -260,7 +265,8 @@ def hashed_embedding_lookup_sparse(params, dimension, combiner=None, default_value=None, - name=None): + name=None, + hash_key=None): """Looks up embeddings of a sparse feature using parameter hashing. See `tf.contrib.layers.hashed_embedding_lookup` for embedding with hashing. @@ -276,6 +282,9 @@ def hashed_embedding_lookup_sparse(params, the default. default_value: The value to use for an entry with no features. name: An optional name for this op. + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). Returns: Dense tensor with shape [N, dimension] with N the number of rows in @@ -315,7 +324,8 @@ def hashed_embedding_lookup_sparse(params, values = sparse_values.values values, idx = array_ops.unique(values) - embeddings = hashed_embedding_lookup(params, values, dimension) + embeddings = hashed_embedding_lookup(params, values, dimension, + hash_key=hash_key) if combiner == "sum": embeddings = math_ops.sparse_segment_sum(embeddings, idx, segment_ids, diff --git a/tensorflow/contrib/layers/python/layers/feature_column.py b/tensorflow/contrib/layers/python/layers/feature_column.py index 23b54870a2d..d75eba31db9 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column.py +++ b/tensorflow/contrib/layers/python/layers/feature_column.py @@ -1476,6 +1476,7 @@ def bucketized_column(source_column, boundaries): class _CrossedColumn(_FeatureColumn, collections.namedtuple("_CrossedColumn", ["columns", "hash_bucket_size", + "hash_key", "combiner", "ckpt_to_load_from", "tensor_name_in_ckpt"])): """Represents a cross transformation also known as conjuction or combination. @@ -1536,6 +1537,7 @@ class _CrossedColumn(_FeatureColumn, def __new__(cls, columns, hash_bucket_size, + hash_key, combiner="sqrtn", ckpt_to_load_from=None, tensor_name_in_ckpt=None): @@ -1560,7 +1562,8 @@ class _CrossedColumn(_FeatureColumn, sorted_columns = sorted( [column for column in columns], key=lambda column: column.name) return super(_CrossedColumn, cls).__new__(cls, tuple(sorted_columns), - hash_bucket_size, combiner, + hash_bucket_size, hash_key, + combiner, ckpt_to_load_from, tensor_name_in_ckpt) @@ -1623,6 +1626,7 @@ class _CrossedColumn(_FeatureColumn, feature_tensors, hashed_output=True, num_buckets=self.hash_bucket_size, + hash_key=self.hash_key, name="cross") # pylint: disable=unused-argument @@ -1650,7 +1654,8 @@ class _CrossedColumn(_FeatureColumn, def crossed_column(columns, hash_bucket_size, combiner=None, ckpt_to_load_from=None, - tensor_name_in_ckpt=None): + tensor_name_in_ckpt=None, + hash_key=None): """Creates a _CrossedColumn. Args: @@ -1664,6 +1669,9 @@ def crossed_column(columns, hash_bucket_size, combiner=None, tensor_name_in_ckpt: (Optional). Name of the `Tensor` in the provided checkpoint from which to restore the column weights. Required if `ckpt_to_load_from` is not None. 
+ hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp + (optional). Returns: A _CrossedColumn. @@ -1682,6 +1690,7 @@ def crossed_column(columns, hash_bucket_size, combiner=None, return _CrossedColumn( columns, hash_bucket_size, + hash_key, combiner=combiner, ckpt_to_load_from=ckpt_to_load_from, tensor_name_in_ckpt=tensor_name_in_ckpt) diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops.py b/tensorflow/contrib/layers/python/layers/feature_column_ops.py index 623c6093bc3..4000ce88850 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops.py @@ -128,7 +128,6 @@ def _embeddings_from_arguments(column, embeddings, input_tensor, sparse_weights=weight_tensor, - default_id=0, combiner=args.combiner, name=column.name + 'weights') @@ -214,10 +213,8 @@ def input_from_feature_columns(columns_to_tensors, age_buckets = bucketized_column( source_column=age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65]) - occupation_x_age = crossed_column(columns=[occupation, age_buckets], - hash_bucket_size=10000) - feature_columns=[occupation_emb, occupation_x_age] + feature_columns=[occupation_emb, age_buckets] Args: columns_to_tensors: A mapping from feature column to tensors. 'string' key @@ -328,7 +325,6 @@ def _create_embedding_lookup(column, variable, embedding_lookup_arguments.input_tensor, sparse_weights=embedding_lookup_arguments.weight_tensor, - default_id=0, combiner=embedding_lookup_arguments.combiner, name=column.name + '_weights') return variable, predictions @@ -387,7 +383,6 @@ def _create_joint_embedding_lookup(columns_to_tensors, variable, sparse_tensor, sparse_weights=None, - default_id=0, combiner='sum', name='_weights') return variable, predictions @@ -488,8 +483,6 @@ def weighted_sum_from_feature_columns(columns_to_tensors, occupation = sparse_column_with_hash_bucket(column_name="occupation", hash_bucket_size=1000) - occupation_emb = embedding_column(sparse_id_column=occupation, dimension=16, - combiner="sum") age = real_valued_column("age") age_buckets = bucketized_column( source_column=age, @@ -497,7 +490,7 @@ def weighted_sum_from_feature_columns(columns_to_tensors, occupation_x_age = crossed_column(columns=[occupation, age_buckets], hash_bucket_size=10000) - feature_columns=[occupation_emb, occupation_x_age] + feature_columns=[age_buckets, occupation, occupation_x_age] Args: columns_to_tensors: A mapping from feature column to tensors. 
'string' key diff --git a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py index 0bcbcba5e30..dd3bd8fcb0b 100644 --- a/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py +++ b/tensorflow/contrib/layers/python/layers/feature_column_ops_test.py @@ -644,7 +644,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): hashed_sparse = tf.contrib.layers.sparse_column_with_hash_bucket("wire", 10) wire_tensor = tf.SparseTensor(values=["omar", "stringer", "marlo"], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) features = {"wire": wire_tensor} embeded_sparse = tf.contrib.layers.embedding_column( hashed_sparse, 1, combiner="sum", initializer=init_ops.ones_initializer) @@ -653,18 +653,18 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): with self.test_session(): tf.initialize_all_variables().run() # score: (number of values) - self.assertAllEqual(output.eval(), [[1.], [2.]]) + self.assertAllEqual(output.eval(), [[1.], [2.], [0.]]) def testEmbeddingColumnWithWeightedSparseColumnForDNN(self): ids = tf.contrib.layers.sparse_column_with_keys( "ids", ["marlo", "omar", "stringer"]) ids_tensor = tf.SparseTensor(values=["stringer", "stringer", "marlo"], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) weighted_ids = tf.contrib.layers.weighted_sparse_column(ids, "weights") weights_tensor = tf.SparseTensor(values=[10.0, 20.0, 30.0], indices=[[0, 0], [1, 0], [1, 1]], - shape=[2, 2]) + shape=[3, 2]) features = {"ids": ids_tensor, "weights": weights_tensor} embeded_sparse = tf.contrib.layers.embedding_column( @@ -675,7 +675,7 @@ class CreateInputLayersForDNNsTest(tf.test.TestCase): tf.initialize_all_variables().run() tf.initialize_all_tables().run() # score: (sum of weights) - self.assertAllEqual(output.eval(), [[10.], [50.]]) + self.assertAllEqual(output.eval(), [[10.], [50.], [0.]]) def testInputLayerWithCollectionsForDNN(self): real_valued = tf.contrib.layers.real_valued_column("price") @@ -960,7 +960,7 @@ class SequenceInputFromFeatureColumnTest(tf.test.TestCase): # `ids_tensor` consists of 7 instances of , 3 occurences of "b", # 2 occurences of "c" and 1 instance of "a". - expected_gradient_values = sorted([7., 3., 2., 1.] * embedding_dimension) + expected_gradient_values = sorted([0., 3., 2., 1.] * embedding_dimension) actual_gradient_values = np.sort(gradients[0].values, axis=None) self.assertAllClose(expected_gradient_values, actual_gradient_values) diff --git a/tensorflow/contrib/layers/python/layers/target_column.py b/tensorflow/contrib/layers/python/layers/target_column.py index 711510f32cf..0667fee32a5 100644 --- a/tensorflow/contrib/layers/python/layers/target_column.py +++ b/tensorflow/contrib/layers/python/layers/target_column.py @@ -22,6 +22,7 @@ import six from tensorflow.contrib import losses from tensorflow.contrib import metrics as metrics_lib +from tensorflow.contrib.framework import deprecated from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -30,6 +31,11 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." 
+ "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def regression_target(label_name=None, weight_column_name=None, target_dimension=1): @@ -54,6 +60,11 @@ def regression_target(label_name=None, # TODO(zakaria): Add logistic_regression_target +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def multi_class_target(n_classes, label_name=None, weight_column_name=None): """Creates a _TargetColumn for multi class single label classification. @@ -85,6 +96,11 @@ def multi_class_target(n_classes, label_name=None, weight_column_name=None): weight_column_name=weight_column_name) +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def binary_svm_target(label_name=None, weight_column_name=None): """Creates a _TargetColumn for binary classification with SVMs. @@ -105,6 +121,11 @@ def binary_svm_target(label_name=None, weight_column_name=None): weight_column_name=weight_column_name) +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") class ProblemType(object): UNSPECIFIED = 0 CLASSIFICATION = 1 @@ -391,7 +412,6 @@ def _log_loss_with_two_classes(logits, target): def _softmax_cross_entropy_loss(logits, target): - # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. # Check that we got int32/int64 for classification. if (not target.dtype.is_compatible_with(dtypes.int64) and not target.dtype.is_compatible_with(dtypes.int32)): @@ -416,6 +436,11 @@ def _run_metrics(predictions, targets, metrics, weights): return result +@deprecated( + "2016-11-12", + "This file will be removed after the deprecation date." + "Please switch to " + "third_party/tensorflow/contrib/learn/python/learn/estimators/head.py") def get_default_binary_metrics_for_eval(thresholds): """Returns a dictionary of basic metrics for logistic regression. diff --git a/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py b/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py index a3cbd2a35ab..560598024a7 100644 --- a/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py +++ b/tensorflow/contrib/layers/python/ops/sparse_feature_cross_op.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function + +from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.python.framework import common_shapes from tensorflow.python.framework import dtypes from tensorflow.python.framework import load_library @@ -28,9 +30,21 @@ _sparse_feature_cross_op = load_library.load_op_library( resource_loader.get_path_to_datafile("_sparse_feature_cross_op.so")) assert _sparse_feature_cross_op, "Could not load _sparse_feature_cross_op.so." +# Default hash key for the FingerprintCat64. +SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY = 0xDECAFCAFFE + +@deprecated_arg_values( + "2016-11-20", + "The default behavior of sparse_feature_cross is changing, the default\n" + "value for hash_key will change to SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY.\n" + "From that point on sparse_feature_cross will always use FingerprintCat64\n" + "to concatenate the feature fingerprints. 
And the underlying\n" + "_sparse_feature_cross_op.sparse_feature_cross operation will be marked\n" + "as deprecated.", + hash_key=None) def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, - name=None): + name=None, hash_key=None): """Crosses a list of Tensor or SparseTensor objects. See sparse_feature_cross_kernel.cc for more details. @@ -42,6 +56,10 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, num_buckets: It is used if hashed_output is true. output = hashed_value%num_buckets if num_buckets > 0 else hashed_value. name: A name prefix for the returned tensors (optional). + hash_key: Specify the hash_key that will be used by the `FingerprintCat64` + function to combine the crosses fingerprints on SparseFeatureCrossOp. + The default value is None, but will become + SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY after 2016-11-20 (optional). Returns: A `SparseTensor` with the crossed features. @@ -74,18 +92,36 @@ def sparse_feature_cross(inputs, hashed_output=False, num_buckets=0, dense_inputs[i] = math_ops.to_int64(dense_inputs[i]) internal_type = dtypes.int64 - indices_out, values_out, shape_out = ( - _sparse_feature_cross_op.sparse_feature_cross(indices, - values, - shapes, - dense_inputs, - hashed_output, - num_buckets, - out_type=out_type, - internal_type=internal_type, - name=name)) + if hash_key: + indices_out, values_out, shape_out = ( + _sparse_feature_cross_op.sparse_feature_cross_v2( + indices, + values, + shapes, + dense_inputs, + hashed_output, + num_buckets, + hash_key=hash_key, + out_type=out_type, + internal_type=internal_type, + name=name)) + else: + indices_out, values_out, shape_out = ( + _sparse_feature_cross_op.sparse_feature_cross( + indices, + values, + shapes, + dense_inputs, + hashed_output, + num_buckets, + out_type=out_type, + internal_type=internal_type, + name=name)) + return ops.SparseTensor(indices_out, values_out, shape_out) ops.RegisterShape("SparseFeatureCross")(common_shapes.call_cpp_shape_fn) ops.NotDifferentiable("SparseFeatureCross") +ops.RegisterShape("SparseFeatureCrossV2")(common_shapes.call_cpp_shape_fn) +ops.NotDifferentiable("SparseFeatureCrossV2") diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 40e014a4409..b6d83884f4d 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -27,19 +27,7 @@ py_library( py_test( name = "base_test", size = "medium", - srcs = ["python/learn/tests/base_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":learn", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - -py_test( - name = "load_csv_test", - size = "small", - srcs = ["python/learn/tests/load_csv_test.py"], + srcs = ["python/learn/estimators/base_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -51,7 +39,7 @@ py_test( py_test( name = "data_feeder_test", size = "small", - srcs = ["python/learn/tests/data_feeder_test.py"], + srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -274,7 +262,7 @@ py_test( py_test( name = "estimators_test", size = "small", - srcs = ["python/learn/tests/estimators_test.py"], + srcs = ["python/learn/estimators/estimators_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -286,7 +274,7 @@ py_test( py_test( name = "metric_spec_test", size = "small", - srcs = ["python/learn/tests/metric_spec_test.py"], + srcs = ["python/learn/metric_spec_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -298,7 +286,7 @@ py_test( py_test( name = 
"experiment_test", size = "small", - srcs = ["python/learn/tests/experiment_test.py"], + srcs = ["python/learn/experiment_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -310,7 +298,7 @@ py_test( py_test( name = "graph_actions_test", size = "small", - srcs = ["python/learn/tests/graph_actions_test.py"], + srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -322,7 +310,7 @@ py_test( py_test( name = "learn_runner_test", size = "small", - srcs = ["python/learn/tests/learn_runner_test.py"], + srcs = ["python/learn/learn_runner_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -334,7 +322,7 @@ py_test( py_test( name = "monitors_test", size = "small", - srcs = ["python/learn/tests/monitors_test.py"], + srcs = ["python/learn/monitors_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -347,7 +335,7 @@ py_test( name = "run_config_test", size = "small", srcs = [ - "python/learn/tests/run_config_test.py", + "python/learn/estimators/run_config_test.py", ], srcs_version = "PY2AND3", deps = [ @@ -356,18 +344,6 @@ py_test( ], ) -py_test( - name = "basic_session_run_hooks_test", - size = "small", - srcs = ["python/learn/tests/basic_session_run_hooks_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":learn", - "//tensorflow:tensorflow_py", - "//tensorflow/python:framework_test_lib", - ], -) - py_test( name = "tensor_signature_test", srcs = ["python/learn/estimators/tensor_signature_test.py"], @@ -430,15 +406,10 @@ py_test( ) py_test( - name = "dnn_test", - size = "medium", - srcs = ["python/learn/estimators/dnn_test.py"], - shard_count = 4, + name = "head_test", + size = "small", + srcs = ["python/learn/estimators/head_test.py"], srcs_version = "PY2AND3", - tags = [ - "manual", # http://b/31934515 - "notap", - ], deps = [ ":learn", "//tensorflow:tensorflow_py", @@ -447,9 +418,10 @@ py_test( ) py_test( - name = "dnn_sampled_softmax_classifier_test", - size = "large", - srcs = ["python/learn/estimators/dnn_sampled_softmax_classifier_test.py"], + name = "dnn_test", + size = "medium", + srcs = ["python/learn/estimators/dnn_test.py"], + shard_count = 4, srcs_version = "PY2AND3", tags = [ "manual", # http://b/31934515 @@ -538,7 +510,7 @@ py_test( py_test( name = "grid_search_test", size = "small", - srcs = ["python/learn/tests/grid_search_test.py"], + srcs = ["python/learn/grid_search_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -550,7 +522,7 @@ py_test( py_test( name = "io_test", size = "small", - srcs = ["python/learn/tests/io_test.py"], + srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -562,7 +534,7 @@ py_test( py_test( name = "multioutput_test", size = "small", - srcs = ["python/learn/tests/multioutput_test.py"], + srcs = ["python/learn/estimators/multioutput_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -574,7 +546,7 @@ py_test( py_test( name = "nonlinear_test", size = "medium", - srcs = ["python/learn/tests/nonlinear_test.py"], + srcs = ["python/learn/estimators/nonlinear_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -586,7 +558,7 @@ py_test( py_test( name = "regression_test", size = "small", - srcs = ["python/learn/tests/regression_test.py"], + srcs = ["python/learn/estimators/regression_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -598,7 +570,7 @@ py_test( py_test( name = "ops_test", size = "small", - srcs = ["python/learn/ops/tests/ops_test.py"], + srcs = ["python/learn/ops/ops_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", 
@@ -610,7 +582,7 @@ py_test( py_test( name = "seq2seq_ops_test", size = "small", - srcs = ["python/learn/ops/tests/seq2seq_ops_test.py"], + srcs = ["python/learn/ops/seq2seq_ops_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", @@ -687,7 +659,7 @@ py_test( py_test( name = "stability_test", size = "small", - srcs = ["python/learn/tests/stability_test.py"], + srcs = ["python/learn/estimators/stability_test.py"], srcs_version = "PY2AND3", deps = [ ":learn", diff --git a/tensorflow/contrib/learn/python/learn/datasets/BUILD b/tensorflow/contrib/learn/python/learn/datasets/BUILD index 43a4aa0d0d3..ec1fa815f84 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/BUILD +++ b/tensorflow/contrib/learn/python/learn/datasets/BUILD @@ -43,3 +43,15 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +py_test( + name = "load_csv_test", + size = "small", + srcs = ["load_csv_test.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/learn", + "//tensorflow/python:framework_test_lib", + ], +) diff --git a/tensorflow/contrib/learn/python/learn/tests/load_csv_test.py b/tensorflow/contrib/learn/python/learn/datasets/load_csv_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/load_csv_test.py rename to tensorflow/contrib/learn/python/learn/datasets/load_csv_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index a46f6ec364b..b2033add2f4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -14,7 +14,6 @@ # ============================================================================== """Estimators.""" - from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -32,6 +31,8 @@ from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input from tensorflow.contrib.learn.python.learn.estimators.estimator import infer_real_valued_columns_from_input_fn from tensorflow.contrib.learn.python.learn.estimators.estimator import ModeKeys +from tensorflow.contrib.learn.python.learn.estimators.head import MetricKey +from tensorflow.contrib.learn.python.learn.estimators.head import PedictionKey from tensorflow.contrib.learn.python.learn.estimators.linear import LinearClassifier from tensorflow.contrib.learn.python.learn.estimators.linear import LinearRegressor from tensorflow.contrib.learn.python.learn.estimators.logistic_regressor import LogisticRegressor diff --git a/tensorflow/contrib/learn/python/learn/tests/base_test.py b/tensorflow/contrib/learn/python/learn/estimators/base_test.py similarity index 88% rename from tensorflow/contrib/learn/python/learn/tests/base_test.py rename to tensorflow/contrib/learn/python/learn/estimators/base_test.py index 7d6e193e7cc..ed486adacde 100644 --- a/tensorflow/contrib/learn/python/learn/tests/base_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/base_test.py @@ -43,7 +43,7 @@ class BaseTest(tf.test.TestCase): feature_columns = learn.infer_real_valued_columns_from_input(x) regressor = learn.LinearRegressor(feature_columns=feature_columns) regressor.fit(x, y, max_steps=100) - score = mean_squared_error(y, regressor.predict(x)) + score = mean_squared_error(y, np.array(list(regressor.predict(x)))) 
self.assertLess(score, 1.0, "Failed with score = {0}".format(score)) def testIris(self): @@ -52,7 +52,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, [x for x in iris.target], max_steps=100) - score = accuracy_score(iris.target, classifier.predict(iris.data)) + score = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater(score, 0.7, "Failed with score = {0}".format(score)) def testIrisAllVariables(self): @@ -82,7 +82,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3, model_dir=output_dir) classifier.fit(iris.data, iris.target, max_steps=100) - score = accuracy_score(iris.target, classifier.predict(iris.data)) + score = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) # TODO(ipolosukhin): Check that summaries are correctly written. @@ -92,9 +92,9 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, iris.target, steps=100) - score1 = accuracy_score(iris.target, classifier.predict(iris.data)) + score1 = accuracy_score(iris.target, list(classifier.predict(iris.data))) classifier.fit(iris.data, iris.target, steps=500) - score2 = accuracy_score(iris.target, classifier.predict(iris.data)) + score2 = accuracy_score(iris.target, list(classifier.predict(iris.data))) self.assertGreater( score2, score1, "Failed with score2 {0} <= score1 {1}".format(score2, score1)) @@ -120,9 +120,10 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris_data(), iris_target(), max_steps=500) - score1 = accuracy_score(iris.target, classifier.predict(iris.data)) + score1 = accuracy_score(iris.target, + list(classifier.predict(iris.data))) score2 = accuracy_score(iris.target, - classifier.predict(iris_predict_data())) + list(classifier.predict(iris_predict_data()))) self.assertGreater(score1, 0.5, "Failed with score = {0}".format(score1)) self.assertEqual(score2, score1, "Scores from {0} iterator doesn't " "match score {1} from full " @@ -137,7 +138,7 @@ class BaseTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(iris.data), n_classes=3) classifier.fit(iris.data, iris.target, max_steps=250) - score = log_loss(iris.target, classifier.predict_proba(iris.data)) + score = log_loss(iris.target, list(classifier.predict_proba(iris.data))) self.assertLess(score, 0.8, "Failed with score = {0}".format(score)) def testBoston(self): @@ -146,7 +147,8 @@ class BaseTest(tf.test.TestCase): regressor = learn.LinearRegressor( feature_columns=learn.infer_real_valued_columns_from_input(boston.data)) regressor.fit(boston.data, boston.target, max_steps=500) - score = mean_squared_error(boston.target, regressor.predict(boston.data)) + score = mean_squared_error( + boston.target, np.array(list(regressor.predict(boston.data)))) self.assertLess(score, 150, "Failed with score = {0}".format(score)) def testUnfitted(self): diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier.py b/tensorflow/contrib/learn/python/learn/estimators/classifier.py index ac9ec45f89b..978ab9339b9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/classifier.py +++ b/tensorflow/contrib/learn/python/learn/estimators/classifier.py @@ -126,7 +126,7 @@ 
class Classifier(estimator.Estimator): @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False): + def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted classes for given features. Args: @@ -160,7 +160,7 @@ class Classifier(estimator.Estimator): estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict_proba( - self, x=None, input_fn=None, batch_size=None, as_iterable=False): + self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted probabilty distributions for given features. Args: diff --git a/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py b/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py index a2883c39d37..353f25d1c2a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/classifier_test.py @@ -32,9 +32,8 @@ from tensorflow.contrib.session_bundle import manifest_pb2 def iris_input_fn(num_epochs=None): iris = tf.contrib.learn.datasets.load_iris() - features = tf.reshape(tf.constant(iris.data), [-1, 4]) - if num_epochs: - features = tf.train.limit_epochs(features, num_epochs=num_epochs) + features = tf.train.limit_epochs( + tf.reshape(tf.constant(iris.data), [-1, 4]), num_epochs=num_epochs) target = tf.reshape(tf.constant(iris.target), [-1]) return features, target @@ -71,42 +70,22 @@ class ClassifierTest(tf.test.TestCase): params={'learning_rate': 0.01}) self._runIrisAll(est) - def testIrisPredictAsIterable(self): - iris = tf.contrib.learn.datasets.load_iris() - est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) - est.fit(iris.data, iris.target, steps=100) - scores = est.evaluate(x=iris.data, y=iris.target, name='eval') - predictions = list(est.predict(x=iris.data, as_iterable=True)) - predictions_proba = list(est.predict_proba(x=iris.data, as_iterable=True)) - self.assertEqual(len(predictions), iris.target.shape[0]) - self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) - other_score = _sklearn.accuracy_score(iris.target, predictions) - self.assertAllClose(other_score, scores['accuracy']) - def testIrisInputFn(self): - iris = tf.contrib.learn.datasets.load_iris() - est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) - est.fit(input_fn=iris_input_fn, steps=100) - est.evaluate(input_fn=iris_input_fn, steps=1, name='eval') - predictions = est.predict(input_fn=iris_input_fn) - self.assertEqual(predictions.shape[0], iris.target.shape[0]) - - def testIrisPredictInputFnAsIterable(self): iris = tf.contrib.learn.datasets.load_iris() est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3) est.fit(input_fn=iris_input_fn, steps=100) est.evaluate(input_fn=iris_input_fn, steps=1, name='eval') predict_input_fn = functools.partial(iris_input_fn, num_epochs=1) - predictions = list(est.predict(input_fn=predict_input_fn, as_iterable=True)) + predictions = list(est.predict(input_fn=predict_input_fn)) self.assertEqual(len(predictions), iris.target.shape[0]) def _runIrisAll(self, est): iris = tf.contrib.learn.datasets.load_iris() est.fit(iris.data, iris.target, steps=100) scores = est.evaluate(x=iris.data, y=iris.target, name='eval') - predictions = est.predict(x=iris.data) - predictions_proba = est.predict_proba(x=iris.data) - self.assertEqual(predictions.shape[0], 
iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + predictions_proba = list(est.predict_proba(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1)) other_score = _sklearn.accuracy_score(iris.target, predictions) self.assertAllClose(other_score, scores['accuracy']) diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py index ae8fb6944d8..88cafc655fc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model_test.py @@ -23,11 +23,11 @@ import tempfile import tensorflow as tf -from tensorflow.contrib import layers from tensorflow.contrib import metrics as metrics_lib from tensorflow.contrib.framework.python.ops import variables as contrib_variables from tensorflow.contrib.learn.python.learn.estimators import composable_model from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.python.framework import ops from tensorflow.python.ops import state_ops @@ -42,10 +42,10 @@ def _iris_input_fn(): class _BaseEstimatorForTest(estimator.BaseEstimator): def __init__(self, - target_column, + head, feature_columns): super(_BaseEstimatorForTest, self).__init__(model_dir=tempfile.mkdtemp()) - self._target_column = target_column + self._head = head self._feature_columns = feature_columns def _get_train_ops(self, features, targets): @@ -54,18 +54,22 @@ class _BaseEstimatorForTest(estimator.BaseEstimator): logits = self._model.build_model( features, self._feature_columns, is_training=True) - loss = self._target_column.loss(logits, targets, features) - train_step = self._model.get_train_step(loss) + model_fn_ops = self._head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + _noop_training_fn, logits=logits) + train_step = self._model.get_train_step(model_fn_ops.loss) with ops.control_dependencies(train_step): with ops.get_default_graph().colocate_with(global_step): - return state_ops.assign_add(global_step, 1).op, loss + return state_ops.assign_add(global_step, 1).op, model_fn_ops.loss def _get_eval_ops(self, features, targets, metrics=None): logits = self._model.build_model( features, self._feature_columns, is_training=False) - loss = self._target_column.loss(logits, targets, features) - return {'loss': metrics_lib.streaming_mean(loss)} + model_fn_ops = self._head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + _noop_training_fn, logits=logits) + return {'loss': metrics_lib.streaming_mean(model_fn_ops.loss)} def _get_predict_ops(self, features): raise NotImplementedError @@ -74,32 +78,32 @@ class _BaseEstimatorForTest(estimator.BaseEstimator): class LinearEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns): - super(LinearEstimator, self).__init__(target_column, feature_columns) + super(LinearEstimator, self).__init__(head, feature_columns) self._model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns) + num_label_columns=head.logits_dimension) class JointLinearEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns): - super(JointLinearEstimator, self).__init__(target_column, feature_columns) + super(JointLinearEstimator, 
self).__init__(head, feature_columns) self._model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns, _joint_weights=True) + num_label_columns=head.logits_dimension, _joint_weights=True) class DNNEstimator(_BaseEstimatorForTest): def __init__(self, - target_column, + head, feature_columns, hidden_units): - super(DNNEstimator, self).__init__(target_column, feature_columns) + super(DNNEstimator, self).__init__(head, feature_columns) self._model = composable_model.DNNComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, hidden_units=hidden_units) @@ -119,8 +123,8 @@ class ComposableModelTest(tf.test.TestCase): language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100) age = tf.contrib.layers.real_valued_column('age') - target_column = layers.multi_class_target(n_classes=2) - classifier = LinearEstimator(target_column, + head = head_lib._multi_class_head(n_classes=2) + classifier = LinearEstimator(head, feature_columns=[age, language]) classifier.fit(input_fn=input_fn, steps=1000) @@ -144,8 +148,8 @@ class ComposableModelTest(tf.test.TestCase): language = tf.contrib.layers.sparse_column_with_hash_bucket('language', 100) age = tf.contrib.layers.sparse_column_with_hash_bucket('age', 2) - target_column = layers.multi_class_target(n_classes=2) - classifier = JointLinearEstimator(target_column, + head = head_lib._multi_class_head(n_classes=2) + classifier = JointLinearEstimator(head, feature_columns=[age, language]) classifier.fit(input_fn=input_fn, steps=1000) @@ -160,8 +164,8 @@ class ComposableModelTest(tf.test.TestCase): cont_features = [ tf.contrib.layers.real_valued_column('feature', dimension=4)] - target_column = layers.multi_class_target(n_classes=3) - classifier = DNNEstimator(target_column, + head = head_lib._multi_class_head(n_classes=3) + classifier = DNNEstimator(head, feature_columns=cont_features, hidden_units=[3, 3]) @@ -169,5 +173,9 @@ class ComposableModelTest(tf.test.TestCase): classifier.evaluate(input_fn=_iris_input_fn, steps=100) +def _noop_training_fn(unused_loss): + return tf.no_op() + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index 5e0af3a5d3f..8cc46857615 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -267,16 +267,24 @@ def _dnn_classifier_model_fn(features, targets, mode, params): if mode == estimator.ModeKeys.TRAIN: targets = _reshape_targets(targets) - loss = loss_fn(logits, targets, - weight=_get_weight_tensor(features, weight_column_name)) + weight = _get_weight_tensor(features, weight_column_name) + training_loss = loss_fn(logits, targets, weight=weight) + loss = _rescale_eval_loss(training_loss, weight) train_ops = [optimizers.optimize_loss( - loss=loss, global_step=contrib_variables.get_global_step(), - learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), - clip_gradients=gradient_clip_norm, name=parent_scope)] + loss=training_loss, + global_step=contrib_variables.get_global_step(), + learning_rate=_LEARNING_RATE, + optimizer=_get_optimizer(optimizer), + clip_gradients=gradient_clip_norm, + name=parent_scope, + # Empty summaries to prevent optimizers from logging the training_loss. 
+ summaries=[])] if enable_centered_bias: train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns)) + logging_ops.scalar_summary("loss", loss) + return None, loss, control_flow_ops.group(*train_ops) elif mode == estimator.ModeKeys.EVAL: diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index e61511134f7..1b40681442c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import numpy as np +import six from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated @@ -28,15 +29,12 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl from tensorflow.contrib.layers.python.layers import feature_column_ops from tensorflow.contrib.learn.python.learn.estimators import composable_model from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import nn from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.training import training def _changing_default_center_bias(): @@ -67,7 +65,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): """ def __init__(self, # _joint_linear_weights pylint: disable=invalid-name - target_column, + head, model_dir=None, linear_feature_columns=None, linear_optimizer=None, @@ -78,13 +76,13 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): dnn_activation_fn=nn.relu, dnn_dropout=None, gradient_clip_norm=None, - enable_centered_bias=True, config=None, - feature_engineering_fn=None): + feature_engineering_fn=None, + default_prediction_key=None): """Initializes a _DNNLinearCombinedBaseEstimator instance. Args: - target_column: A _TargetColumn object. + head: A _Head object. model_dir: Directory to save model parameters, graph and etc. This can also be used to load checkpoints from the directory into a estimator to continue training a previously saved model. @@ -111,14 +109,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. See tf.clip_by_global_norm for more details. - enable_centered_bias: A bool. If True, estimator will learn a centered - bias variable for each class. Rest of the model structure learns the - residual after centered bias. config: RunConfig object to configure the runtime settings. feature_engineering_fn: Feature engineering function. Takes features and targets which are the output of `input_fn` and returns features and targets which will be fed into the model. + default_prediction_key: Default prediction key to use with metrics. 
Raises: ValueError: If both linear_feature_columns and dnn_features_columns are @@ -130,14 +126,14 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): num_ps_replicas = config.num_ps_replicas if config else 0 self._linear_model = composable_model.LinearComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, optimizer=linear_optimizer, _joint_weights=_joint_linear_weights, gradient_clip_norm=gradient_clip_norm, num_ps_replicas=num_ps_replicas) self._dnn_model = composable_model.DNNComposableModel( - num_label_columns=target_column.num_label_columns, + num_label_columns=head.logits_dimension, hidden_units=dnn_hidden_units, optimizer=dnn_optimizer, activation_fn=dnn_activation_fn, @@ -149,9 +145,8 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): self._linear_optimizer = linear_optimizer self._dnn_feature_columns = dnn_feature_columns self._dnn_hidden_units = dnn_hidden_units - self._centered_bias_weight_collection = "centered_bias" - self._enable_centered_bias = enable_centered_bias - self._target_column = target_column + self._head = head + self._default_prediction_key = default_prediction_key self._feature_engineering_fn = ( feature_engineering_fn or (lambda features, targets: (features, targets))) @@ -194,9 +189,12 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): return (self._dnn_model.get_bias(model_dir=self._model_dir) + [self.get_variable_value("centered_bias_weight")]) - def _get_target_column(self): - """Returns the target column of this Estimator.""" - return self._target_column + # TODO(zakaria): Remove this function once export. export_estimator is + # obsolete. + def _create_signature_fn(self): + """Returns a function to create export signature of this Estimator.""" + # pylint: disable=protected-access + return self._head._create_signature_fn() def _get_feature_dict(self, features): if isinstance(features, dict): @@ -205,45 +203,60 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): def _get_train_ops(self, features, targets): """See base class.""" - global_step = contrib_variables.get_global_step() - assert global_step features = self._get_feature_dict(features) features, targets = self._feature_engineering_fn(features, targets) logits = self._logits(features, is_training=True) - if self._enable_centered_bias: - centered_bias_step = [self._centered_bias_step(targets, features)] - else: - centered_bias_step = [] - with ops.control_dependencies(centered_bias_step): - training_loss = self._target_column.training_loss(logits, targets, - features) - weighted_average_loss = self._target_column.loss(logits, targets, - features) - logging_ops.scalar_summary("loss", weighted_average_loss) + def _make_training_op(training_loss): + global_step = contrib_variables.get_global_step() + assert global_step - linear_train_step = self._linear_model.get_train_step(training_loss) - dnn_train_step = (self._dnn_model.get_train_step(training_loss) if - self._dnn_model else []) + linear_train_step = self._linear_model.get_train_step(training_loss) + dnn_train_step = (self._dnn_model.get_train_step(training_loss) if + self._dnn_model else []) + with ops.control_dependencies(linear_train_step + dnn_train_step): + with ops.get_default_graph().colocate_with(global_step): + return state_ops.assign_add(global_step, 1).op - with ops.control_dependencies(linear_train_step + dnn_train_step): - with ops.get_default_graph().colocate_with(global_step): - return 
state_ops.assign_add(global_step, 1).op, weighted_average_loss + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.TRAIN, + _make_training_op, + logits=logits) + return model_fn_ops.training_op, model_fn_ops.loss def _get_eval_ops(self, features, targets, metrics=None): """See base class.""" features = self._get_feature_dict(features) features, targets = self._feature_engineering_fn(features, targets) logits = self._logits(features) - return self._target_column.get_eval_ops(features, logits, targets, metrics) + + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.EVAL, None, + logits=logits) + all_metrics = model_fn_ops.default_metrics + if metrics: + for name, metric in six.iteritems(metrics): + if not isinstance(name, tuple): + # TODO(zakaria): remove once deprecation is finished (b/31229024) + all_metrics[(name, self._default_prediction_key)] = metric + else: + all_metrics[name] = metric + # TODO(zakaria): Remove this once we refactor this class to delegate + # to estimator. + # pylint: disable=protected-access + result = estimator._make_metrics_ops(all_metrics, features, targets, + model_fn_ops.predictions) + return result def _get_predict_ops(self, features): """See base class.""" features = self._get_feature_dict(features) features, _ = self._feature_engineering_fn(features, None) logits = self._logits(features) - return self._target_column.logits_to_predictions(logits, proba=True) + model_fn_ops = self._head.head_ops(features, None, estimator.ModeKeys.INFER, + None, logits=logits) + return model_fn_ops.predictions @deprecated( "2016-09-23", @@ -278,32 +291,6 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): return self._linear_model.build_model( features, self._linear_feature_columns, is_training) - def _centered_bias(self): - centered_bias = variables.Variable( - array_ops.zeros([self._target_column.num_label_columns]), - collections=[self._centered_bias_weight_collection, - ops.GraphKeys.VARIABLES], - name="centered_bias_weight") - logging_ops.scalar_summary( - ["centered_bias_%d" % cb for cb in range( - self._target_column.num_label_columns)], - array_ops.reshape(centered_bias, [-1])) - return centered_bias - - def _centered_bias_step(self, targets, features): - centered_bias = ops.get_collection(self._centered_bias_weight_collection) - batch_size = array_ops.shape(targets)[0] - logits = array_ops.reshape( - array_ops.tile(centered_bias[0], [batch_size]), - [batch_size, self._target_column.num_label_columns]) - with ops.name_scope(None, "centered_bias", (targets, features)): - training_loss = self._target_column.training_loss( - logits, targets, features) - # Learn central bias by an optimizer. 0.1 is a convervative lr for a - # single variable. 
- return training.AdagradOptimizer(0.1).minimize( - training_loss, var_list=centered_bias) - def _logits(self, features, is_training=False): linear_feature_columns = self._get_linear_feature_columns() dnn_feature_columns = self._get_dnn_feature_columns() @@ -319,10 +306,7 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator): else: logits = self._linear_logits(features, is_training) - if self._enable_centered_bias: - return nn.bias_add(logits, self._centered_bias()) - else: - return logits + return logits class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): @@ -448,10 +432,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): if enable_centered_bias is None: enable_centered_bias = True _changing_default_center_bias() - - target_column = layers.multi_class_target( + # pylint: disable=protected-access + head = head_lib._multi_class_head( n_classes=n_classes, - weight_column_name=weight_column_name) + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias) super(DNNLinearCombinedClassifier, self).__init__( model_dir=model_dir, linear_feature_columns=linear_feature_columns, @@ -463,15 +448,15 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, gradient_clip_norm=gradient_clip_norm, - enable_centered_bias=enable_centered_bias, - target_column=target_column, + head=head, config=config, - feature_engineering_fn=feature_engineering_fn) + feature_engineering_fn=feature_engineering_fn, + default_prediction_key=head_lib.PedictionKey.CLASSES) @deprecated_arg_values( estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False): + def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns predicted classes for given features. Args: @@ -498,7 +483,7 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict_proba( - self, x=None, input_fn=None, batch_size=None, as_iterable=False): + self, x=None, input_fn=None, batch_size=None, as_iterable=True): """Returns prediction probabilities for given features. Args: @@ -517,6 +502,11 @@ class DNNLinearCombinedClassifier(_DNNLinearCombinedBaseEstimator): return super(DNNLinearCombinedClassifier, self).predict( x=x, input_fn=input_fn, batch_size=batch_size, as_iterable=as_iterable) + def _get_predict_ops(self, features): + """See base class.""" + return super(DNNLinearCombinedClassifier, self)._get_predict_ops(features)[ + head_lib.PedictionKey.PROBABILITIES] + class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): """A regressor for TensorFlow Linear and DNN joined training models. 
@@ -642,9 +632,11 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): if enable_centered_bias is None: enable_centered_bias = True _changing_default_center_bias() - target_column = layers.regression_target( + # pylint: disable=protected-access + head = head_lib._regression_head( weight_column_name=weight_column_name, - target_dimension=target_dimension) + target_dimension=target_dimension, + enable_centered_bias=enable_centered_bias) super(DNNLinearCombinedRegressor, self).__init__( model_dir=model_dir, linear_feature_columns=linear_feature_columns, @@ -656,7 +648,14 @@ class DNNLinearCombinedRegressor(_DNNLinearCombinedBaseEstimator): dnn_activation_fn=dnn_activation_fn, dnn_dropout=dnn_dropout, gradient_clip_norm=gradient_clip_norm, - enable_centered_bias=enable_centered_bias, - target_column=target_column, + head=head, config=config, - feature_engineering_fn=feature_engineering_fn) + feature_engineering_fn=feature_engineering_fn, + default_prediction_key=head_lib.PedictionKey.SCORES) + + def _get_predict_ops(self, features): + """See base class.""" + return super(DNNLinearCombinedRegressor, self)._get_predict_ops(features)[ + head_lib.PedictionKey.SCORES] + + diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py index 452973f7528..fa28eb6a3e4 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined_test.py @@ -254,7 +254,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) # Weighted cross entropy = (-7*log(0.25)-3*log(0.75))/10 = 1.06 @@ -289,7 +288,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) # The model should learn (y = x) because of the weights, so the accuracy @@ -371,7 +369,7 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): def _input_fn_train(): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)} return features, target def _input_fn_predict(): @@ -387,30 +385,26 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): classifier.fit(input_fn=_input_fn_train, steps=100) - probs = classifier.predict_proba(input_fn=_input_fn_predict) + probs = list(classifier.predict_proba(input_fn=_input_fn_predict)) self.assertAllClose([[0.75, 0.25]] * 4, probs, 0.05) - classes = classifier.predict(input_fn=_input_fn_predict) - self.assertListEqual([0] * 4, list(classes)) - - probs = classifier.predict_proba( - input_fn=_input_fn_predict, as_iterable=True) - self.assertAllClose([[0.75, 0.25]] * 4, list(probs), 0.05) - classes = classifier.predict( - input_fn=_input_fn_predict, as_iterable=True) - self.assertListEqual([0] * 4, list(classes)) + classes = list(classifier.predict(input_fn=_input_fn_predict)) + self.assertListEqual([0] * 4, 
classes) def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): # For the case of binary classification, the 2nd column of "predictions" # denotes the model predictions. + targets = tf.to_float(targets) predictions = tf.slice(predictions, [0, 1], [-1, 1]) return tf.reduce_sum(tf.mul(predictions, targets)) @@ -419,9 +413,9 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): dnn_feature_columns=[tf.contrib.layers.real_valued_column('x')], dnn_hidden_units=[3, 3]) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': tf.contrib.metrics.streaming_accuracy, @@ -431,22 +425,24 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array( + list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) # Test the case where the 2nd element of the key is neither "classes" nor # "probabilities". - with self.assertRaises(ValueError): + with self.assertRaises(KeyError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc}) # Test the case where the tuple of the key doesn't have 2 elements. 
with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ ('bad_length_name', 'classes', 'bad_length'): @@ -536,7 +532,6 @@ class DNNLinearCombinedClassifierTest(tf.test.TestCase): self.assertNotIn('dnn/logits/weights', classifier.get_variable_names()) self.assertEquals(1, len(classifier.linear_bias_)) self.assertEquals(2, len(classifier.linear_weights_)) - print(classifier.linear_weights_) self.assertEquals(1, len(classifier.linear_weights_['linear/age/weight'])) self.assertEquals( 100, len(classifier.linear_weights_['linear/language/weights'])) @@ -810,10 +805,11 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -825,9 +821,9 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): dnn_hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -836,25 +832,27 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. 
- with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={('my_error', 'predictions' ): tf.contrib.metrics.streaming_mean_squared_error}) def testTrainSaveLoad(self): """Tests regression with restarting training / evaluate.""" - def _input_fn(): + def _input_fn(num_epochs=None): # Create 4 rows of (y = x) target = tf.constant([[100.], [3.], [2.], [2.]]) - features = {'x': tf.constant([[100.], [3.], [2.], [2.]])} + features = {'x': tf.train.limit_epochs( + tf.constant([[100.], [3.], [2.], [2.]]), num_epochs=num_epochs)} return features, target model_dir = tempfile.mkdtemp() @@ -866,13 +864,14 @@ class DNNLinearCombinedRegressorTest(tf.test.TestCase): model_dir=model_dir, config=tf.contrib.learn.RunConfig(tf_random_seed=1)) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) classifier = new_estimator() classifier.fit(input_fn=_input_fn, steps=100) - predictions = classifier.predict(input_fn=_input_fn) + predictions = list(classifier.predict(input_fn=predict_input_fn)) del classifier classifier = new_estimator() - predictions2 = classifier.predict(input_fn=_input_fn) + predictions2 = list(classifier.predict(input_fn=predict_input_fn)) self.assertAllClose(predictions, predictions2) def testTrainWithPartitionedVariables(self): diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py deleted file mode 100644 index e668e71db88..00000000000 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier.py +++ /dev/null @@ -1,568 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== - -"""Deep Neural Network estimator for large multi-class multi-label problems. - -The Training is sped up using Candidate Sampling. Evaluation and Inference -uses full softmax. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tempfile - -from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib import layers -from tensorflow.contrib.framework.python.ops import variables -from tensorflow.contrib.layers.python.layers import initializers -from tensorflow.contrib.layers.python.layers import optimizers -from tensorflow.contrib.learn.python.learn import evaluable -from tensorflow.contrib.learn.python.learn import metric_spec -from tensorflow.contrib.learn.python.learn import trainable -from tensorflow.contrib.learn.python.learn.estimators import estimator -from tensorflow.contrib.metrics.python.ops import metric_ops -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import partitioned_variables -from tensorflow.python.ops import standard_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.training import training as train - - -_CLASSES = "classes" -_TOP_K = "top_k" -_PROBABILITIES = "probabilities" -_DEFAULT_LEARNING_RATE = 0.01 - - -def _as_iterable(preds, output): - for pred in preds: - yield pred[output] - - -def _get_optimizer(optimizer): - if callable(optimizer): - return optimizer() - else: - return optimizer - - -def _get_default_optimizer(): - """Default optimizer for DNN models.""" - return train.AdagradOptimizer(_DEFAULT_LEARNING_RATE) - - -def _get_feature_dict(features): - if isinstance(features, dict): - return features - return {"": features} - - -def _dnn_sampled_softmax_classifier_model_fn(features, targets, mode, params): - """model_fn that uses candidate sampling. - - Args: - features: Single Tensor or dict of Tensor (depends on data passed to `fit`) - targets: A single Tensor of shape [batch_size, n_labels] containing - the target indices. - mode: Represents if this training, evaluation or prediction. See `ModeKeys`. - params: A dict of hyperparameters that are listed below. - hidden_units- List of hidden units per layer. All layers are fully - connected. Ex. `[64, 32]` means first layer has 64 nodes and second one - has 32. - feature_columns- An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - n_classes- number of target classes. It must be greater than 2. - n_samples- number of sample target classes. Needs to be tuned - A good - starting point could be 2% of n_classes. - n_labels- number of labels in each example. - top_k- The number of classes to predict. - optimizer- An instance of `tf.Optimizer` used to train the model. If - `None`, will use an Adagrad optimizer. - dropout- When not `None`, the probability we will drop out a given - coordinate. - gradient_clip_norm- A float > 0. If provided, gradients are - clipped to their global norm with this clipping ratio. See - tf.clip_by_global_norm for more details. - num_ps_replicas- The number of parameter server replicas. - - Returns: - predictions: A single Tensor or a dict of Tensors. - loss: A scalar containing the loss of the step. - train_op: The op for training. 
- """ - - hidden_units = params["hidden_units"] - feature_columns = params["feature_columns"] - n_classes = params["n_classes"] - n_samples = params["n_samples"] - n_labels = params["n_labels"] - top_k = params["top_k"] - optimizer = params["optimizer"] - dropout = params["dropout"] - gradient_clip_norm = params["gradient_clip_norm"] - num_ps_replicas = params["num_ps_replicas"] - - parent_scope = "dnn_ss" - - features = _get_feature_dict(features) - targets = _reshape_targets(targets) - - # Setup the input layer partitioner. - input_layer_partitioner = ( - partitioned_variables.min_max_variable_partitioner( - max_partitions=num_ps_replicas, - min_slice_size=64 << 20)) - - # Create the input layer. - with variable_scope.variable_scope( - parent_scope + "/input_from_feature_columns", - features.values(), - partitioner=input_layer_partitioner) as scope: - net = layers.input_from_feature_columns( - features, - feature_columns, - weight_collections=[parent_scope], - scope=scope) - - # Setup the hidden layer partitioner. - hidden_layer_partitioner = ( - partitioned_variables.min_max_variable_partitioner( - max_partitions=num_ps_replicas)) - - final_hidden_layer_dim = None - # Create hidden layers using fully_connected. - for layer_id, num_hidden_units in enumerate(hidden_units): - with variable_scope.variable_scope( - parent_scope + "/hiddenlayer_%d" % layer_id, [net], - partitioner=hidden_layer_partitioner) as scope: - net = layers.fully_connected(net, - num_hidden_units, - variables_collections=[parent_scope], - scope=scope) - final_hidden_layer_dim = num_hidden_units - # Add dropout if it is enabled. - if dropout is not None and mode == estimator.ModeKeys.TRAIN: - net = layers.dropout(net, keep_prob=(1.0 - dropout)) - - # Create the weights and biases for the logit layer. - with variable_scope.variable_scope( - parent_scope + "/logits", [net], - partitioner=hidden_layer_partitioner) as scope: - dtype = net.dtype.base_dtype - weights_shape = [n_classes, final_hidden_layer_dim] - weights = variables.model_variable( - "weights", - shape=weights_shape, - dtype=dtype, - initializer=initializers.xavier_initializer(), - trainable=True, - collections=[parent_scope]) - biases = variables.model_variable( - "biases", - shape=[n_classes,], - dtype=dtype, - initializer=init_ops.zeros_initializer, - trainable=True, - collections=[parent_scope]) - - if mode == estimator.ModeKeys.TRAIN: - # Call the candidate sampling APIs and calculate the loss. 
- sampled_values = nn.learned_unigram_candidate_sampler( - true_classes=math_ops.to_int64(targets), - num_true=n_labels, - num_sampled=n_samples, - unique=True, - range_max=n_classes) - - sampled_softmax_loss = nn.sampled_softmax_loss( - weights=weights, - biases=biases, - inputs=net, - labels=math_ops.to_int64(targets), - num_sampled=n_samples, - num_classes=n_classes, - num_true=n_labels, - sampled_values=sampled_values) - - loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss") - - train_op = optimizers.optimize_loss( - loss=loss, global_step=contrib_framework.get_global_step(), - learning_rate=_DEFAULT_LEARNING_RATE, - optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm, - name=parent_scope) - return None, loss, train_op - - elif mode == estimator.ModeKeys.EVAL: - logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), - biases) - predictions = {} - predictions[_PROBABILITIES] = nn.softmax(logits) - predictions[_CLASSES] = math_ops.argmax(logits, 1) - _, predictions[_TOP_K] = nn.top_k(logits, top_k) - - # Since the targets have multiple labels, setup the target probabilities - # as 1.0/n_labels for each of the labels. - target_one_hot = array_ops.one_hot( - indices=targets, depth=n_classes, on_value=1.0 / n_labels) - target_one_hot = math_ops.reduce_sum( - input_tensor=target_one_hot, - reduction_indices=[1]) - - loss = math_ops.reduce_mean( - nn.softmax_cross_entropy_with_logits(logits, target_one_hot)) - - return predictions, loss, None - - elif mode == estimator.ModeKeys.INFER: - logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)), - biases) - predictions = {} - predictions[_PROBABILITIES] = nn.softmax(logits) - predictions[_CLASSES] = math_ops.argmax(logits, 1) - _, predictions[_TOP_K] = nn.top_k(logits, top_k) - - return predictions, None, None - - -def _reshape_targets(targets): - if targets is None: - return None - check_shape_op = control_flow_ops.Assert( - math_ops.less_equal(array_ops.rank(targets), 2), - ["target's should be either [batch_size, n_labels] or [batch_size]"]) - with ops.control_dependencies([check_shape_op]): - targets = array_ops.reshape( - targets, shape=[array_ops.shape(targets)[0], -1]) - return targets - - -def _top_k_fn_wrapper(metric_fn, k): - - def wrap_func(predictions, labels): - return metric_fn(predictions, _reshape_targets(labels), k=k) - - wrap_func.__name__ = metric_fn.__name__ - return wrap_func - - -class _DNNSampledSoftmaxClassifier(trainable.Trainable, evaluable.Evaluable): - """A classifier for TensorFlow DNN models. - - Example: - - ```python - legos = sparse_column_with_hash_bucket(column_name="legos", - hash_bucket_size=1000) - watched_videos = sparse_column_with_hash_bucket( - column_name="watched_videos", - hash_bucket_size=20000) - - legos_emb = embedding_column(sparse_id_column=legos, dimension=16, - combiner="sum") - watched_videos_emb = embedding_column(sparse_id_column=watched_videos, - dimension=256, - combiner="sum") - - estimator = DNNSampledSoftmaxClassifier( - n_classes=500000, n_samples=10000, n_labels=5, - feature_columns=[legos_emb, watched_videos_emb], - hidden_units=[1024, 512, 256]) - - # Or estimator using the Adam optimizer with dropout. 
- estimator = DNNSampledSoftmaxClassifier( - feature_columns=[education_emb, occupation_emb], - hidden_units=[1024, 512, 256], - optimizer=tf.train.ProximalAdagradOptimizer( - learning_rate=0.1), - dropout=0.1) - - # Input builders - def input_fn_train: # returns x, Y - pass - estimator.fit(input_fn=input_fn_train) - - def input_fn_eval: # returns x, Y - pass - estimator.evaluate(input_fn=input_fn_eval) - estimator.predict(x=x) - ``` - - Input of `fit` and `evaluate` should have following features, - otherwise there will be a `KeyError`: - - * for each `column` in `feature_columns`: - - if `column` is a `SparseColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `EmbeddingColumn`, a feature with `key=column.name` - whose `value` is a `SparseTensor`. - - if `column` is a `WeightedSparseColumn`, two features: the first with - `key` the id column name, the second with `key` the weight column name. - Both features' `value` must be a `SparseTensor`. - - if `column` is a `RealValuedColumn`, a feature with `key=column.name` - whose `value` is a `Tensor`. - """ - - def __init__(self, - hidden_units, - feature_columns, - n_classes, - n_samples, - n_labels=1, - top_k=1, - model_dir=None, - optimizer=None, - dropout=None, - gradient_clip_norm=None, - config=None, - feature_engineering_fn=None): - """Initializes a DNNSampledSoftmaxClassifier instance. - - Args: - hidden_units: List of hidden units per layer. All layers are fully - connected. Ex. `[64, 32]` means first layer has 64 nodes and second one - has 32. - feature_columns: An iterable containing all the feature columns used by - the model. All items in the set should be instances of classes derived - from `FeatureColumn`. - n_classes: number of target classes. It must be greater than 2. - n_samples: number of sample target classes. Needs to be tuned - A good - starting point could be 2% of n_classes. - n_labels: number of labels in each example. - top_k: The number of classes to predict. - model_dir: Directory to save model parameters, graph and etc. This can - also be used to load checkpoints from the directory into a estimator to - continue training a previously saved model. - optimizer: An instance of `tf.Optimizer` used to train the model. If - `None`, will use an Adagrad optimizer. - dropout: When not `None`, the probability we will drop out a given - coordinate. - gradient_clip_norm: A float > 0. If provided, gradients are - clipped to their global norm with this clipping ratio. See - tf.clip_by_global_norm for more details. - config: `RunConfig` object to configure the runtime settings. - feature_engineering_fn: Feature engineering function. Takes features and - targets which are the output of `input_fn` and - returns features and targets which will be fed - into the model. - - Returns: - A `DNNSampledSoftmaxClassifier` estimator. - - Raises: - ValueError: If n_classes <= 2. - ValueError: If n_classes < n_samples. - ValueError: If n_classes < n_labels. - """ - # Validate all the inputs. - if n_classes <= 2: - raise ValueError("n_classes should be greater than 2. For n_classes <= 2," - " use DNNClassifier.") - if n_classes < n_samples: - raise ValueError("n_classes (%d) should be greater than n_samples (%d)." % - (n_classes, n_samples)) - if n_classes < n_labels: - raise ValueError("n_classes (%d) should be greater than n_labels" - " (%d)." 
% (n_classes, n_labels)) - - self._top_k = top_k - self._feature_columns = feature_columns - assert self._feature_columns - self._model_dir = model_dir or tempfile.mkdtemp() - - # Build the estimator with _dnn_sampled_softmax_classifier_model_fn. - self._estimator = estimator.Estimator( - model_fn=_dnn_sampled_softmax_classifier_model_fn, - model_dir=self._model_dir, - config=config, - params={ - "hidden_units": hidden_units, - "feature_columns": feature_columns, - "n_classes": n_classes, - "n_samples": n_samples, - "n_labels": n_labels, - "top_k": top_k, - "optimizer": optimizer or _get_default_optimizer(), - "dropout": dropout, - "gradient_clip_norm": gradient_clip_norm, - "num_ps_replicas": config.num_ps_replicas if config else 0 - }, - feature_engineering_fn=feature_engineering_fn) - - def get_estimator(self): - return self._estimator - - def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, - monitors=None, max_steps=None): - """See trainable.Trainable.""" - return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, - batch_size=batch_size, monitors=monitors, - max_steps=max_steps) - - def evaluate(self, x=None, y=None, input_fn=None, feed_fn=None, - batch_size=None, steps=None, metrics=None, name=None, - range_k=None): - # pylint: disable=g-doc-args,g-doc-return-or-yield - """See evaluable.Evaluable for a description of the Args. - - Calculates the following metrics by default: - loss - average_precision@top_k: see - https://en.wikipedia.org/wiki/Information_retrieval#Average_precision - for k in range_k: - precision@k and recall@k - - range_k: A list of numbers where precision and recall have to be obtained. - For eg. range_k=[1,5] will calculate precision@1, precision@5, - recall@1 and recall@5. If None, defaults to [1, top_k]. - """ - if not metrics: - metrics = {} - metrics.update({ - "average_precision_at_%d" % self._top_k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_average_precision_at_k, - k=self._top_k), - prediction_key=_PROBABILITIES) - }) - if range_k is None: - if self._top_k > 1: - range_k = [1, self._top_k] - else: - range_k = [1] - for k in range_k: - metrics.update({ - "precision_at_%d" % k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_precision_at_k, k=k), - prediction_key=_PROBABILITIES,) - }) - metrics.update({ - "recall_at_%d" % k: metric_spec.MetricSpec( - metric_fn=_top_k_fn_wrapper( - metric_ops.streaming_sparse_recall_at_k, k=k), - prediction_key=_PROBABILITIES,) - }) - - return self._estimator.evaluate(x=x, y=y, input_fn=input_fn, - feed_fn=feed_fn, batch_size=batch_size, - steps=steps, metrics=metrics, name=name) - - def predict(self, x=None, input_fn=None, batch_size=None, as_iterable=False, - get_top_k=False): - """Returns predicted classes for given features. - - Args: - x: features. - input_fn: Input function. If set, x must be None. - batch_size: Override default batch size. - as_iterable: If True, return an iterable which keeps yielding predictions - for each example until inputs are exhausted. Note: The inputs must - terminate if you want the iterable to terminate (e.g. be sure to pass - num_epochs=1 if you are using something like read_batch_features). - get_top_k : if set to true returns the top k classes otherwise returns - the top class. - - Returns: - Numpy array of predicted classes (or an iterable of predicted classes if - as_iterable is True). 
- """ - if get_top_k: - key = _TOP_K - else: - key = _CLASSES - preds = self._estimator.predict(x=x, input_fn=input_fn, - batch_size=batch_size, outputs=[key], - as_iterable=as_iterable) - if as_iterable: - return _as_iterable(preds, output=key) - return preds[key] - - def predict_proba(self, x=None, input_fn=None, batch_size=None, - as_iterable=False): - """Returns prediction probabilities for given features. - - Args: - x: features. - input_fn: Input function. If set, x and y must be None. - batch_size: Override default batch size. - as_iterable: If True, return an iterable which keeps yielding predictions - for each example until inputs are exhausted. Note: The inputs must - terminate if you want the iterable to terminate (e.g. be sure to pass - num_epochs=1 if you are using something like read_batch_features). - - Returns: - Numpy array of predicted probabilities (or an iterable of predicted - probabilities if as_iterable is True). - """ - preds = self._estimator.predict(x=x, input_fn=input_fn, - batch_size=batch_size, - outputs=[_PROBABILITIES], - as_iterable=as_iterable) - if as_iterable: - return _as_iterable(preds, output=_PROBABILITIES) - return preds[_PROBABILITIES] - - def export(self, export_dir, signature_fn=None, - input_fn=None, default_batch_size=1, - exports_to_keep=None): - """Exports inference graph into given dir. - - Args: - export_dir: A string containing a directory to write the exported graph - and checkpoints. - signature_fn: Function that returns a default signature and a named - signature map, given `Tensor` of `Example` strings, `dict` of `Tensor`s - for features and `Tensor` or `dict` of `Tensor`s for predictions. - input_fn: If `use_deprecated_input_fn` is true, then a function that given - `Tensor` of `Example` strings, parses it into features that are then - passed to the model. Otherwise, a function that takes no argument and - returns a tuple of (features, targets), where features is a dict of - string key to `Tensor` and targets is a `Tensor` that's currently not - used (and so can be `None`). - default_batch_size: Default batch size of the `Example` placeholder. - exports_to_keep: Number of exports to keep. - - Returns: - The string path to the exported directory. NB: this functionality was - added ca. 2016/09/25; clients that depend on the return value may need - to handle the case where this function returns None because subclasses - are not returning a value. - """ - def default_input_fn(unused_estimator, examples): - return layers.parse_feature_columns_from_examples( - examples, self._feature_columns) - return self._estimator.export(export_dir=export_dir, - signature_fn=signature_fn, - input_fn=input_fn or default_input_fn, - default_batch_size=default_batch_size, - exports_to_keep=exports_to_keep) - - def get_variable_names(self): - return self._estimator.get_variable_names() - - @property - def model_dir(self): - return self._model_dir diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py deleted file mode 100644 index 3a58479aff8..00000000000 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_sampled_softmax_classifier_test.py +++ /dev/null @@ -1,459 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# ============================================================================== -"""Tests for DNNSampledSoftmaxClassifier estimator.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools -import tempfile - -import numpy as np -import tensorflow as tf - -from tensorflow.contrib.learn.python.learn.estimators import dnn_sampled_softmax_classifier -from tensorflow.python.ops import math_ops - - -class DNNSampledSoftmaxClassifierTest(tf.test.TestCase): - - def testMultiClass(self): - """Tests the following. - - 1. Tests fit() and evaluate() calls. - 2. Tests the use of a non default optimizer. - 3. Tests the output of get_variable_names(). - Note that the training output is not verified because it is flaky with the - Iris dataset. - """ - def _iris_input_fn(): - iris = tf.contrib.learn.datasets.load_iris() - return { - 'feature': tf.constant(iris.data, dtype=tf.float32) - }, tf.constant(iris.target, shape=[150, 1], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column('feature', dimension=4)] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_iris_input_fn, steps=5) - classifier.evaluate(input_fn=_iris_input_fn, steps=1) - var_names = classifier.get_variable_names() - self.assertGreater(len(var_names), 6) - - def testNonDictFeatures(self): - """Tests non-dictionary features runs without error.""" - - def _iris_input_fn(): - iris = tf.contrib.learn.datasets.load_iris() - return (tf.constant( - iris.data, dtype=tf.float32), tf.constant( - iris.target, shape=[150, 1], dtype=tf.int64)) - - cont_features = [tf.contrib.layers.real_valued_column('', dimension=4)] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_iris_input_fn, steps=5) - classifier.evaluate(input_fn=_iris_input_fn, steps=1) - - def testOneDimensionTargets(self): - """Tests one dimensional targets runs without error.""" - - def _input_fn(): - return { - 'feature': tf.constant( - [1, 1, 1], dtype=tf.float32) - }, tf.constant( - [3, 5, 7], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column( - 'feature', dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=10, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - classifier.fit(input_fn=_input_fn, steps=5) - classifier.evaluate(input_fn=_input_fn, steps=1) - - def testWrongDimensionTargets(self): - """Tests one dimensional targets runs without error.""" - - def _input_fn(): - return { - 'feature': tf.constant( - [1, 1, 1], dtype=tf.float32) - }, tf.constant( - [[[3, 5, 7]]], dtype=tf.int64) - - cont_features = [ - tf.contrib.layers.real_valued_column( - 'feature', dimension=1) - ] - - classifier = 
dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=10, - n_samples=1, - n_labels=1, - feature_columns=cont_features, - hidden_units=[3, 3]) - - with self.assertRaisesRegexp(tf.errors.InvalidArgumentError, 'target'): - classifier.fit(input_fn=_input_fn, steps=5) - - def testTrainWithPartitionedVariables(self): - """Tests the following. - - 1. Tests training with partitioned variables. - 2. Test that the model actually trains. - 3. Tests the output of evaluate() and predict(). - """ - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - # The given hash_bucket_size results in variables larger than the - # default min_slice_size attribute, so the variables are partitioned. - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=2e7) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - # Because we did not start a distributed cluster, we need to pass an - # empty ClusterSpec, otherwise the device_setter will look for - # distributed jobs, such as "/job:ps" which are not present. - config=tf.contrib.learn.RunConfig( - num_ps_replicas=2, cluster_spec=tf.train.ClusterSpec({}), - tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.6) - self.assertGreater(evaluate_output['recall_at_1'], 0.6) - - # Test the output of predict() - predict_output = classifier.predict(input_fn=_input_fn) - self.assertListEqual([3], list(predict_output.shape)) - # TODO(dnivara): Setup this test such that it is not flaky and predict() and - # evaluate() outputs can be tested. 
- - def testTrainSaveLoad(self): - """Tests that ensure that you can save and reload a trained model.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=10) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - model_dir = tempfile.mkdtemp() - classifier1 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - model_dir=model_dir, - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - classifier1.fit(input_fn=_input_fn, steps=1) - predict_output1 = classifier1.predict(input_fn=_input_fn) - del classifier1 - - classifier2 = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - model_dir=model_dir, - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - predict_output2 = classifier2.predict(input_fn=_input_fn) - self.assertEqual(list(predict_output1), list(predict_output2)) - - def testCustomOptimizerByObject(self): - """Tests the use of custom optimizer.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. 
- classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.9) - self.assertGreater(evaluate_output['recall_at_1'], 0.9) - - # Test the output of predict() - predict_output = classifier.predict(input_fn=_input_fn) - self.assertListEqual([1, 0, 0], list(predict_output)) - - def testCustomOptimizerByFunction(self): - """Tests the use of custom optimizer.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - def _optimizer_exp_decay(): - global_step = tf.contrib.framework.get_global_step() - learning_rate = tf.train.exponential_decay(learning_rate=0.01, - global_step=global_step, - decay_steps=100, - decay_rate=0.001) - return tf.train.AdagradOptimizer(learning_rate=learning_rate) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=_optimizer_exp_decay, - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.6) - self.assertGreater(evaluate_output['recall_at_1'], 0.6) - - def testExport(self): - """Tests that export model for servo works.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=100) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4]) - - export_dir = tempfile.mkdtemp() - classifier.fit(input_fn=_input_fn, steps=50) - classifier.export(export_dir) - - def testPredictAsIterable(self): - """Tests predict() and predict_proba() call with as_iterable set to True.""" - def _input_fn(num_epochs=None): - features = { - 'age': tf.train.limit_epochs(tf.constant([[.9], [.1], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - feature_columns = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1), - tf.contrib.layers.real_valued_column('age') - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=feature_columns, - hidden_units=[4, 4]) - - classifier.fit(input_fn=_input_fn, steps=1) - - predict_input_fn = functools.partial(_input_fn, num_epochs=1) - # Test the output of predict() 
and predict_proba() with as_iterable=True - predictions = list( - classifier.predict(input_fn=predict_input_fn, as_iterable=True)) - predictions_proba = list( - classifier.predict_proba(input_fn=predict_input_fn, as_iterable=True)) - self.assertTrue(np.array_equal(predictions, - np.argmax(predictions_proba, 1))) - - def testCustomMetrics(self): - """Tests the use of custom metric.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[1], [0], [0]], dtype=tf.int64) - return features, target - - def _my_metric_op(predictions, targets): - """Simply multiplies predictions and targets to return [1, 0 , 0].""" - prediction_classes = math_ops.argmax(predictions, 1) - return tf.mul(prediction_classes, tf.reshape(targets, [-1])) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=1, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - # Test that the model actually trains. - classifier.fit(input_fn=_input_fn, steps=50) - metrics = {('my_metric', 'probabilities'): _my_metric_op} - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1, - metrics=metrics) - self.assertListEqual([1, 0, 0], list(evaluate_output['my_metric'])) - - def testMultiLabelTopKWithCustomMetrics(self): - """Tests the cases where n_labels>1 top_k>1 and custom metrics on top_k.""" - def _input_fn(): - features = { - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) - } - target = tf.constant([[0, 1], [0, 1], [0, 1]], dtype=tf.int64) - return features, target - - def _my_metric_op(predictions, targets): - """Simply adds the predictions and targets.""" - return tf.add(math_ops.to_int64(predictions), targets) - - sparse_column = tf.contrib.layers.sparse_column_with_hash_bucket( - 'language', hash_bucket_size=20) - embedding_features = [ - tf.contrib.layers.embedding_column(sparse_column, dimension=1) - ] - - classifier = dnn_sampled_softmax_classifier._DNNSampledSoftmaxClassifier( - n_classes=3, - n_samples=2, - n_labels=2, - top_k=2, - feature_columns=embedding_features, - hidden_units=[4, 4], - optimizer=tf.train.AdamOptimizer(learning_rate=0.01), - config=tf.contrib.learn.RunConfig(tf_random_seed=5)) - - classifier.fit(input_fn=_input_fn, steps=50) - # evaluate() without custom metrics. - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1) - self.assertGreater(evaluate_output['precision_at_1'], 0.4) - self.assertGreater(evaluate_output['recall_at_1'], 0.4) - self.assertGreater(evaluate_output['precision_at_2'], 0.4) - self.assertGreater(evaluate_output['recall_at_2'], 0.4) - self.assertGreater(evaluate_output['average_precision_at_2'], 0.4) - - # evaluate() with custom metrics. - metrics = {('my_metric', 'top_k'): _my_metric_op} - evaluate_output = classifier.evaluate(input_fn=_input_fn, steps=1, - metrics=metrics) - # This test's output is flaky so just testing that 'my_metric' is indeed - # part of the evaluate_output. - self.assertTrue('my_metric' in evaluate_output) - - # predict() with top_k. 
- predict_output = classifier.predict(input_fn=_input_fn, get_top_k=True) - self.assertListEqual([3, 2], list(predict_output.shape)) - # TODO(dnivara): Setup this test such that it is not flaky and predict() and - # evaluate() outputs can be tested. - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py index 7bd013baa8a..c09b2d682be 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_test.py @@ -114,11 +114,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests binary classification using tensor data as input.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [0.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -149,11 +151,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests binary classification with float labels.""" def _input_fn_float_label(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[50], [20], [10]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[50], [20], [10]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } target = tf.constant([[0.8], [0.], [0.2]], dtype=tf.float32) return features, target @@ -334,11 +338,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests predict and predict_prob methods with as_iterable=False.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -370,11 +376,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests predict and predict_prob methods with as_iterable=True.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -407,15 +415,19 @@ class DNNClassifierTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + 
def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs), + } return features, target def _my_metric_op(predictions, targets): # For the case of binary classification, the 2nd column of "predictions" # denotes the model predictions. + targets = tf.to_float(targets) predictions = tf.slice(predictions, [0, 1], [-1, 1]) targets = math_ops.cast(targets, predictions.dtype) return tf.reduce_sum(tf.mul(predictions, targets)) @@ -425,9 +437,9 @@ class DNNClassifierTest(tf.test.TestCase): hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': MetricSpec( @@ -443,7 +455,8 @@ class DNNClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) @@ -451,7 +464,7 @@ class DNNClassifierTest(tf.test.TestCase): # "probabilities". with self.assertRaisesRegexp(KeyError, 'bad_type'): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'bad_name': MetricSpec( @@ -462,11 +475,13 @@ class DNNClassifierTest(tf.test.TestCase): """Tests that insures you can save and reload a trained model.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -485,7 +500,8 @@ class DNNClassifierTest(tf.test.TestCase): config=tf.contrib.learn.RunConfig(tf_random_seed=1)) classifier.fit(input_fn=_input_fn, steps=100) - predictions1 = classifier.predict(input_fn=_input_fn) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions1 = classifier.predict(input_fn=predict_input_fn) del classifier classifier2 = tf.contrib.learn.DNNClassifier( @@ -494,18 +510,20 @@ class DNNClassifierTest(tf.test.TestCase): feature_columns=feature_columns, hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - predictions2 = classifier2.predict(input_fn=_input_fn) + predictions2 = classifier2.predict(input_fn=predict_input_fn) self.assertEqual(list(predictions1), list(predictions2)) def testTrainWithPartitionedVariables(self): """Tests training with partitioned variables.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[.8], [.2], [.1]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': 
tf.train.limit_epochs( + tf.constant([[.8], [.2], [.1]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([[1], [0], [0]], dtype=tf.int32) @@ -636,11 +654,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests regression using tensor data as input.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[.8], [.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -756,11 +776,13 @@ class DNNRegressorTest(tf.test.TestCase): target = [1., 0., 0.2] def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant(target, dtype=tf.float32) @@ -788,11 +810,13 @@ class DNNRegressorTest(tf.test.TestCase): target = [1., 0., 0.2] def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant(target, dtype=tf.float32) @@ -819,10 +843,13 @@ class DNNRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = { + 'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs), + } return features, target def _my_metric_op(predictions, targets): @@ -833,9 +860,9 @@ class DNNRegressorTest(tf.test.TestCase): hidden_units=[3, 3], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -844,28 +871,31 @@ class DNNRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = 
np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. - with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, - metrics={('my_error', 'predictions' - ): tf.contrib.metrics.streaming_mean_squared_error}) + metrics={('my_error', 'predictions'): + tf.contrib.metrics.streaming_mean_squared_error}) def testTrainSaveLoad(self): """Tests that insures you can save and reload a trained model.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -900,11 +930,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests training with partitioned variables.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) @@ -936,11 +968,13 @@ class DNNRegressorTest(tf.test.TestCase): """Tests that we can disable centered bias.""" def _input_fn(num_epochs=None): features = { - 'age': tf.train.limit_epochs(tf.constant([[0.8], [0.15], [0.]]), - num_epochs=num_epochs), - 'language': tf.SparseTensor(values=['en', 'fr', 'zh'], - indices=[[0, 0], [0, 1], [2, 0]], - shape=[3, 2]) + 'age': tf.train.limit_epochs( + tf.constant([[0.8], [0.15], [0.]]), num_epochs=num_epochs), + 'language': tf.SparseTensor( + values=tf.train.limit_epochs( + ['en', 'fr', 'zh'], num_epochs=num_epochs), + indices=[[0, 0], [0, 1], [2, 0]], + shape=[3, 2]) } return features, tf.constant([1., 0., 0.2], dtype=tf.float32) diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index a38d57effa7..249ec2ca391 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -27,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import estimator from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import clip_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import rnn @@ -119,6 +120,55 @@ def _select_last_activations(activations, sequence_lengths): return last_activations +def _concatenate_context_input(sequence_input, context_input): + """Replicates `context_input` accross all 
timesteps of `sequence_input`. + + Expands dimension 1 of `context_input` then tiles it `sequence_length` times. + This value is appended to `sequence_input` on dimension 2 and the result is + returned. + + Args: + sequence_input: a `Tensor` of dtype `float32` and shape `[batch_size, + padded_length, d0]`. + context_input: a `Tensor` of dtype `float32` and shape `[batch_size, d1]`. + + Returns: + A `Tensor` of dtype `float32` and shape `[batch_size, padded_length, + d0 + d1]`. + + Raises: + ValueError: if `sequence_input` does not have rank 3 or `context_input` does + not have rank 2. + """ + seq_rank_check = check_ops.assert_rank( + sequence_input, + 3, + message='sequence_input must have rank 3', + data=[array_ops.shape(sequence_input)]) + seq_type_check = check_ops.assert_type( + sequence_input, + dtypes.float32, + message='sequence_input must have dtype float32; got {}.'.format( + sequence_input.dtype)) + ctx_rank_check = check_ops.assert_rank( + context_input, + 2, + message='context_input must have rank 2', + data=[array_ops.shape(context_input)]) + ctx_type_check = check_ops.assert_type( + context_input, + dtypes.float32, + message='context_input must have dtype float32; got {}.'.format( + context_input.dtype)) + with ops.control_dependencies( + [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]): + padded_length = array_ops.shape(sequence_input)[1] + tiled_context_input = array_ops.tile( + array_ops.expand_dims(context_input, 1), + array_ops.concat(0, [[1], [padded_length], [1]])) + return array_ops.concat(2, [sequence_input, tiled_context_input]) + + @six.add_metaclass(abc.ABCMeta) class _DynamicRNNEstimator(estimator.BaseEstimator): """Estimator that uses a dynamic RNN for sequences.""" @@ -127,10 +177,11 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns=None, model_dir=None, config=None, gradient_clipping_norm=None, - inputs_key='inputs', sequence_length_key='sequence_length', initial_state_key='initial_state', dtype=None, @@ -145,13 +196,18 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): target_column: an initialized `TargetColumn`, used to calculate loss and metrics. optimizer: an initialized `tensorflow.Optimizer`. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. model_dir: The directory in which to save and restore the model graph, parameters, etc. config: A `RunConfig` instance. gradient_clipping_norm: parameter used for gradient clipping. If `None`, then no clipping is performed. - inputs_key: the key for input values in the features dict passed to - `fit()`. sequence_length_key: the key for the sequence length tensor in the features dict passed to `fit()`. initial_state_key: the key for input values in the features dict passed to @@ -168,14 +224,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): targets which are the output of `input_fn` and returns features and targets which will be fed into the model. + Raises: + ValueError: `sequence_feature_columns` is `None` or []. 
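For illustration only (not part of the patch): with `sequence_feature_columns` now required, callers build these estimators from feature columns rather than a raw `inputs` tensor. A minimal sketch using the same contrib.layers column constructors exercised by the tests further down; the feature names ('measurements', 'wire_cast', 'location'), vocabularies, and sizes are placeholders, not values mandated by this change:

import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import dynamic_rnn_estimator

# Sequence features: one value per timestep of each example.
measurements = tf.contrib.layers.real_valued_column('measurements', dimension=2)
wire_cast = tf.contrib.layers.sparse_column_with_keys(
    'wire_cast', keys=['marlo', 'omar', 'stringer'])
seq_columns = [measurements,
               tf.contrib.layers.embedding_column(wire_cast, dimension=8)]

# Context features: one value per example, tiled across all timesteps.
location = tf.contrib.layers.sparse_column_with_keys(
    'location', keys=['west_side', 'east_side', 'nyc'])
context_columns = [tf.contrib.layers.one_hot_column(location)]

regressor = dynamic_rnn_estimator.multi_value_rnn_regressor(
    num_units=8,
    sequence_feature_columns=seq_columns,
    context_feature_columns=context_columns,
    learning_rate=0.1)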
""" super(_DynamicRNNEstimator, self).__init__( model_dir=model_dir, config=config) + # TODO(jamieas): consider supporting models with only context features. + if not sequence_feature_columns: + raise ValueError('sequence_feature_columns must be a non-empty list.') self._cell = cell self._target_column = target_column self._optimizer = optimizer + self._context_feature_columns = context_feature_columns + self._sequence_feature_columns = sequence_feature_columns self._gradient_clipping_norm = gradient_clipping_norm - self._inputs_key = inputs_key self._sequence_length_key = sequence_length_key self._initial_state_key = initial_state_key self._dtype = dtype or dtypes.float32 @@ -186,7 +248,29 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): feature_engineering_fn or (lambda features, targets: (features, targets))) - def _construct_rnn(self, features): + def _get_model_input(self, features, weight_collections=None, scope=None): + # TODO(jamieas): add option to use context to construct initial state rather + # than appending it to sequence input. + initial_state = features.get(self._initial_state_key) + + sequence_input = layers.sequence_input_from_feature_columns( + columns_to_tensors=features, + feature_columns=self._sequence_feature_columns, + weight_collections=weight_collections, + scope=scope) + + if self._context_feature_columns is not None: + context_input = layers.input_from_feature_columns( + columns_to_tensors=features, + feature_columns=self._context_feature_columns, + weight_collections=weight_collections, + scope=scope) + + sequence_input = _concatenate_context_input(sequence_input, context_input) + + return initial_state, sequence_input + + def _construct_rnn(self, initial_state, sequence_input): """Apply an RNN to `features`. The `features` dict must contain `self._inputs_key`, and the corresponding @@ -201,28 +285,20 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): `self._dtype`. Args: - features: a `dict` containing the input for the RNN and (optionally) an - initial state and information about sequence lengths. + initial_state: the initial state to pass the the RNN. If `None`, the + default starting state for `self._cell` is used. + sequence_input: a `Tensor` with shape `[batch_size, padded_length, d]` + that will be passed as input to the RNN. Returns: activations: the output of the RNN, projected to the appropriate number of dimensions. final_state: the final state output by the RNN. - - Raises: - KeyError: if `features` does not contain `self._inputs_key`. 
""" with ops.name_scope('RNN'): - inputs = features.get(self._inputs_key) - if inputs is None: - raise KeyError('features must contain the key {}'.format( - self._inputs_key)) - if inputs.dtype != self._dtype: - inputs = math_ops.cast(inputs, self._dtype) - initial_state = features.get(self._initial_state_key) rnn_outputs, final_state = rnn.dynamic_rnn( cell=self._cell, - inputs=inputs, + inputs=sequence_input, initial_state=initial_state, dtype=self._dtype, parallel_iterations=self._parallel_iterations, @@ -320,26 +396,26 @@ class _DynamicRNNEstimator(estimator.BaseEstimator): def _get_train_ops(self, features, targets): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, _ = self._construct_rnn(features) + features, targets = self._feature_engineering_fn(features, targets) + initial_state, sequence_input = self._get_model_input(features) + activations, _ = self._construct_rnn(initial_state, sequence_input) loss = self._activations_to_loss(features, activations, targets) train_op = self._loss_to_train_op(loss) return train_op, loss def _get_eval_ops(self, features, targets, metrics): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, _ = self._construct_rnn(features) + features, targets = self._feature_engineering_fn(features, targets) + initial_state, sequence_input = self._get_model_input(features) + activations, _ = self._construct_rnn(initial_state, sequence_input) return self._activations_to_eval_ops(features, activations, targets, metrics) def _get_predict_ops(self, features): with ops.name_scope(self._name): - if isinstance(features, ops.Tensor): - features = {self._inputs_key: features} - activations, state = self._construct_rnn(features) + features, _ = self._feature_engineering_fn(features, {}) + initial_state, sequence_input = self._get_model_input(features) + activations, state = self._construct_rnn(initial_state, sequence_input) predictions = self._activations_to_predictions(features, activations) return {'predictions': predictions, 'state': state} @@ -362,7 +438,7 @@ class _MultiValueRNNEstimator(_DynamicRNNEstimator): activations_shape = array_ops.shape(activations) flattened_activations = array_ops.reshape(activations, [-1, activations_shape[2]]) - predictions = self._target_column.activations_to_predictions( + predictions = self._target_column.logits_to_predictions( flattened_activations, proba=False) reshaped_predictions = array_ops.reshape( predictions, [activations_shape[0], activations_shape[1], -1]) @@ -392,7 +468,7 @@ class _SingleValueRNNEstimator(_DynamicRNNEstimator): with ops.name_scope('activations_to_predictions'): sequence_lengths = features.get(self._sequence_length_key) last_activations = _select_last_activations(activations, sequence_lengths) - return self._target_column.activations_to_predictions( + return self._target_column.logits_to_predictions( last_activations, proba=False) def _activations_to_eval_ops(self, features, activations, targets, metrics): @@ -469,6 +545,8 @@ def _get_rnn_cell(cell_type, num_units, num_layers): def multi_value_rnn_regressor(num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -482,6 +560,13 @@ def multi_value_rnn_regressor(num_units, Args: num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. 
All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -503,6 +588,8 @@ def multi_value_rnn_regressor(num_units, return _MultiValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -511,6 +598,8 @@ def multi_value_rnn_regressor(num_units, def multi_value_rnn_classifier(num_classes, num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -525,6 +614,13 @@ def multi_value_rnn_classifier(num_classes, Args: num_classes: the number of classes for categorization. num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -546,6 +642,8 @@ def multi_value_rnn_classifier(num_classes, return _MultiValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -553,6 +651,8 @@ def multi_value_rnn_classifier(num_classes, def single_value_rnn_regressor(num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -566,6 +666,13 @@ def single_value_rnn_regressor(num_units, Args: num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -587,6 +694,8 @@ def single_value_rnn_regressor(num_units, return _SingleValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, @@ -595,6 +704,8 @@ def single_value_rnn_regressor(num_units, def single_value_rnn_classifier(num_classes, num_units, + sequence_feature_columns, + context_feature_columns=None, cell_type='basic_rnn', cell_dtype=dtypes.float32, num_rnn_layers=1, @@ -609,6 +720,13 @@ def single_value_rnn_classifier(num_classes, Args: num_classes: the number of classes for categorization. 
num_units: the size of the RNN cells. + sequence_feature_columns: An iterable containing all the feature columns + describing sequence features. All items in the set should be instances + of classes derived from `FeatureColumn`. + context_feature_columns: An iterable containing all the feature columns + describing context features i.e. features that apply accross all time + steps. All items in the set should be instances of classes derived from + `FeatureColumn`. cell_type: subclass of `RNNCell` or one of 'basic_rnn,' 'lstm' or 'gru'. cell_dtype: the dtype of the state and output for the given `cell_type`. num_rnn_layers: number of RNN layers. @@ -630,6 +748,8 @@ def single_value_rnn_classifier(num_classes, return _SingleValueRNNEstimator(cell, target_column, optimizer, + sequence_feature_columns, + context_feature_columns, model_dir, config, gradient_clipping_norm, diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py index 1ee3a8dd608..f14e65fff55 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator_test.py @@ -22,6 +22,7 @@ import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python.learn.estimators import dynamic_rnn_estimator +from tensorflow.python.ops import rnn_cell class IdentityRNNCell(tf.nn.rnn_cell.RNNCell): @@ -44,16 +45,16 @@ class IdentityRNNCell(tf.nn.rnn_cell.RNNCell): class MockTargetColumn(object): - def __init__(self): - self._num_label_columns = None + def __init__(self, num_label_columns=None): + self._num_label_columns = num_label_columns def get_eval_ops(self, features, activations, targets, metrics): raise NotImplementedError( 'MockTargetColumn.get_eval_ops called unexpectedly.') - def activations_to_predictions(self, flattened_activations, proba=False): + def logits_to_predictions(self, flattened_activations, proba=False): raise NotImplementedError( - 'MockTargetColumn.activations_to_predictions called unexpectedly.') + 'MockTargetColumn.logits_to_predictions called unexpectedly.') def loss(self, activations, targets, features): raise NotImplementedError('MockTargetColumn.loss called unexpectedly.') @@ -88,67 +89,89 @@ def sequence_length_mask(values, lengths): class DynamicRnnEstimatorTest(tf.test.TestCase): - CELL_STATE_SIZE = 8 - CELL_OUTPUT_SIZE = 6 + NUM_RNN_CELL_UNITS = 8 + NUM_LABEL_COLUMNS = 4 def setUp(self): - self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE, - self.CELL_OUTPUT_SIZE) - self._mock_target_column = MockTargetColumn() + self._rnn_cell = rnn_cell.BasicRNNCell(self.NUM_RNN_CELL_UNITS) + self._mock_target_column = MockTargetColumn( + num_label_columns=self.NUM_LABEL_COLUMNS) + + location = tf.contrib.layers.sparse_column_with_keys( + 'location', keys=['west_side', 'east_side', 'nyc']) + location_onehot = tf.contrib.layers.one_hot_column(location) + context_features = [location_onehot] + + wire_cast = tf.contrib.layers.sparse_column_with_keys( + 'wire_cast', ['marlo', 'omar', 'stringer']) + wire_cast_embedded = tf.contrib.layers.embedding_column( + wire_cast, dimension=8) + measurements = tf.contrib.layers.real_valued_column( + 'measurements', dimension=2) + sequence_features = [measurements, wire_cast_embedded] + self._rnn_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator( cell=self._rnn_cell, + sequence_feature_columns=sequence_features, + context_feature_columns=context_features, 
target_column=self._mock_target_column, optimizer=tf.train.GradientDescentOptimizer(0.1)) + self._columns_to_tensors = { + 'location': tf.SparseTensor( + indices=[[0, 0], [1, 0], [2, 0]], + values=['west_side', 'west_side', 'nyc'], + shape=[3, 1]), + 'wire_cast': tf.SparseTensor( + indices=[[0, 0, 0], [0, 1, 0], + [1, 0, 0], [1, 1, 0], [1, 1, 1], + [2, 0, 0]], + values=[b'marlo', b'stringer', + b'omar', b'stringer', b'marlo', + b'marlo'], + shape=[3, 2, 2]), + 'measurements': tf.random_uniform([3, 2, 2])} + + def testGetModelInput(self): + initial_state, sequence_input = self._rnn_estimator._get_model_input( + self._columns_to_tensors) + self.assertIsNone(initial_state) + with self.test_session() as sess: + sess.run(tf.initialize_all_variables()) + sess.run(tf.initialize_all_tables()) + sequence_input_val = sess.run(sequence_input) + expected_shape = np.array([ + 3, # expected batch size + 2, # padded sequence length + 3 + 8 + 2 # location keys + embedding dim + measurement dimension + ]) + self.assertAllEqual(expected_shape, sequence_input_val.shape) + def testConstructRNN(self): """Test `DynamicRNNEstimator._construct_rnn`.""" - batch_size = 4 - padded_length = 6 - num_classes = 4 + initial_state, sequence_input = self._rnn_estimator._get_model_input( + self._columns_to_tensors) + activations_t, final_state_t = self._rnn_estimator._construct_rnn( + initial_state, sequence_input) - # Set up mocks - self._mock_target_column.set_num_label_columns(num_classes) - np.random.seed(111) - mock_linear_layer_output = np.random.rand( - batch_size, padded_length, num_classes) - - # Create features - inputs = np.random.rand(batch_size, padded_length, self.CELL_OUTPUT_SIZE) - sequence_length = np.random.randint(0, padded_length + 1, batch_size) - features = {'inputs': tf.constant( - inputs, dtype=tf.float32), - 'sequence_length': tf.constant( - sequence_length, dtype=tf.int32)} - - # Map feature to activations with mocked linear layer. - with tf.test.mock.patch.object(dynamic_rnn_estimator, - 'layers') as mock_layers: - mock_layers.fully_connected.return_value = tf.constant( - mock_linear_layer_output, dtype=tf.float32) - activations_t, final_state_t = self._rnn_estimator._construct_rnn( - features) - _, fully_connected_kwargs = mock_layers.fully_connected.call_args - linear_layer_inputs_t = fully_connected_kwargs['inputs'] - linear_layer_output_dim = fully_connected_kwargs['num_outputs'] - - # Obtain values of linear layer input, activations and final state. + # Obtain values of activations and final state. 
with tf.Session() as sess: sess.run(tf.initialize_all_variables()) - linear_layer_inputs, activations, final_state = sess.run( - [linear_layer_inputs_t, activations_t, final_state_t]) + sess.run(tf.initialize_all_tables()) + activations, final_state = sess.run([activations_t, final_state_t]) - np.testing.assert_equal(num_classes, linear_layer_output_dim) - np.testing.assert_almost_equal(inputs, linear_layer_inputs) - np.testing.assert_almost_equal(mock_linear_layer_output, activations) - np.testing.assert_almost_equal( - np.zeros([batch_size, self._rnn_cell.state_size], dtype=float), - final_state) + expected_activations_shape = np.array([3, 2, self.NUM_LABEL_COLUMNS]) + self.assertAllEqual(expected_activations_shape, activations.shape) + expected_state_shape = np.array([3, self.NUM_RNN_CELL_UNITS]) + self.assertAllEqual(expected_state_shape, final_state.shape) class MultiValueRNNEstimatorTest(tf.test.TestCase): """Tests for `_MultiValueRNNEstimator` class.""" CELL_STATE_SIZE = 8 CELL_OUTPUT_SIZE = 6 + INPUTS_COLUMN = tf.contrib.layers.real_valued_column( + 'inputs', dimension=CELL_OUTPUT_SIZE) def setUp(self): self._rnn_cell = IdentityRNNCell(self.CELL_STATE_SIZE, @@ -156,6 +179,7 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): self._mock_target_column = MockTargetColumn() self._seq_estimator = dynamic_rnn_estimator._MultiValueRNNEstimator( cell=self._rnn_cell, + sequence_feature_columns=[self.INPUTS_COLUMN], target_column=self._mock_target_column, optimizer=tf.train.GradientDescentOptimizer(0.1)) @@ -251,13 +275,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): with tf.test.mock.patch.object( self._mock_target_column, - 'activations_to_predictions', + 'logits_to_predictions', return_value=flattened_argmax, - autospec=True) as mock_activations_to_predictions: + autospec=True) as mock_logits_to_predictions: predictions_t = self._seq_estimator._activations_to_predictions( None, tf.constant(activations, dtype=tf.float32)) (target_column_input_activations_t, - ), _ = mock_activations_to_predictions.call_args + ), _ = mock_logits_to_predictions.call_args with tf.Session() as sess: target_column_input_activations, predictions = sess.run( @@ -294,9 +318,14 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=1234) sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_regressor( - num_units=cell_size, learning_rate=learning_rate, config=config) + num_units=cell_size, + sequence_feature_columns=seq_columns, + learning_rate=learning_rate, + config=config) train_input_fn = get_sin_input_fn( batch_size, sequence_length, np.pi / 32, seed=1234) @@ -336,10 +365,13 @@ class MultiValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=21212) sequence_estimator = dynamic_rnn_estimator.multi_value_rnn_classifier( num_classes=2, num_units=cell_size, + sequence_feature_columns=seq_columns, learning_rate=learning_rate, config=config) @@ -421,9 +453,12 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=6) sequence_regressor = 
dynamic_rnn_estimator.single_value_rnn_regressor( num_units=cell_size, + sequence_feature_columns=seq_columns, cell_type=cell_type, optimizer_type=optimizer_type, learning_rate=learning_rate, @@ -467,10 +502,13 @@ class SingleValueRNNEstimatorTest(tf.test.TestCase): return {'inputs': inputs}, labels return input_fn + seq_columns = [tf.contrib.layers.real_valued_column( + 'inputs', dimension=cell_size)] config = tf.contrib.learn.RunConfig(tf_random_seed=77) sequence_classifier = dynamic_rnn_estimator.single_value_rnn_classifier( num_classes=2, num_units=cell_size, + sequence_feature_columns=seq_columns, cell_type=cell_type, optimizer_type=optimizer_type, learning_rate=learning_rate, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 99afefe084e..cd8c12d3044 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import abc +import collections import copy import inspect import itertools @@ -52,6 +53,8 @@ from tensorflow.contrib.learn.python.learn.utils import export from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import device_setter @@ -81,6 +84,12 @@ class ModeKeys(object): INFER = 'infer' +class ModelFnOps( + collections.namedtuple('ModelFnOps', ['predictions', 'loss', 'training_op', + 'default_metrics', 'signature_fn'])): + pass + + def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): """Make inputs into input and feed functions.""" if input_fn is None: @@ -230,6 +239,9 @@ def _make_metrics_ops(metrics, features, targets, predictions): if isinstance(name, tuple): # Multi-head metrics. + if len(name) != 2: + raise ValueError('Invalid metric for {}. It returned a tuple with ' + 'len {}, expected 2.'.format(name, len(name))) if not isinstance(predictions, dict): raise ValueError( 'Metrics passed provide (name, prediction), ' @@ -371,7 +383,7 @@ class BaseEstimator( provided. """ logging.warning('The current implementation of partial_fit is not optimized' - 'for use in a loop. Consider using fit() instead.') + ' for use in a loop. Consider using fit() instead.') return self.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors) @@ -405,7 +417,7 @@ class BaseEstimator( AS_ITERABLE_DATE, AS_ITERABLE_INSTRUCTIONS, as_iterable=False) def predict( self, x=None, input_fn=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns predictions for given features. Args: @@ -602,26 +614,26 @@ class BaseEstimator( def _check_inputs(self, features, targets): if self._features_info is not None: - logging.warning('Given features: %s, required signatures: %s.', - str(features), str(self._features_info)) + logging.debug('Given features: %s, required signatures: %s.', + str(features), str(self._features_info)) if not tensor_signature.tensors_compatible(features, self._features_info): raise ValueError('Features are incompatible with given information. ' 'Given features: %s, required signatures: %s.' 
% (str(features), str(self._features_info))) else: self._features_info = tensor_signature.create_signatures(features) - logging.info('Setting feature info to %s', str(self._features_info)) + logging.debug('Setting feature info to %s.', str(self._features_info)) if targets is not None: if self._targets_info is not None: - logging.warning('Given targets: %s, required signatures: %s.', - str(targets), str(self._targets_info)) + logging.debug('Given targets: %s, required signatures: %s.', + str(targets), str(self._targets_info)) if not tensor_signature.tensors_compatible(targets, self._targets_info): raise ValueError('Targets are incompatible with given information. ' 'Given targets: %s, required signatures: %s.' % (str(targets), str(self._targets_info))) else: self._targets_info = tensor_signature.create_signatures(targets) - logging.info('Setting targets info to %s', str(self._targets_info)) + logging.debug('Setting targets info to %s', str(self._targets_info)) def _train_model(self, input_fn, @@ -781,7 +793,7 @@ class BaseEstimator( return result def _infer_model( - self, input_fn, feed_fn=None, outputs=None, as_iterable=False): + self, input_fn, feed_fn=None, outputs=None, as_iterable=True): # Check that model has been trained. checkpoint_path = saver.latest_checkpoint(self._model_dir) if not checkpoint_path: @@ -883,8 +895,15 @@ class Estimator(BaseEstimator): Args: model_fn: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` @@ -929,7 +948,7 @@ class Estimator(BaseEstimator): 'arguments, but not None params (%s) are passed.' % (model_fn, params)) if params is None and 'params' in model_fn_args: - logging.warning('Estimator\'s model_fn (%s) has includes params ' + logging.warning('Estimator\'s model_fn (%s) includes params ' 'argument, but params are not passed to Estimator.', model_fn) self._model_fn = model_fn @@ -943,10 +962,48 @@ class Estimator(BaseEstimator): model_fn_args = _get_arguments(self._model_fn) if 'mode' in model_fn_args: if 'params' in model_fn_args: - return self._model_fn(features, targets, mode=mode, params=self.params) + predictions, loss, train_op = self._model_fn( + features, targets, mode=mode, params=self.params) else: - return self._model_fn(features, targets, mode=mode) - return self._model_fn(features, targets) + predictions, loss, train_op = self._model_fn( + features, targets, mode=mode) + else: + predictions, loss, train_op = self._model_fn(features, targets) + + # Validate train_op. + if train_op is None: + if mode == ModeKeys.TRAIN: + raise ValueError('Missing train_op.') + elif not isinstance(train_op, ops.Operation): + train_op = ops.convert_to_tensor(train_op).op + + # Validate loss. + if loss is None: + if mode in (ModeKeys.TRAIN, ModeKeys.EVAL): + raise ValueError('Missing loss.') + else: + loss = ops.convert_to_tensor(loss) + loss_shape = loss.get_shape() + if loss_shape.num_elements() not in (None, 1): + raise ValueError('Loss must be scalar: %s.' 
% loss) + if not loss_shape.is_compatible_with(tensor_shape.scalar()): + loss = array_ops.reshape(loss, []) + + # Validate predictions. + if predictions is None: + if mode == ModeKeys.INFER: + raise ValueError('Missing predictions.') + else: + if isinstance(predictions, dict): + predictions = { + k: contrib_framework.convert_to_tensor_or_sparse_tensor(v) + for k, v in six.iteritems(predictions) + } + else: + predictions = contrib_framework.convert_to_tensor_or_sparse_tensor( + predictions) + + return predictions, loss, train_op def _get_train_ops(self, features, targets): """Method that builds model graph and returns trainer ops. diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py index 0acdbf20c3e..7e36ed078f9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test.py @@ -37,9 +37,9 @@ _IRIS_INPUT_DIM = 4 def boston_input_fn(num_epochs=None): boston = tf.contrib.learn.datasets.load_boston() - features = tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]) - if num_epochs: - features = tf.train.limit_epochs(features, num_epochs=num_epochs) + features = tf.train.limit_epochs( + tf.reshape(tf.constant(boston.data), [-1, _BOSTON_INPUT_DIM]), + num_epochs=num_epochs) target = tf.reshape(tf.constant(boston.target), [-1, 1]) return features, target @@ -61,7 +61,10 @@ def boston_eval_fn(): def linear_model_params_fn(features, target, mode, params): - assert mode in ('train', 'eval', 'infer') + assert mode in ( + tf.contrib.learn.ModeKeys.TRAIN, + tf.contrib.learn.ModeKeys.EVAL, + tf.contrib.learn.ModeKeys.INFER) prediction, loss = ( tf.contrib.learn.models.linear_regression_zero_init(features, target) ) @@ -72,7 +75,10 @@ def linear_model_params_fn(features, target, mode, params): def linear_model_fn(features, target, mode): - assert mode in ('train', 'eval', 'infer') + assert mode in ( + tf.contrib.learn.ModeKeys.TRAIN, + tf.contrib.learn.ModeKeys.EVAL, + tf.contrib.learn.ModeKeys.INFER) prediction, loss = ( tf.contrib.learn.models.linear_regression_zero_init(features, target) ) @@ -120,6 +126,46 @@ class CheckCallsMonitor(tf.contrib.learn.monitors.BaseMonitor): class EstimatorTest(tf.test.TestCase): + def testInvalidModelFn_no_train_op(self): + def _invalid_model_fn(features, target): + # pylint: disable=unused-argument + tf.Variable(42.0, 'weight') + return None, None, None + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + with self.assertRaisesRegexp(ValueError, 'Missing train_op'): + est.fit(input_fn=boston_input_fn, steps=1) + + def testInvalidModelFn_no_loss(self): + def _invalid_model_fn(features, target, mode): + # pylint: disable=unused-argument + w = tf.Variable(42.0, 'weight') + loss = 100.0 - w + train_op = w.assign_add(loss / 100.0) + if mode == tf.contrib.learn.ModeKeys.EVAL: + loss = None + return None, loss, train_op + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + est.fit(input_fn=boston_input_fn, steps=1) + with self.assertRaisesRegexp(ValueError, 'Missing loss'): + est.evaluate(input_fn=boston_eval_fn, steps=1) + + def testInvalidModelFn_no_prediction(self): + def _invalid_model_fn(features, target): + # pylint: disable=unused-argument + w = tf.Variable(42.0, 'weight') + loss = 100.0 - w + train_op = w.assign_add(loss / 100.0) + return None, loss, train_op + est = tf.contrib.learn.Estimator(model_fn=_invalid_model_fn) + 
est.fit(input_fn=boston_input_fn, steps=1) + est.evaluate(input_fn=boston_eval_fn, steps=1) + with self.assertRaisesRegexp(ValueError, 'Missing prediction'): + est.predict(input_fn=boston_input_fn) + with self.assertRaisesRegexp(ValueError, 'Missing prediction'): + est.predict( + input_fn=functools.partial(boston_input_fn, num_epochs=1), + as_iterable=True) + def testCustomConfig(self): test_random_seed = 5783452 @@ -211,7 +257,7 @@ class EstimatorTest(tf.test.TestCase): metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error}) self.assertAllClose(scores2['MSE'], scores['MSE']) - predictions = est2.predict(x=boston.data) + predictions = np.array(list(est2.predict(x=boston.data))) other_score = _sklearn.mean_squared_error(predictions, float64_target) self.assertAllClose(other_score, scores['MSE']) @@ -238,7 +284,7 @@ class EstimatorTest(tf.test.TestCase): x=boston.data, y=float64_target, metrics={'MSE': tf.contrib.metrics.streaming_mean_squared_error}) - predictions = est.predict(x=boston.data) + predictions = np.array(list(est.predict(x=boston.data))) other_score = _sklearn.mean_squared_error(predictions, boston.target) self.assertAllClose(other_score, scores['MSE']) self.assertTrue('global_step' in scores) @@ -252,13 +298,17 @@ class EstimatorTest(tf.test.TestCase): x=iris.data, y=iris.target, metrics={('accuracy', 'class'): tf.contrib.metrics.streaming_accuracy}) - predictions = est.predict(x=iris.data) - predictions_class = est.predict(x=iris.data, outputs=['class']) - self.assertEqual(predictions['class'].shape[0], iris.target.shape[0]) - self.assertAllClose(predictions['class'], predictions_class['class']) - self.assertAllClose(predictions['class'], np.argmax(predictions['prob'], - axis=1)) - other_score = _sklearn.accuracy_score(iris.target, predictions['class']) + predictions = list(est.predict(x=iris.data)) + predictions_class = list(est.predict(x=iris.data, outputs=['class'])) + self.assertEqual(len(predictions), iris.target.shape[0]) + classes_batch = np.array([p['class'] for p in predictions]) + self.assertAllClose( + classes_batch, + np.array([p['class'] for p in predictions_class])) + self.assertAllClose( + classes_batch, + np.argmax(np.array([p['prob'] for p in predictions]), axis=1)) + other_score = _sklearn.accuracy_score(iris.target, classes_batch) self.assertAllClose(other_score, scores['accuracy']) self.assertTrue('global_step' in scores) self.assertEqual(scores['global_step'], 100) @@ -268,8 +318,8 @@ class EstimatorTest(tf.test.TestCase): est = tf.contrib.learn.Estimator(model_fn=logistic_model_no_mode_fn) est.fit(input_fn=iris_input_fn, steps=100) _ = est.evaluate(input_fn=iris_input_fn, steps=1) - predictions = est.predict(x=iris.data)['class'] - self.assertEqual(predictions.shape[0], iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) def testIrisIterator(self): iris = tf.contrib.learn.datasets.load_iris() @@ -278,8 +328,8 @@ class EstimatorTest(tf.test.TestCase): y_iter = itertools.islice(iris.target, 100) est.fit(x_iter, y_iter, steps=100) _ = est.evaluate(input_fn=iris_input_fn, steps=1) - predictions = est.predict(x=iris.data)['class'] - self.assertEqual(predictions.shape[0], iris.target.shape[0]) + predictions = list(est.predict(x=iris.data)) + self.assertEqual(len(predictions), iris.target.shape[0]) def testTrainInputFn(self): est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) @@ -304,32 +354,16 @@ class EstimatorTest(tf.test.TestCase): est = 
tf.contrib.learn.Estimator(model_fn=linear_model_fn) boston = tf.contrib.learn.datasets.load_boston() est.fit(input_fn=boston_input_fn, steps=1) - output = est.predict(boston.data) - self.assertEqual(output.shape[0], boston.target.shape[0]) + output = list(est.predict(x=boston.data, batch_size=10)) + self.assertEqual(len(output), boston.target.shape[0]) def testPredictInputFn(self): - est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) - boston = tf.contrib.learn.datasets.load_boston() - est.fit(input_fn=boston_input_fn, steps=1) - output = est.predict(input_fn=boston_input_fn) - self.assertEqual(output.shape[0], boston.target.shape[0]) - - def testPredictAsIterable(self): - est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) - boston = tf.contrib.learn.datasets.load_boston() - est.fit(input_fn=boston_input_fn, steps=1) - self.assertEqual( - len(list(est.predict(boston.data, batch_size=10, as_iterable=True))), - boston.target.shape[0]) - - def testPredictInputFnAsIterable(self): est = tf.contrib.learn.Estimator(model_fn=linear_model_fn) boston = tf.contrib.learn.datasets.load_boston() est.fit(input_fn=boston_input_fn, steps=1) input_fn = functools.partial(boston_input_fn, num_epochs=1) - self.assertEqual( - len(list(est.predict(input_fn=input_fn, as_iterable=True))), - boston.target.shape[0]) + output = list(est.predict(input_fn=input_fn)) + self.assertEqual(len(output), boston.target.shape[0]) def testWrongInput(self): def other_input_fn(): diff --git a/tensorflow/contrib/learn/python/learn/tests/estimators_test.py b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py similarity index 78% rename from tensorflow/contrib/learn/python/learn/tests/estimators_test.py rename to tensorflow/contrib/learn/python/learn/estimators/estimators_test.py index 8862ba48cc0..35a6c5bf021 100644 --- a/tensorflow/contrib/learn/python/learn/tests/estimators_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimators_test.py @@ -20,6 +20,7 @@ from __future__ import division from __future__ import print_function import random +import numpy as np import tensorflow as tf from tensorflow.contrib.learn.python import learn @@ -28,36 +29,6 @@ from tensorflow.contrib.learn.python.learn.estimators._sklearn import accuracy_s from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split -# TODO(b/29580537): Remove when we deprecate feature column inference. 
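Because _infer_model now defaults to as_iterable=True, predict() yields one result per example, which is why the test call sites above materialize the generator. A minimal call-site sketch, with est, x_data and my_input_fn standing in for an already fitted estimator and its inputs (placeholder names, not from this change):

import functools
import numpy as np

# Dense array of per-example predictions from in-memory data.
predictions = np.array(list(est.predict(x=x_data, batch_size=10)))

# With an input_fn, bound the epochs so the prediction generator terminates.
predict_input_fn = functools.partial(my_input_fn, num_epochs=1)
predictions = list(est.predict(input_fn=predict_input_fn))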
-class InferredfeatureColumnTest(tf.test.TestCase): - """Custom optimizer tests.""" - - def testIrisMomentum(self): - random.seed(42) - - iris = datasets.load_iris() - x_train, x_test, y_train, y_test = train_test_split(iris.data, - iris.target, - test_size=0.2, - random_state=42) - - def custom_optimizer(): - return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9) - - cont_features = [ - tf.contrib.layers.real_valued_column("", dimension=4)] - classifier = learn.DNNClassifier( - feature_columns=cont_features, - hidden_units=[10, 20, 10], - n_classes=3, - optimizer=custom_optimizer, - config=learn.RunConfig(tf_random_seed=1)) - classifier.fit(x_train, y_train, steps=400) - score = accuracy_score(y_test, classifier.predict(x_test)) - - self.assertGreater(score, 0.65, "Failed with score = {0}".format(score)) - - class FeatureEngineeringFunctionTest(tf.test.TestCase): """Tests feature_engineering_fn.""" @@ -145,7 +116,8 @@ class CustomOptimizer(tf.test.TestCase): optimizer=custom_optimizer, config=learn.RunConfig(tf_random_seed=1)) classifier.fit(x_train, y_train, steps=400) - score = accuracy_score(y_test, classifier.predict(x_test)) + predictions = np.array(list(classifier.predict(x_test))) + score = accuracy_score(y_test, predictions) self.assertGreater(score, 0.65, "Failed with score = {0}".format(score)) diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py new file mode 100644 index 00000000000..4b19f84a7ae --- /dev/null +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -0,0 +1,850 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Abstractions for the head(s) of a model. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc + +from tensorflow.contrib import losses +from tensorflow.contrib import metrics as metrics_lib +from tensorflow.contrib.learn.python.learn import metric_spec +from tensorflow.contrib.learn.python.learn.estimators import estimator +from tensorflow.contrib.session_bundle import exporter +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import variables +from tensorflow.python.training import training + + +# TODO(zakaria): add functions that creates a head and returns ModelOpFn + + +def _regression_head(label_name=None, + weight_column_name=None, + target_dimension=1, + enable_centered_bias=False, head_name=None): + """Creates a _Head for linear regression. + + Args: + label_name: String, name of the key in label dict. 
Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + target_dimension: dimension of the target for multilabels. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + + Returns: + An instance of _Head + """ + return _RegressionHead(train_loss_fn=_mean_squared_loss, + eval_loss_fn=_mean_squared_loss, + label_name=label_name, + weight_column_name=weight_column_name, + target_dimension=target_dimension, + enable_centered_bias=enable_centered_bias, + head_name=head_name) + +# TODO(zakaria): Add logistic_regression_head + + +def _multi_class_head(n_classes, label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None): + """Creates a _Head for multi class single label classification. + + The Head uses softmax cross entropy loss, or sigmoid cross entropy (log loss) when n_classes is 2. + + Args: + n_classes: Integer, number of classes, must be >= 2 + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _MultiClassHead. + + Raises: + ValueError: if n_classes is < 2 + """ + if n_classes < 2: + raise ValueError("n_classes must be > 1 for classification.") + if n_classes == 2: + loss_fn = _log_loss_with_two_classes + else: + loss_fn = _softmax_cross_entropy_loss + return _MultiClassHead(train_loss_fn=loss_fn, + eval_loss_fn=loss_fn, + n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +def _binary_svm_head(label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None,): + """Creates a _Head for binary classification with SVMs. + + The head uses binary hinge loss. + + Args: + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _Head.
+ + """ + return _BinarySvmHead(label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +def _multi_label_head(n_classes, label_name=None, weight_column_name=None, + enable_centered_bias=False, head_name=None, + thresholds=None): + """Creates a _Head for multi label classification. + + The Head uses softmax cross entropy loss. + + Args: + n_classes: Integer, number of classes, must be >= 2 + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval metrics, defaults to [.5] + + Returns: + An instance of _MultiClassHead. + + Raises: + ValueError: if n_classes is < 2 + """ + if n_classes < 2: + raise ValueError("n_classes must be > 1 for classification.") + return _MultiLabelHead(n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + +# TODO(zakaria): Make the classes public once we are ready for users to subclass +# them. +class _Head(object): + """Interface for the head/top of a model. + + Given logits or output of a hidden layer, a Head knows how to compute + predictions, loss, default metric and export signature. + """ + __metaclass__ = abc.ABCMeta + + @abc.abstractproperty + def logits_dimension(self): + raise NotImplementedError("Calling an abstract method.") + + def head_ops(self, features, target, mode, train_op_fn, logits=None, + logits_input=None): + """Returns ops for a model_fn. + + Args: + features: input dict. + target: target dict or tensor. + mode: estimator's ModeKeys + train_op_fn: function that takes a scalar loss and returns an op to + optimize with the loss. + logits: logits to be used for the head. + logits_input: tensor to build logits from. + + Returns: + `estimator.ModelFnOps` + + Raises: + ValueError: if mode is not recognized. 
+ """ + _check_logits_input_not_supported(logits, logits_input) + if mode == estimator.ModeKeys.TRAIN: + loss, additional_train_op = self._training_loss(features, target, + logits, logits_input) + + train_op = train_op_fn(loss) if train_op_fn else None + + if additional_train_op: + if train_op: + train_op = control_flow_ops.group(train_op, *additional_train_op) + else: + train_op = control_flow_ops.group(*additional_train_op) + + return estimator.ModelFnOps(None, loss, train_op, + self._default_metric(), + self._create_signature_fn()) + if mode == estimator.ModeKeys.INFER: + predictions = self._infer_op(logits, logits_input) + return estimator.ModelFnOps(predictions, None, None, + self._default_metric(), + self._create_signature_fn()) + if mode == estimator.ModeKeys.EVAL: + predictions, loss = self._eval_op(features, target, logits, logits_input) + return estimator.ModelFnOps(predictions, loss, None, + self._default_metric(), + self._create_signature_fn()) + raise ValueError("mode=%s unrecognized" % str(mode)) + + @abc.abstractmethod + def _training_loss(self, features, target, logits=None, logits_input=None, + name="training_loss"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _infer_op(self, logits=None, logits_input=None, name="infer_op"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _default_metric(self): + raise NotImplementedError("Calling an abstract method.") + + @abc.abstractmethod + def _create_signature_fn(self): + """Creates signature function for the Head. + """ + raise NotImplementedError("Calling an abstract method.") + + +class _RegressionHead(_Head): + """_Head for regression.""" + + def __init__(self, train_loss_fn, eval_loss_fn, label_name, + weight_column_name, target_dimension, enable_centered_bias, + head_name): + """Base type for all single heads. + + Args: + train_loss_fn: loss_fn for training. + eval_loss_fn: loss_fn for eval. + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). + weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + target_dimension: Integer, number of label columns. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + """ + self._train_loss_fn = train_loss_fn + self._eval_loss_fn = eval_loss_fn + self._logits_dimension = target_dimension + self._label_name = label_name + self._weight_column_name = weight_column_name + self._head_name = head_name + self._enable_centered_bias = enable_centered_bias + self._centered_bias_weight_collection = _head_prefixed(head_name, + "centered_bias") + + @property + def logits_dimension(self): + return self._logits_dimension + + def _training_loss(self, features, target, logits=None, + logits_input=None, name="training_loss"): + """Returns training loss tensor for this head. 
+ + Training loss is different from the loss reported on the tensorboard as we + should respect the example weights when computing the gradient. + + L = sum_{i} w_{i} * l_{i} / B + + where B is the number of examples in the batch, l_{i}, w_{i} are individual + losses, and example weight. + + Args: + features: features dict. + target: either a tensor for labels or in multihead case, a dict of string + to target tensor. + logits: logits, a float tensor. + logits_input: Output of last hidden layer. + name: Op name. + + Returns: + A tuple of training Loss and additional_train_op (possibly None) + """ + target = _check_target(target, self._label_name) + + centered_bias_step = None + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + centered_bias_step = [_centered_bias_step( + self.logits_dimension, + self._centered_bias_weight_collection, + target, + self._train_loss_fn)] + + loss_unweighted = self._train_loss_fn(logits, target) + loss, weighted_average_loss = _loss( + loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"), + weighted_average_loss) + return loss, centered_bias_step + + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + target = _check_target(target, self._label_name) + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + loss_unweighted = self._eval_loss_fn(logits, target) + loss, _ = _loss(loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + + predictions = self._logits_to_prediction(logits) + + return predictions, loss + + def _infer_op(self, logits=None, logits_input=None): + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + return self._logits_to_prediction(logits) + + def _logits_to_prediction(self, logits=None): + predictions = {} + if self.logits_dimension == 1: + predictions[PedictionKey.SCORES] = array_ops.squeeze( + logits, squeeze_dims=[1]) + else: + predictions[PedictionKey.SCORES] = logits + return predictions + + # pylint: disable=undefined-variable + def _create_signature_fn(self): + def _regression_signature_fn(examples, unused_features, predictions): + if isinstance(predictions, dict): + score = predictions[PedictionKey.SCORES] + else: + score = predictions + + default_signature = exporter.regression_signature( + input_tensor=examples, output_tensor=score) + # TODO(zakaria): add validation + return default_signature, {} + return _regression_signature_fn + + def _default_metric(self): + return {_head_prefixed(self._head_name, MetricKey.LOSS): + _weighted_average_loss_metric_spec(self._eval_loss_fn, + PedictionKey.SCORES, + self._label_name, + self._weight_column_name)} + + +class _MultiClassHead(_Head): + """_Head for classification.""" + + def __init__(self, train_loss_fn, eval_loss_fn, n_classes, label_name, + weight_column_name, enable_centered_bias, head_name, + thresholds=None): + """Base type for all single heads. + + Args: + train_loss_fn: loss_fn for training. + eval_loss_fn: loss_fn for eval. + n_classes: number of classes. + label_name: String, name of the key in label dict. Can be null if label + is a tensor (single headed models). 
+ weight_column_name: A string defining feature column name representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + head_name: name of the head. If provided, predictions, summary and metrics + keys will be prefixed by the head_name and an underscore. + thresholds: thresholds for eval. + + Raises: + ValueError: if n_classes is invalid. + """ + if n_classes < 2: + raise ValueError("n_classes must be >= 2") + self._thresholds = thresholds if thresholds else [.5] + + self._train_loss_fn = train_loss_fn + self._eval_loss_fn = eval_loss_fn + self._logits_dimension = 1 if n_classes == 2 else n_classes + self._label_name = label_name + self._weight_column_name = weight_column_name + self._head_name = head_name + self._enable_centered_bias = enable_centered_bias + self._centered_bias_weight_collection = _head_prefixed(head_name, + "centered_bias") + + @property + def logits_dimension(self): + return self._logits_dimension + + def _training_loss(self, features, target, logits=None, + logits_input=None, name="training_loss"): + """Returns training loss tensor for this head. + + Training loss is different from the loss reported on the tensorboard as we + should respect the example weights when computing the gradient. + + L = sum_{i} w_{i} * l_{i} / B + + where B is the number of examples in the batch, l_{i}, w_{i} are individual + losses, and example weight. + + Args: + features: features dict. + target: either a tensor for labels or in multihead case, a dict of string + to target tensor. + logits: logits, a float tensor. + logits_input: Output of last hidden layer. + name: Op name. 
+ + Returns: + A tuple of training Loss and additional_train_op (possibly None) + """ + target = _check_target(target, self._label_name) + + centered_bias_step = None + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + centered_bias_step = [_centered_bias_step( + self.logits_dimension, + self._centered_bias_weight_collection, + target, + self._train_loss_fn)] + + loss_unweighted = self._train_loss_fn(logits, target) + loss, weighted_average_loss = _loss( + loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + logging_ops.scalar_summary(_head_prefixed(self._head_name, "loss"), + weighted_average_loss) + return loss, centered_bias_step + + def _eval_op(self, features, target, logits=None, logits_input=None, + name="eval_op"): + target = _check_target(target, self._label_name) + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + loss_unweighted = self._eval_loss_fn(logits, target) + loss, _ = _loss(loss_unweighted, + _weight_tensor(features, self._weight_column_name), + name=name) + + predictions = self._logits_to_prediction(logits) + + return predictions, loss + + def _infer_op(self, logits=None, logits_input=None): + if self._enable_centered_bias: + logits = nn.bias_add(logits, _centered_bias( + self.logits_dimension, + self._centered_bias_weight_collection)) + return self._logits_to_prediction(logits) + + def _logits_to_prediction(self, logits=None): + predictions = {PedictionKey.LOGITS: logits} + if self.logits_dimension == 1: + predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.PROBABILITIES] = nn.softmax(logits) + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.CLASSES] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + return predictions + + def _create_signature_fn(self): + """See superclass.""" + def _classification_signature_fn(examples, unused_features, predictions): + """Servo signature function.""" + if isinstance(predictions, dict): + default_signature = exporter.classification_signature( + input_tensor=examples, + classes_tensor=predictions[PedictionKey.CLASSES], + scores_tensor=predictions[PedictionKey.PROBABILITIES]) + else: + default_signature = exporter.classification_signature( + input_tensor=examples, + scores_tensor=predictions) + + # TODO(zakaria): add validation + return default_signature, {} + return _classification_signature_fn + + def _default_metric(self): + metrics = {_head_prefixed(self._head_name, MetricKey.LOSS): + _weighted_average_loss_metric_spec(self._eval_loss_fn, + PedictionKey.LOGITS, + self._label_name, + self._weight_column_name)} + + # TODO(b/29366811): This currently results in both an "accuracy" and an + # "accuracy/threshold_0.500000_mean" metric for binary classification. 
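The zero-padding in _logits_to_prediction works because appending a zero logit and applying softmax reproduces the sigmoid probability, and argmax over the padded logits is just thresholding at 0. A quick numeric check (illustrative only, not part of the patch):

import numpy as np

x = 1.3  # a single binary-classification logit
sigmoid = 1.0 / (1.0 + np.exp(-x))
padded = np.array([0.0, x])  # what concat(1, [zeros_like(logits), logits]) builds
softmax = np.exp(padded) / np.sum(np.exp(padded))
assert np.isclose(softmax[1], sigmoid)         # P(class 1) equals the sigmoid
assert int(np.argmax(padded)) == int(x > 0.0)  # class prediction = logit > 0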
+ metrics[_head_prefixed(self._head_name, MetricKey.ACCURACY)] = ( + metric_spec.MetricSpec(metrics_lib.streaming_accuracy, + PedictionKey.CLASSES, self._label_name, + self._weight_column_name)) + if self.logits_dimension == 1: + def _add_binary_metric(metric_key, metric_fn): + metrics[_head_prefixed(self._head_name, metric_key)] = ( + metric_spec.MetricSpec(metric_fn, + PedictionKey.LOGISTIC, + self._label_name)) + _add_binary_metric(MetricKey.PREDICTION_MEAN, _predictions_streaming_mean) + _add_binary_metric(MetricKey.TARGET_MEAN, _target_streaming_mean) + + # Also include the streaming mean of the label as an accuracy baseline, as + # a reminder to users. + _add_binary_metric(MetricKey.ACCURACY_BASELINE, _target_streaming_mean) + + _add_binary_metric(MetricKey.AUC, _streaming_auc) + + for threshold in self._thresholds: + _add_binary_metric(MetricKey.ACCURACY_MEAN % threshold, + _accuracy_at_threshold(threshold)) + # Precision for positive examples. + _add_binary_metric(MetricKey.PRECISION_MEAN % threshold, + _streaming_at_threshold( + metrics_lib.streaming_precision_at_thresholds, + threshold),) + # Recall for positive examples. + _add_binary_metric(MetricKey.RECALL_MEAN % threshold, + _streaming_at_threshold( + metrics_lib.streaming_recall_at_thresholds, + threshold)) + return metrics + + +def _check_target(target, label_name): + target = target[label_name] if isinstance(target, dict) else target + if isinstance(target, ops.SparseTensor): + raise ValueError("SparseTensor is not supported as a target/label.") + return target + + +class _BinarySvmHead(_MultiClassHead): + """_Head for binary classification using SVMs.""" + + def __init__(self, label_name, weight_column_name, enable_centered_bias, + head_name, thresholds): + def loss_fn(logits, target): + check_shape_op = control_flow_ops.Assert( + math_ops.less_equal(array_ops.rank(target), 2), + ["target's shape should be either [batch_size, 1] or [batch_size]"]) + with ops.control_dependencies([check_shape_op]): + target = array_ops.reshape( + target, shape=[array_ops.shape(target)[0], 1]) + return losses.hinge_loss(logits, target) + + super(_BinarySvmHead, self).__init__( + train_loss_fn=loss_fn, + eval_loss_fn=loss_fn, + n_classes=2, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + def _logits_to_prediction(self, logits=None): + predictions = {} + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.LOGITS] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.CLASSES] = array_ops.expand_dims( + math_ops.argmax(logits, 1), 1) + + return predictions + + +class _MultiLabelHead(_MultiClassHead): + """_Head for multlabel classification.""" + + # TODO(zakaria): add signature and metric for multilabel. 
+ def __init__(self, n_classes, label_name, + weight_column_name, enable_centered_bias, head_name, + thresholds): + + super(_MultiLabelHead, self).__init__( + train_loss_fn=_sigmoid_cross_entropy_loss, + eval_loss_fn=_sigmoid_cross_entropy_loss, + n_classes=n_classes, + label_name=label_name, + weight_column_name=weight_column_name, + enable_centered_bias=enable_centered_bias, + head_name=head_name, + thresholds=thresholds) + + def _logits_to_prediction(self, logits=None): + predictions = {PedictionKey.LOGITS: logits} + if self.logits_dimension == 1: + predictions[PedictionKey.LOGISTIC] = math_ops.sigmoid(logits) + logits = array_ops.concat(1, [array_ops.zeros_like(logits), logits]) + predictions[PedictionKey.PROBABILITIES] = math_ops.sigmoid(logits) + # Workaround for argmax dropping the second demension. + predictions[PedictionKey.CLASSES] = math_ops.to_int64( + math_ops.greater(logits, 0)) + return predictions + + +def _weighted_loss(loss, weight): + """Returns cumulative weighted loss.""" + unweighted_loss = array_ops.reshape(loss, shape=(-1,)) + weighted_loss = math_ops.mul(unweighted_loss, + array_ops.reshape( + weight, shape=(-1,))) + return weighted_loss + + +def _weight_tensor(features, weight_column_name): + if not weight_column_name: + return None + else: + return array_ops.reshape( + math_ops.to_float(features[weight_column_name]), + shape=(-1,)) + + +def _loss(loss_unweighted, weight, name): + """Returns loss.""" + if weight is None: + loss = math_ops.reduce_mean(loss_unweighted, name=name) + return loss, loss + else: + loss_weighted = _weighted_loss(loss_unweighted, weight) + weighted_average_loss = math_ops.div( + math_ops.reduce_sum(loss_weighted), + math_ops.to_float(math_ops.reduce_sum(weight)), + name="weighted_average_loss") + loss = math_ops.reduce_mean(loss_weighted, name=name) + return loss, weighted_average_loss + + +def _check_logits_input_not_supported(logits, logits_input): + if logits_input is not None or logits is None: + raise NotImplementedError("logits_input is not supported yet, " + "must pass logits") + + +def _centered_bias(logits_dimension, weight_collection): + """Creates and returns centered bias.""" + centered_bias = variables.Variable( + array_ops.zeros([logits_dimension]), + collections=[weight_collection, ops.GraphKeys.VARIABLES], + name="centered_bias_weight") + logging_ops.scalar_summary( + ["centered_bias_%d" % cb for cb in range(logits_dimension)], + array_ops.reshape(centered_bias, [-1])) + return centered_bias + + +def _centered_bias_step(logits_dimension, weight_collection, target, + train_loss_fn): + """Creates and returns training op for centered bias.""" + centered_bias = ops.get_collection(weight_collection) + batch_size = array_ops.shape(target)[0] + logits = array_ops.reshape( + array_ops.tile(centered_bias[0], [batch_size]), + [batch_size, logits_dimension]) + with ops.name_scope(None, "centered_bias", (target, logits)): + centered_bias_loss = math_ops.reduce_mean( + train_loss_fn(logits, target), name="training_loss") + # Learn central bias by an optimizer. 0.1 is a convervative lr for a + # single variable. + return training.AdagradOptimizer(0.1).minimize( + centered_bias_loss, var_list=centered_bias) + + +def _head_prefixed(head_name, val): + return "%s_%s" % (head_name, val) if head_name else val + + +# TODO(zakaria): use contrib losses. +def _mean_squared_loss(logits, target): + # To prevent broadcasting inside "-". 
+ if len(target.get_shape()) == 1: + target = array_ops.expand_dims(target, dim=[1]) + # TODO(zakaria): make sure it does not recreate the broadcast bug. + if len(logits.get_shape()) == 1: + logits = array_ops.expand_dims(logits, dim=[1]) + logits.get_shape().assert_is_compatible_with(target.get_shape()) + return math_ops.square(logits - math_ops.to_float(target)) + + +def _log_loss_with_two_classes(logits, target): + # sigmoid_cross_entropy_with_logits requires [batch_size, 1] target. + if len(target.get_shape()) == 1: + target = array_ops.expand_dims(target, dim=[1]) + loss_vec = nn.sigmoid_cross_entropy_with_logits(logits, + math_ops.to_float(target)) + return loss_vec + + +def _softmax_cross_entropy_loss(logits, target): + # Check that we got int32/int64 for classification. + if (not target.dtype.is_compatible_with(dtypes.int64) and + not target.dtype.is_compatible_with(dtypes.int32)): + raise ValueError("Target's dtype should be int32, int64 or compatible. " + "Instead got %s." % target.dtype) + # sparse_softmax_cross_entropy_with_logits requires [batch_size] target. + if len(target.get_shape()) == 2: + target = array_ops.squeeze(target, squeeze_dims=[1]) + loss_vec = nn.sparse_softmax_cross_entropy_with_logits(logits, target) + return loss_vec + + +def _sigmoid_cross_entropy_loss(logits, target): + # sigmoid_cross_entropy_with_logits requires [batch_size, n_classes] target. + return nn.sigmoid_cross_entropy_with_logits(logits, math_ops.to_float(target)) + + +def _float_weights_or_none(weights): + if weights is None: + return None + return math_ops.to_float(weights) + + +def _weighted_average_loss_metric_spec(loss_fn, predictoin_key, + label_key, weight_key): + def _streaming_weighted_average_loss(predictions, target, weights=None): + loss_unweighted = loss_fn(predictions, target) + _, weighted_average_loss = _loss(loss_unweighted, + weights, + name="eval_loss") + return metrics_lib.streaming_mean(weighted_average_loss) + return metric_spec.MetricSpec(_streaming_weighted_average_loss, + predictoin_key, label_key, weight_key) + + +def _target_streaming_mean(unused_predictions, target, weights=None): + return metrics_lib.streaming_mean(target, weights=weights) + + +def _predictions_streaming_mean(predictions, unused_target, weights=None): + return metrics_lib.streaming_mean(predictions, weights=weights) + + +def _streaming_auc(predictions, target, weights=None): + return metrics_lib.streaming_auc(predictions, target, + weights=_float_weights_or_none(weights)) + + +def _accuracy_at_threshold(threshold): + + def _accuracy_metric(predictions, target, weights=None): + threshold_predictions = math_ops.to_float( + math_ops.greater_equal(predictions, threshold)) + return metrics_lib.streaming_accuracy(predictions=threshold_predictions, + labels=target, + weights=weights) + + return _accuracy_metric + + +def _streaming_at_threshold(streaming_metrics_fn, threshold): + + def _streaming_metrics(predictions, target, weights=None): + precision_tensor, update_op = streaming_metrics_fn( + predictions, labels=target, thresholds=[threshold], + weights=_float_weights_or_none(weights)) + return array_ops.squeeze(precision_tensor), update_op + + return _streaming_metrics + + +# PedictionKey.CLASSES +class PedictionKey(object): + CLASSES = "classes" + PROBABILITIES = "probabilities" + LOGITS = "logits" + LOGISTIC = "logistic" + SCORES = "scores" + + +class MetricKey(object): + LOSS = "loss" + AUC = "auc" + PREDICTION_MEAN = "labels/prediction_mean" + TARGET_MEAN = "labels/actual_target_mean" + ACCURACY = 
"accuracy" + ACCURACY_BASELINE = "accuracy/baseline_target_mean" + ACCURACY_MEAN = "accuracy/threshold_%f_mean" + PRECISION_MEAN = "precision/positive_threshold_%f_mean" + RECALL_MEAN = "recall/positive_threshold_%f_mean" diff --git a/tensorflow/contrib/learn/python/learn/estimators/head_test.py b/tensorflow/contrib/learn/python/learn/estimators/head_test.py new file mode 100644 index 00000000000..dcb292905c7 --- /dev/null +++ b/tensorflow/contrib/learn/python/learn/estimators/head_test.py @@ -0,0 +1,174 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for head.py.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import tensorflow as tf + +from tensorflow.contrib.learn.python.learn.estimators import head as head_lib + + +class RegressionModelHeadTest(tf.test.TestCase): + + # TODO(zakaria): test multilabel regresssion. + def testRegression(self): + head = head_lib._regression_head() + with tf.Graph().as_default(), tf.Session() as sess: + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.constant([[0.], [1.], [1.]]) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=prediction) + self.assertAlmostEqual(5. / 3, sess.run(model_fn_ops.loss)) + + def testRegressionWithWeights(self): + head = head_lib._regression_head( + weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([[2.], [5.], [0.]])} + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.constant([[0.], [1.], [1.]]) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=prediction) + self.assertAlmostEqual(2. 
/ 3, sess.run(model_fn_ops.loss), places=3) + + def testErrorInSparseTensorTarget(self): + head = head_lib._regression_head() + with tf.Graph().as_default(): + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.SparseTensor( + indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64), + values=tf.constant([0., 1., 1.]), + shape=[3, 1]) + with self.assertRaisesRegexp( + ValueError, "SparseTensor is not supported as a target"): + head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None, + logits=prediction) + + +class MultiClassModelHeadTest(tf.test.TestCase): + + def testBinaryClassification(self): + head = head_lib._multi_class_head(n_classes=2) + with tf.Graph().as_default(), tf.Session() as sess: + logits = tf.constant([[1.], [1.]]) + targets = tf.constant([[1.], [0.]]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.81326163, sess.run(model_fn_ops.loss)) + + def testErrorInSparseTensorTarget(self): + head = head_lib._multi_class_head(n_classes=2) + with tf.Graph().as_default(): + prediction = tf.constant([[1.], [1.], [3.]]) + targets = tf.SparseTensor( + indices=tf.constant([[0, 0], [1, 0], [2, 0]], dtype=tf.int64), + values=tf.constant([0, 1, 1]), + shape=[3, 1]) + with self.assertRaisesRegexp( + ValueError, "SparseTensor is not supported as a target"): + head.head_ops({}, targets, tf.contrib.learn.ModeKeys.TRAIN, None, + logits=prediction) + + def testBinaryClassificationWithWeights(self): + head = head_lib._multi_class_head( + n_classes=2, weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([[1.], [0.]])} + logits = tf.constant([[1.], [1.]]) + targets = tf.constant([[1.], [0.]]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.31326166 / 2, sess.run(model_fn_ops.loss)) + + def testMultiClass(self): + head = head_lib._multi_class_head(n_classes=3) + with tf.Graph().as_default(), tf.Session() as sess: + logits = tf.constant([[1., 0., 0.]]) + targets = tf.constant([2]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(1.5514446, sess.run(model_fn_ops.loss)) + + def testMultiClassWithWeight(self): + head = head_lib._multi_class_head( + n_classes=3, weight_column_name="label_weight") + with tf.Graph().as_default(), tf.Session() as sess: + features = {"label_weight": tf.constant([0.1])} + logits = tf.constant([[1., 0., 0.]]) + targets = tf.constant([2]) + # logloss: z:label, x:logit + # z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=logits) + self.assertAlmostEqual(.15514446, sess.run(model_fn_ops.loss)) + + def testMultiClassWithInvalidNClass(self): + try: + head_lib._multi_class_head(n_classes=1) + self.fail("Softmax with no n_classes did not raise error.") + except ValueError: + # Expected + pass + + +class BinarySvmModelHeadTest(tf.test.TestCase): + + def testBinarySVMDefaultWeights(self): + head = head_lib._binary_svm_head() + predictions = tf.constant([[-0.5], [1.2]]) + targets = 
tf.constant([0, 1]) + model_fn_ops = head.head_ops({}, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=predictions) + # Prediction for first example is in the right side of the hyperplane (i.e., + # < 0) but it is within the [-1,1] margin. There is a 0.5 loss incurred by + # this example. The 2nd prediction is outside the margin so it incurs no + # loss at all. The overall (normalized) loss is therefore 0.5/(1+1) = 0.25. + with tf.Session() as sess: + self.assertAlmostEqual(0.25, sess.run(model_fn_ops.loss)) + + def testBinarySVMWithWeights(self): + head = head_lib._binary_svm_head( + weight_column_name="weights") + predictions = tf.constant([[-0.7], [0.2]]) + targets = tf.constant([0, 1]) + features = {"weights": tf.constant([2.0, 10.0])} + model_fn_ops = head.head_ops(features, targets, + tf.contrib.learn.ModeKeys.TRAIN, + None, logits=predictions) + # Prediction for both examples are in the right side of the hyperplane but + # within the margin. The (weighted) loss incurred is 2*0.3=0.6 and 10*0.8=8 + # respectively. The overall (normalized) loss is therefore 8.6/12. + with tf.Session() as sess: + self.assertAlmostEqual(8.6 / 2, sess.run(model_fn_ops.loss), places=3) + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index e7ed35712a9..f4d1fb977af 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -34,7 +34,6 @@ from tensorflow.contrib.framework.python.ops import variables as contrib_variabl from tensorflow.contrib.layers.python.layers import target_column from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import metric_spec -from tensorflow.contrib.learn.python.learn import session_run_hook from tensorflow.contrib.learn.python.learn import trainable from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined from tensorflow.contrib.learn.python.learn.estimators import estimator @@ -53,12 +52,18 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import session_run_hook from tensorflow.python.training import training as train _CLASSES = "classes" _LOGISTIC = "logistic" _PROBABILITIES = "probabilities" +# The default learning rate of 0.2 is a historical artifact of the initial +# implementation, but seems a reasonable choice. +_LEARNING_RATE = 0.2 + def _get_metric_args(metric): if hasattr(metric, "__code__"): @@ -86,7 +91,7 @@ def _wrap_metric(metric): def _get_optimizer(spec): if isinstance(spec, six.string_types): return layers.OPTIMIZER_CLS_NAMES[spec]( - learning_rate=0.2) + learning_rate=_LEARNING_RATE) elif callable(spec): return spec() return spec @@ -171,10 +176,45 @@ def _weighted_loss(loss, weight_tensor): def _linear_classifier_model_fn(features, targets, mode, params): - """Estimator's linear model_fn.""" + """Linear classifier model_fn. + + Args: + features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). + targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of + dtype `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. 
+ params: A dict of hyperparameters. + The following hyperparameters are expected: + * feature_columns: An iterable containing all the feature columns used by + the model. + * n_classes: number of target classes. + * weight_column_name: A string defining the weight feature column, or + None if there are no weights. + * optimizer: string, `Optimizer` object, or callable that defines the + optimizer to use for training. + * gradient_clip_norm: A float > 0. If provided, gradients are + clipped to their global norm with this clipping ratio. + * enable_centered_bias: A bool. If True, estimator will learn a centered + bias variable for each class. Rest of the model structure learns the + residual after centered bias. + * num_ps_replicas: The number of parameter server replicas. + * joint_weights: If True, the weights for all columns will be stored in a + single (possibly partitioned) variable. It's more efficient, but it's + incompatible with SDCAOptimizer, and requires all feature columns are + sparse and use the 'sum' combiner. + + Returns: + predictions: A dict of `Tensor` objects. + loss: A scalar containing the loss of the step. + train_op: The op for training. + + Raises: + ValueError: If mode is not any of the `ModeKeys`. + """ + feature_columns = params["feature_columns"] n_classes = params["n_classes"] weight_column_name = params["weight_column_name"] - feature_columns = params["feature_columns"] optimizer = params["optimizer"] gradient_clip_norm = params.get("gradient_clip_norm", None) enable_centered_bias = params.get("enable_centered_bias", True) @@ -184,25 +224,24 @@ def _linear_classifier_model_fn(features, targets, mode, params): if not isinstance(features, dict): features = {"": features} + parent_scope = "linear" num_label_columns = 1 if n_classes == 2 else n_classes loss_fn = _softmax_cross_entropy_loss if n_classes == 2: loss_fn = _log_loss_with_two_classes - feat_values = (features.values() if isinstance(features, dict) - else [features]) partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_op_scope( - feat_values, "linear", partitioner=partitioner) as scope: + features.values(), parent_scope, partitioner=partitioner) as scope: if joint_weights: logits, _, _ = ( layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, - weight_collections=["linear"], + weight_collections=[parent_scope], scope=scope)) else: logits, _, _ = ( @@ -210,7 +249,7 @@ def _linear_classifier_model_fn(features, targets, mode, params): columns_to_tensors=features, feature_columns=feature_columns, num_outputs=num_label_columns, - weight_collections=["linear"], + weight_collections=[parent_scope], scope=scope)) if enable_centered_bias: @@ -252,11 +291,39 @@ def _linear_classifier_model_fn(features, targets, mode, params): def sdca_classifier_model_fn(features, targets, mode, params): - """Estimator's linear model_fn.""" + """Linear classifier model_fn that uses the SDCA optimizer. + + Args: + features: A dict of `Tensor` keyed by column name. + targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of + dtype `int32` or `int64` in the range `[0, n_classes)`. + mode: Defines whether this is training, evaluation or prediction. + See `ModeKeys`. + params: A dict of hyperparameters. 
+ The following hyperparameters are expected: + * feature_columns: An iterable containing all the feature columns used by + the model. + * optimizer: An `SDCAOptimizer` instance. + * weight_column_name: A string defining the weight feature column, or + None if there are no weights. + * loss_type: A string. Must be either "logistic_loss" or "hinge_loss". + * update_weights_hook: A `SessionRunHook` object or None. Used to update + model weights. + + Returns: + predictions: A dict of `Tensor` objects. + loss: A scalar containing the loss of the step. + train_op: The op for training. + + Raises: + ValueError: If `optimizer` is not an `SDCAOptimizer` instance. + ValueError: If mode is not any of the `ModeKeys`. + """ feature_columns = params["feature_columns"] optimizer = params["optimizer"] weight_column_name = params["weight_column_name"] loss_type = params["loss_type"] + update_weights_hook = params.get("update_weights_hook") if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer): raise ValueError("Optimizer must be of type SDCAOptimizer") @@ -283,9 +350,12 @@ def sdca_classifier_model_fn(features, targets, mode, params): train_op = None if mode == estimator.ModeKeys.TRAIN: global_step = contrib_variables.get_global_step() - train_op = optimizer.get_train_step( - columns_to_variables, weight_column_name, loss_type, features, - targets, global_step) + sdca_model, train_op = optimizer.get_train_step(columns_to_variables, + weight_column_name, + loss_type, features, + targets, global_step) + if update_weights_hook is not None: + update_weights_hook.set_parameters(sdca_model, train_op) predictions = {} predictions[_LOGISTIC] = math_ops.sigmoid(logits) @@ -298,10 +368,32 @@ def sdca_classifier_model_fn(features, targets, mode, params): # Ensures consistency with LinearComposableModel. def _get_default_optimizer(feature_columns): - learning_rate = min(0.2, 1.0 / math.sqrt(len(feature_columns))) + learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns))) return train.FtrlOptimizer(learning_rate=learning_rate) +class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): + """SessionRunHook to update and shrink SDCA model weights.""" + + def __init__(self): + pass + + def set_parameters(self, sdca_model, train_op): + self._sdca_model = sdca_model + self._train_op = train_op + + def begin(self): + """Construct the update_weights op. + + The op is implicitly added to the default graph. + """ + self._update_op = self._sdca_model.update_weights(self._train_op) + + def before_run(self, run_context): + """Return the update_weights op so that it is executed during this run.""" + return session_run_hook.SessionRunArgs(self._update_op) + + class LinearClassifier(evaluable.Evaluable, trainable.Trainable): """Linear classifier model. @@ -431,15 +523,23 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): self._optimizer = _get_optimizer(optimizer) num_ps_replicas = config.num_ps_replicas if config else 0 + chief_hook = None if isinstance(optimizer, sdca_optimizer.SDCAOptimizer): assert not _joint_weight, ("_joint_weight is incompatible with the" " SDCAOptimizer") model_fn = sdca_classifier_model_fn + # We use a hook to perform the weight update and shrink step only on the + # chief. Because the SdcaModel constructed by the estimator within the + # call to fit() but we need to pass the hook to fit(), we pass the hook + # as a parameter to the model_fn and have that propagate the model to the + # hook. 
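For orientation, a hypothetical training call that exercises this path: with an SDCAOptimizer the classifier wires the weight-update hook itself on the chief, so callers simply fit as usual. The feature column, example ids, values and step count below are made up, and the SDCAOptimizer is assumed to be the one exposed under tf.contrib.linear_optimizer.

import tensorflow as tf

def my_input_fn():
  # SDCA requires an example_id column alongside the features.
  features = {
      'example_id': tf.constant(['1', '2', '3']),
      'age': tf.constant([[20.0], [35.0], [50.0]]),
  }
  labels = tf.constant([[0], [1], [1]])
  return features, labels

age = tf.contrib.layers.real_valued_column('age')
optimizer = tf.contrib.linear_optimizer.SDCAOptimizer(
    example_id_column='example_id')
classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=[age], optimizer=optimizer)
classifier.fit(input_fn=my_input_fn, steps=100)  # update hook appended on the chief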
+ chief_hook = _SdcaUpdateWeightsHook() params = { "feature_columns": feature_columns, "optimizer": self._optimizer, "weight_column_name": weight_column_name, "loss_type": "logistic_loss", + "update_weights_hook": chief_hook, } else: model_fn = _linear_classifier_model_fn @@ -461,6 +561,10 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): params=params, feature_engineering_fn=feature_engineering_fn) + self._additional_run_hook = None + if self._estimator.config.is_chief: + self._additional_run_hook = chief_hook + def get_estimator(self): return self._estimator @@ -468,22 +572,24 @@ class LinearClassifier(evaluable.Evaluable, trainable.Trainable): monitors=None, max_steps=None): """See trainable.Trainable.""" # TODO(roumposg): Remove when deprecated monitors are removed. - if monitors is not None: - deprecated_monitors = [ - m for m in monitors - if not isinstance(m, session_run_hook.SessionRunHook) - ] - for monitor in deprecated_monitors: - monitor.set_estimator(self) - monitor._lock_estimator() # pylint: disable=protected-access + if monitors is None: + monitors = [] + deprecated_monitors = [ + m for m in monitors + if not isinstance(m, session_run_hook.SessionRunHook) + ] + for monitor in deprecated_monitors: + monitor.set_estimator(self) + monitor._lock_estimator() # pylint: disable=protected-access + if self._additional_run_hook: + monitors.append(self._additional_run_hook) result = self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors, max_steps=max_steps) - if monitors is not None: - for monitor in deprecated_monitors: - monitor._unlock_estimator() # pylint: disable=protected-access + for monitor in deprecated_monitors: + monitor._unlock_estimator() # pylint: disable=protected-access return result @@ -712,6 +818,12 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor): if enable_centered_bias is None: enable_centered_bias = True dnn_linear_combined._changing_default_center_bias() # pylint: disable=protected-access + + if isinstance(optimizer, sdca_optimizer.SDCAOptimizer): + enable_centered_bias = False + logging.warning("centered_bias is not supported with SDCA, " + "please disable it explicitly.") + self._weight_column_name = weight_column_name self._joint_weights = _joint_weights super(LinearRegressor, self).__init__( model_dir=model_dir, @@ -737,20 +849,22 @@ class LinearRegressor(dnn_linear_combined.DNNLinearCombinedRegressor): layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=self._linear_feature_columns, - num_outputs=self._target_column.num_label_columns, + num_outputs=self._head.logits_dimension, weight_collections=[self._linear_model.get_scope_name()], scope=self._linear_model.get_scope_name())) - with ops.control_dependencies([self._centered_bias()]): - loss = self._target_column.loss(logits, targets, features) - logging_ops.scalar_summary("loss", loss) + _add_bias_column(self._linear_feature_columns, features, bias, targets, + columns_to_variables) - _add_bias_column(self._linear_feature_columns, features, bias, targets, - columns_to_variables) + def _train_op_fn(unused_loss): + sdca_model, train_op = self._linear_optimizer.get_train_step( + columns_to_variables, self._weight_column_name, + self._loss_type(), features, targets, global_step) + return sdca_model.update_weights(train_op) - train_op = self._linear_optimizer.get_train_step( - columns_to_variables, self._target_column.weight_column_name, - self._loss_type(), features, targets, 
global_step) - return train_op, loss + model_fn_ops = self._head.head_ops(features, targets, + estimator.ModeKeys.TRAIN, _train_op_fn, + logits=logits) + return model_fn_ops.training_op, model_fn_ops.loss def _loss_type(self): return "squared_loss" diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py index 3156b86970e..d19ae78951b 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear_test.py @@ -257,10 +257,11 @@ class LinearClassifierTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32)} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -272,9 +273,9 @@ class LinearClassifierTest(tf.test.TestCase): classifier = tf.contrib.learn.LinearClassifier( feature_columns=[tf.contrib.layers.real_valued_column('x')]) - classifier.fit(input_fn=_input_fn_train, steps=100) + classifier.fit(input_fn=_input_fn, steps=100) scores = classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ 'my_accuracy': MetricSpec( @@ -289,7 +290,8 @@ class LinearClassifierTest(tf.test.TestCase): self.assertTrue( set(['loss', 'my_accuracy', 'my_precision', 'my_metric' ]).issubset(set(scores.keys()))) - predictions = classifier.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(classifier.predict(input_fn=predict_input_fn))) self.assertEqual(_sklearn.accuracy_score([1, 0, 0, 0], predictions), scores['my_accuracy']) @@ -297,14 +299,14 @@ class LinearClassifierTest(tf.test.TestCase): # "probabilities". with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={('bad_name', 'bad_type'): tf.contrib.metrics.streaming_auc}) # Test the case where the tuple of the key doesn't have 2 elements. 
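A note on the test rewrite above and continued below: predict() on these estimators now yields results as an iterable, so the tests bound the input with tf.train.limit_epochs and materialize the generator with list(). A minimal standalone sketch of that pattern, reusing the same toy data and column name as the test; it is illustrative only and not part of the patch:

import functools
import numpy as np
import tensorflow as tf

def _input_fn(num_epochs=None):
  # Four rows of a constant feature 'x'. limit_epochs bounds how often the
  # data can be read, so an iterable predict() terminates.
  features = {'x': tf.train.limit_epochs(
      tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)}
  target = tf.constant([[1], [0], [0], [0]], dtype=tf.float32)
  return features, target

classifier = tf.contrib.learn.LinearClassifier(
    feature_columns=[tf.contrib.layers.real_valued_column('x')])
classifier.fit(input_fn=_input_fn, steps=100)

# Training and evaluation may cycle through the data indefinitely
# (num_epochs=None); prediction uses a single epoch and materializes the
# generator before comparing against numpy arrays.
predict_input_fn = functools.partial(_input_fn, num_epochs=1)
predictions = np.array(list(classifier.predict(input_fn=predict_input_fn)))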
with self.assertRaises(ValueError): classifier.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=100, metrics={ ('bad_length_name', 'classes', 'bad_length'): @@ -987,10 +989,11 @@ class LinearRegressorTest(tf.test.TestCase): def testCustomMetrics(self): """Tests custom evaluation metrics.""" - def _input_fn_train(): + def _input_fn(num_epochs=None): # Create 4 rows, one of them (y = x), three of them (y=Not(x)) target = tf.constant([[1.], [0.], [0.], [0.]]) - features = {'x': tf.ones(shape=[4, 1], dtype=tf.float32),} + features = {'x': tf.train.limit_epochs( + tf.ones(shape=[4, 1], dtype=tf.float32), num_epochs=num_epochs)} return features, target def _my_metric_op(predictions, targets): @@ -1000,9 +1003,9 @@ class LinearRegressorTest(tf.test.TestCase): feature_columns=[tf.contrib.layers.real_valued_column('x')], config=tf.contrib.learn.RunConfig(tf_random_seed=1)) - regressor.fit(input_fn=_input_fn_train, steps=100) + regressor.fit(input_fn=_input_fn, steps=100) scores = regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={ 'my_error': tf.contrib.metrics.streaming_mean_squared_error, @@ -1011,15 +1014,16 @@ class LinearRegressorTest(tf.test.TestCase): self.assertIn('loss', set(scores.keys())) self.assertIn('my_error', set(scores.keys())) self.assertIn('my_metric', set(scores.keys())) - predictions = regressor.predict(input_fn=_input_fn_train) + predict_input_fn = functools.partial(_input_fn, num_epochs=1) + predictions = np.array(list(regressor.predict(input_fn=predict_input_fn))) self.assertAlmostEqual( _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), scores['my_error']) # Tests that when the key is a tuple, an error is raised. - with self.assertRaises(TypeError): + with self.assertRaises(KeyError): regressor.evaluate( - input_fn=_input_fn_train, + input_fn=_input_fn, steps=1, metrics={('my_error', 'predictions' ): tf.contrib.metrics.streaming_mean_squared_error}) diff --git a/tensorflow/contrib/learn/python/learn/tests/multioutput_test.py b/tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py similarity index 95% rename from tensorflow/contrib/learn/python/learn/tests/multioutput_test.py rename to tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py index a51f7468905..4feb67e7faa 100644 --- a/tensorflow/contrib/learn/python/learn/tests/multioutput_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/multioutput_test.py @@ -39,7 +39,7 @@ class MultiOutputTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(x), target_dimension=2) regressor.fit(x, y, steps=100) - score = mean_squared_error(regressor.predict(x), y) + score = mean_squared_error(np.array(list(regressor.predict(x))), y) self.assertLess(score, 10, "Failed with score = {0}".format(score)) diff --git a/tensorflow/contrib/learn/python/learn/tests/nonlinear_test.py b/tensorflow/contrib/learn/python/learn/estimators/nonlinear_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/nonlinear_test.py rename to tensorflow/contrib/learn/python/learn/estimators/nonlinear_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py index 318891f0fd5..73810352394 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/random_forest.py +++ b/tensorflow/contrib/learn/python/learn/estimators/random_forest.py @@ -114,7 +114,7 @@ class 
TensorForestEstimator(estimator.BaseEstimator): as_iterable=False) def predict_proba( self, x=None, input_fn=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns prediction probabilities for given features (classification). Args: @@ -148,7 +148,7 @@ class TensorForestEstimator(estimator.BaseEstimator): as_iterable=False) def predict( self, x=None, input_fn=None, axis=None, batch_size=None, outputs=None, - as_iterable=False): + as_iterable=True): """Returns predictions for given features. Args: diff --git a/tensorflow/contrib/learn/python/learn/tests/regression_test.py b/tensorflow/contrib/learn/python/learn/estimators/regression_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/regression_test.py rename to tensorflow/contrib/learn/python/learn/estimators/regression_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index 5d6e67484f3..20cc7485753 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -130,6 +130,8 @@ class RunConfig(object): # If not explicitly specified in the constructor and the TF_CONFIG # environment variable is present, load cluster_spec from TF_CONFIG. config = json.loads(os.environ.get('TF_CONFIG') or '{}') + environment = config.get('environment', 'local') + if not cluster_spec and 'cluster' in config: cluster_spec = ClusterSpec(config['cluster']) self.cluster_spec = cluster_spec @@ -138,6 +140,7 @@ class RunConfig(object): # otherwise, if the TF_CONFIG environment variable is present, use that. # Otherwise, use the respective default (None / 0). task_env = config.get('task', {}) + self._job_name = job_name or task_env.get('type') or None self.task = task if task is not None else task_env.get('index') or 0 @@ -151,11 +154,13 @@ class RunConfig(object): self.num_ps_replicas = num_ps_replicas or _count_ps(self.cluster_spec) or 0 # Set is_chief. + # TODO(b/32117298): cleanup environment-specific logic for setting is_chief + # once the environments have been further unified. self._is_chief = is_chief if self._is_chief is None: if not self._job_name: self._is_chief = (self.task == 0) - elif config: + elif config and environment == 'cloud': # When the TF_CONFIG environment variable is set, we can set the # default of is_chief to 0 when job_name is "master" and task is 0. self._is_chief = (self._job_name == 'master' and self.task == 0) @@ -176,11 +181,19 @@ class RunConfig(object): 'job_name is \'%s\', but only masters or workers may be chiefs. ' 'Please check is_chief and job_name, which may have been set in ' 'TF_CONFIG environment variable.' % (self._job_name,)) - elif (self._is_chief is False and self._job_name == 'master' and - self.task == 0): - raise ValueError( - 'Master task 0 must be chief. Please check is_chief, job_name, and ' - 'task, which may have been set in TF_CONFIG environment variable.') + elif self._is_chief is False: + if environment == 'cloud': + if self._job_name == 'master' and self.task == 0: + raise ValueError( + 'Master task 0 must be chief for cloud. Please check is_chief, ' + 'job_name, and task, which may have been set in TF_CONFIG ' + 'environment variable.') + else: + if self._job_name == 'worker' and self.task == 0: + raise ValueError( + 'Worker task 0 must be chief. 
Please check is_chief, job_name, ' + 'and task, which may have been set in TF_CONFIG environment ' + 'variable.') self.evaluation_master = evaluation_master or '' diff --git a/tensorflow/contrib/learn/python/learn/tests/run_config_test.py b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py similarity index 84% rename from tensorflow/contrib/learn/python/learn/tests/run_config_test.py rename to tensorflow/contrib/learn/python/learn/estimators/run_config_test.py index 4164b450452..b72c720aa6d 100644 --- a/tensorflow/contrib/learn/python/learn/tests/run_config_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config_test.py @@ -189,20 +189,47 @@ class RunConfigTest(tf.test.TestCase): # Basically, just make sure no exception is being raised. self.assertEquals(config.num_ps_replicas, 2) - def test_is_chief_from_tf_config(self): + def test_is_chief_from_cloud_tf_config(self): # is_chief should be true when ["task"]["type"] == "master" and - # index == 0. Note that test_values_from_tf_config covers the - # non-master case. + # index == 0 and ["task"]["environment"] == "cloud". Note that + # test_values_from_tf_config covers the non-master case. tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], "master": ["host3:3"], "worker": ["host4:4", "host5:5", "host6:6"]}, "task": {"type": "master", - "index": 0}} + "index": 0}, + "environment": "cloud"} with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): config = run_config.RunConfig() self.assertTrue(config.is_chief) + def test_is_chief_from_noncloud_tf_config(self): + # is_chief should be true when ["task"]["type"] == "worker" and + # index == 0 if ["task"]["environment"] != "cloud". + tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], + "master": ["host3:3"], + "worker": ["host4:4", "host5:5", "host6:6"]}, + "task": {"type": "worker", + "index": 0}, + "environment": "random"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + config = run_config.RunConfig() + + self.assertTrue(config.is_chief) + + # But task 0 for a job named "master" should not be. 
+ tf_config = {"cluster": {"ps": ["host1:1", "host2:2"], + "master": ["host3:3"], + "worker": ["host4:4", "host5:5", "host6:6"]}, + "task": {"type": "master", + "index": 0}, + "environment": "random"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + config = run_config.RunConfig() + + self.assertFalse(config.is_chief) + def test_default_is_chief_from_tf_config_without_job_name(self): tf_config = {"cluster": {}, "task": {}} @@ -245,8 +272,15 @@ class RunConfigTest(tf.test.TestCase): with self.assertRaisesRegexp(ValueError, msg): run_config.RunConfig(is_chief=True, task=0, job_name="ps") - with self.assertRaisesRegexp(ValueError, "Master task 0 must be chief"): - run_config.RunConfig(is_chief=False, task=0, job_name="master") + msg = "Master task 0 must be chief for cloud" + with self.assertRaisesRegexp(ValueError, msg): + tf_config = {"environment": "cloud"} + with patch.dict("os.environ", {"TF_CONFIG": json.dumps(tf_config)}): + run_config.RunConfig(is_chief=False, task=0, job_name="master") + + msg = "Worker task 0 must be chief" + with self.assertRaisesRegexp(ValueError, msg): + run_config.RunConfig(is_chief=False, task=0, job_name="worker") if __name__ == "__main__": diff --git a/tensorflow/contrib/learn/python/learn/tests/stability_test.py b/tensorflow/contrib/learn/python/learn/estimators/stability_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/stability_test.py rename to tensorflow/contrib/learn/python/learn/estimators/stability_test.py diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 84cf8ae71a4..25d0e79c737 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -55,7 +55,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): method), should be set to (#concurrent train ops per worker) x (#workers). If num_loss_partitions is larger or equal to this value, convergence is guaranteed but becomes slower as num_loss_partitions increases. If it is set - to a smaller value, the optimizer is more agressive in reducing the global + to a smaller value, the optimizer is more aggressive in reducing the global loss but convergence is not guaranteed. The recommended value in tf.learn (where there is one process per worker) is the number of workers running the train steps. It defaults to 1 (single machine). 
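Stepping back to the run_config change above: is_chief now depends on the TF_CONFIG 'environment' field, so outside the 'cloud' environment worker task 0 acts as chief, while in 'cloud' the chief remains master task 0. A small sketch of the non-cloud case, mirroring the new test; the host addresses and the 'random' environment value are placeholders:

import json
import os
import tensorflow as tf

tf_config = {"cluster": {"ps": ["host1:1"],
                         "master": ["host2:2"],
                         "worker": ["host3:3", "host4:4"]},
             "task": {"type": "worker", "index": 0},
             "environment": "random"}  # any value other than "cloud"
os.environ["TF_CONFIG"] = json.dumps(tf_config)

config = tf.contrib.learn.RunConfig()
assert config.is_chief  # worker 0 is chief outside the cloud environment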
@@ -146,6 +146,7 @@ class SVM(trainable.Trainable, evaluable.Evaluable): self._feature_columns = feature_columns self._model_dir = model_dir or tempfile.mkdtemp() + self._chief_hook = linear._SdcaUpdateWeightsHook() # pylint: disable=protected-access self._estimator = estimator.Estimator( model_fn=linear.sdca_classifier_model_fn, model_dir=self._model_dir, @@ -155,12 +156,19 @@ class SVM(trainable.Trainable, evaluable.Evaluable): "optimizer": self._optimizer, "weight_column_name": weight_column_name, "loss_type": "hinge_loss", + "update_weights_hook": self._chief_hook, }, feature_engineering_fn=feature_engineering_fn) + if not self._estimator.config.is_chief: + self._chief_hook = None def fit(self, x=None, y=None, input_fn=None, steps=None, batch_size=None, monitors=None, max_steps=None): """See trainable.Trainable.""" + if monitors is None: + monitors = [] + if self._chief_hook: + monitors.append(self._chief_hook) return self._estimator.fit(x=x, y=y, input_fn=input_fn, steps=steps, batch_size=batch_size, monitors=monitors, max_steps=max_steps) diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index c10d014c142..8468d04e488 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -223,6 +223,7 @@ class Experiment(object): logging.info("Waiting %f secs before starting eval.", delay_secs) time.sleep(delay_secs) + last_fitted_error_time = 0 while True: start = time.time() try: @@ -231,7 +232,13 @@ class Experiment(object): metrics=self._eval_metrics, name=name) except NotFittedError: - logging.warning("Estimator is not fitted yet, skipping evaluation.") + # Print warning message every 10 mins. + if time.time() - last_fitted_error_time > 600: + logging.warning( + "Estimator is not fitted yet. " + "Will start an evaluation when a checkpoint will be ready.") + last_fitted_error_time = time.time() + duration = time.time() - start if duration < throttle_delay_secs: difference = throttle_delay_secs - duration diff --git a/tensorflow/contrib/learn/python/learn/tests/experiment_test.py b/tensorflow/contrib/learn/python/learn/experiment_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/experiment_test.py rename to tensorflow/contrib/learn/python/learn/experiment_test.py diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index f74a6a204c5..583a12af8ca 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -126,6 +126,7 @@ def _monitored_train(graph, supervisor_save_model_secs=600, supervisor_save_model_steps=None, keep_checkpoint_max=5, + supervisor_save_summaries_secs=None, supervisor_save_summaries_steps=100, feed_fn=None, steps=None, @@ -164,7 +165,7 @@ def _monitored_train(graph, current loss. A `0` or negative value disables logging. supervisor_is_chief: Whether the current process is the chief supervisor in charge of restoring the model and running standard services. - supervisor_master: The master string to use when preparing the session. + supervisor_master: The master string to use when preparing the session. supervisor_save_model_secs: Save checkpoints every this many seconds. Can not be specified with `supervisor_save_model_steps`. supervisor_save_model_steps: Save checkpoints every this many steps. Can not @@ -173,8 +174,12 @@ def _monitored_train(graph, keep. 
As new files are created, older files are deleted. If None or 0, all checkpoint files are kept. This is simply passed as the max_to_keep arg to `tf.Saver` constructor. + supervisor_save_summaries_secs: Save summaries every + `supervisor_save_summaries_secs` seconds when training. supervisor_save_summaries_steps: Save summaries every - `supervisor_save_summaries_steps` seconds when training. + `supervisor_save_summaries_steps` steps when training. Exactly one of + `supervisor_save_summaries_steps` and `supervisor_save_summaries_secs` should be + specified, and the other should be None. feed_fn: A function that is called every iteration to produce a `feed_dict` passed to `session.run` calls. Optional. steps: Trains for this many steps (e.g. current global step + `steps`). @@ -267,6 +272,7 @@ def _monitored_train(graph, summary_writer=summary_writer)) all_hooks.append( basic_session_run_hooks.SummarySaverHook( + save_secs=supervisor_save_summaries_secs, save_steps=supervisor_save_summaries_steps, summary_writer=summary_writer, scaffold=scaffold)) diff --git a/tensorflow/contrib/learn/python/learn/tests/graph_actions_test.py b/tensorflow/contrib/learn/python/learn/graph_actions_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/graph_actions_test.py rename to tensorflow/contrib/learn/python/learn/graph_actions_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/grid_search_test.py b/tensorflow/contrib/learn/python/learn/grid_search_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/grid_search_test.py rename to tensorflow/contrib/learn/python/learn/grid_search_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/data_feeder_test.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/data_feeder_test.py rename to tensorflow/contrib/learn/python/learn/learn_io/data_feeder_test.py diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index c41bcf45a1a..21ce65b7eb4 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -28,15 +28,33 @@ from tensorflow.python.ops import io_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import variables as var_ops from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner + # Default name for key in the feature dict. KEY_FEATURE_NAME = '__key__' +def _check_enqueue_params(num_queue_runners, num_enqueue_threads): + """Check enqueue parameters for deprecation of `num_queue_runners`.""" + if num_queue_runners is not None: + # TODO(yifanchen): Remove on Nov 21 2016.
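For orientation, the deprecation check that begins here is what lets callers of the reader functions below switch from num_queue_runners to num_enqueue_threads, which uses a single coordinated QueueRunner with several enqueue threads. A hedged usage sketch with tf.contrib.learn.read_keyed_batch_features; the file pattern, feature spec, and the TFRecordReader choice are placeholders:

import tensorflow as tf

features = {"sequence": tf.FixedLenFeature([], tf.string)}

# num_enqueue_threads replaces the deprecated num_queue_runners.
_, parsed = tf.contrib.learn.read_keyed_batch_features(
    "/tmp/examples-*",  # placeholder file pattern
    batch_size=128,
    features=features,
    reader=tf.TFRecordReader,
    randomize_input=False,
    num_epochs=1,
    num_enqueue_threads=2)

with tf.Session() as session:
  # num_epochs creates local variables that must be initialized.
  session.run(tf.initialize_local_variables())
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(session, coord=coord)
  try:
    while not coord.should_stop():
      batch = session.run(parsed)
  except tf.errors.OutOfRangeError:
    pass
  finally:
    coord.request_stop()
    coord.join(threads)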
+ logging.warning('`num_queue_runners` is deprecated, it will be removed on ' + 'Nov 21 2016') + if num_enqueue_threads is not None: + raise ValueError('`num_queue_runners` and `num_enqueue_threads` can not ' + 'both be set.') + elif num_enqueue_threads is None: + logging.warning('Default behavior will change and `num_queue_runners` ' + 'will be replaced by `num_enqueue_threads`.') + num_queue_runners = 2 + return num_queue_runners, num_enqueue_threads + + def read_batch_examples(file_pattern, batch_size, reader, randomize_input=True, num_epochs=None, queue_capacity=10000, num_threads=1, @@ -134,7 +152,107 @@ def read_keyed_batch_examples( Raises: ValueError: for invalid inputs. """ - # Retrieve files to read. + return _read_keyed_batch_examples_helper( + file_pattern, + batch_size, + reader, + randomize_input, + num_epochs, + queue_capacity, + num_threads, + read_batch_size, + parse_fn, + setup_shared_queue=False, + name=name) + + +def _read_keyed_batch_examples_shared_queue(file_pattern, + batch_size, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + num_threads=1, + read_batch_size=1, + parse_fn=None, + name=None): + """Adds operations to read, queue, batch `Example` protos. + + Given file pattern (or list of files), will setup a shared queue for file + names, setup a worker queue that pulls from the shared queue, read `Example` + protos using provided `reader`, use batch queue to create batches of examples + of size `batch_size`. This provides at most once visit guarantees. Note that + this only works if the parameter servers are not pre-empted or restarted or + the session is not restored from a checkpoint since the state of a queue + is not checkpointed and we will end up restarting from the entire list of + files. + + All queue runners are added to the queue runners collection, and may be + started via `start_queue_runners`. + + All ops are added to the default graph. + + Use `parse_fn` if you need to do parsing / processing on single examples. + + Args: + file_pattern: List of files or pattern of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. + batch_size: An int or scalar `Tensor` specifying the batch size to use. + reader: A function or class that returns an object with + `read` method, (filename tensor) -> (example tensor). + randomize_input: Whether the input should be randomized. + num_epochs: Integer specifying the number of times to read through the + dataset. If `None`, cycles through the dataset forever. + NOTE - If specified, creates a variable that must be initialized, so call + `tf.initialize_all_variables()` as shown in the tests. + queue_capacity: Capacity for input queue. + num_threads: The number of threads enqueuing examples. + read_batch_size: An int or scalar `Tensor` specifying the number of + records to read at once + parse_fn: Parsing function, takes `Example` Tensor returns parsed + representation. If `None`, no parsing is done. + name: Name of resulting op. + + Returns: + Returns tuple of: + - `Tensor` of string keys. + - String `Tensor` of batched `Example` proto. + + Raises: + ValueError: for invalid inputs. 
+ """ + return _read_keyed_batch_examples_helper( + file_pattern, + batch_size, + reader, + randomize_input, + num_epochs, + queue_capacity, + num_threads, + read_batch_size, + parse_fn, + setup_shared_queue=True, + name=name) + + +def _get_shared_file_name_queue(file_names, shuffle, num_epochs, name): + # Creating a dummy variable so we can put the shared queue in ps if there is + # a PS and in the worker otherwise. TODO(rohanj): Figure out how to place an + # op on PS without this hack + with ops.Graph().as_default(): + dummy_var = var_ops.Variable(initial_value=0, name='dummy_var') + with ops.device(dummy_var.device): + shared_file_name_queue = input_ops.string_input_producer( + constant_op.constant( + file_names, name='input'), + shuffle=shuffle, + num_epochs=num_epochs, + name=name, + shared_name=name) + return shared_file_name_queue + + +def _get_file_names(file_pattern, randomize_input): if isinstance(file_pattern, list): file_names = file_pattern if not file_names: @@ -148,6 +266,46 @@ def read_keyed_batch_examples( # in `string_input_producer` if `randomize_input` is enabled. if not randomize_input: file_names = sorted(file_names) + return file_names + + +def _get_examples(file_name_queue, reader, num_threads, read_batch_size, + parse_fn): + with ops.name_scope('read'): + example_list = [] + for _ in range(num_threads): + if read_batch_size > 1: + keys, examples_proto = reader().read_up_to(file_name_queue, + read_batch_size) + else: + keys, examples_proto = reader().read(file_name_queue) + if parse_fn: + parsed_examples = parse_fn(examples_proto) + # Map keys into example map because batch_join doesn't support + # tuple of Tensor + dict. + if isinstance(parsed_examples, dict): + parsed_examples[KEY_FEATURE_NAME] = keys + example_list.append(parsed_examples) + else: + example_list.append((keys, parsed_examples)) + else: + example_list.append((keys, examples_proto)) + return example_list + + +def _read_keyed_batch_examples_helper(file_pattern, + batch_size, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + num_threads=1, + read_batch_size=1, + parse_fn=None, + setup_shared_queue=False, + name=None): + # Retrieve files to read. + file_names = _get_file_names(file_pattern, randomize_input) # Check input parameters are given and reasonable. if (not queue_capacity) or (queue_capacity <= 0): @@ -168,33 +326,25 @@ def read_keyed_batch_examples( raise ValueError('Invalid num_epochs %s.' % num_epochs) with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope: - # Setup filename queue with shuffling. with ops.name_scope('file_name_queue') as file_name_queue_scope: - file_name_queue = input_ops.string_input_producer( - constant_op.constant(file_names, name='input'), - shuffle=randomize_input, num_epochs=num_epochs, - name=file_name_queue_scope) + if setup_shared_queue: + shared_file_name_queue = _get_shared_file_name_queue( + file_names, randomize_input, num_epochs, file_name_queue_scope) + file_name_queue = data_flow_ops.FIFOQueue( + capacity=1, dtypes=[dtypes.string], shapes=[[]]) + enqueue_op = file_name_queue.enqueue(shared_file_name_queue.dequeue()) + queue_runner.add_queue_runner( + queue_runner.QueueRunner(file_name_queue, [enqueue_op])) + else: + file_name_queue = input_ops.string_input_producer( + constant_op.constant( + file_names, name='input'), + shuffle=randomize_input, + num_epochs=num_epochs, + name=file_name_queue_scope) - # Create readers, one per thread and set them to read from filename queue. 
- with ops.name_scope('read'): - example_list = [] - for _ in range(num_threads): - if read_batch_size > 1: - keys, examples_proto = reader().read_up_to(file_name_queue, - read_batch_size) - else: - keys, examples_proto = reader().read(file_name_queue) - if parse_fn: - parsed_examples = parse_fn(examples_proto) - # Map keys into example map because batch_join doesn't support - # tuple of Tensor + dict. - if isinstance(parsed_examples, dict): - parsed_examples[KEY_FEATURE_NAME] = keys - example_list.append(parsed_examples) - else: - example_list.append((keys, parsed_examples)) - else: - example_list.append((keys, examples_proto)) + example_list = _get_examples(file_name_queue, reader, num_threads, + read_batch_size, parse_fn) enqueue_many = read_batch_size > 1 @@ -234,7 +384,8 @@ def read_keyed_batch_features(file_pattern, queue_capacity=10000, reader_num_threads=1, feature_queue_capacity=100, - num_queue_runners=2, + num_queue_runners=None, + num_enqueue_threads=None, parse_fn=None, name=None): """Adds operations to read, queue, batch and parse `Example` protos. @@ -265,10 +416,17 @@ def read_keyed_batch_features(file_pattern, queue_capacity: Capacity for input queue. reader_num_threads: The number of threads to read examples. feature_queue_capacity: Capacity of the parsed features queue. - num_queue_runners: Number of queue runners to start for the feature queue, - Adding multiple queue runners for the parsed example queue helps maintain + num_queue_runners: Deprecated. Defaults to 2 if this and + `num_enqueue_threads` are both `None`. This is the number of queue + runners to start for the feature queue. Adding multiple queue runners for + the parsed example queue helps maintain a full queue when the subsequent + computations overall are cheaper than parsing. This argument will be + deprecated and replaced with `num_enqueue_threads`. + num_enqueue_threads: Number of threads to enqueue the parsed example queue. + Using multiple threads to enqueue the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than - parsing. + parsing. This argument will replace `num_queue_runners`. This and + `num_queue_runners` can not both be set. parse_fn: Parsing function, takes `Example` Tensor returns parsed representation. If `None`, no parsing is done. name: Name of resulting op. @@ -282,6 +440,9 @@ def read_keyed_batch_features(file_pattern, ValueError: for invalid inputs. """ + num_queue_runners, num_enqueue_threads = _check_enqueue_params( + num_queue_runners, num_enqueue_threads) + with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: keys, examples = read_keyed_batch_examples( file_pattern, batch_size, reader, randomize_input=randomize_input, @@ -290,6 +451,88 @@ def read_keyed_batch_features(file_pattern, parse_fn=parse_fn, name=scope) # Parse the example. feature_map = parsing_ops.parse_example(examples, features) + return queue_parsed_features( + feature_map, + keys=keys, + feature_queue_capacity=feature_queue_capacity, + num_queue_runners=num_queue_runners, + num_enqueue_threads=num_enqueue_threads, + name=scope) + + +def _read_keyed_batch_features_shared_queue(file_pattern, + batch_size, + features, + reader, + randomize_input=True, + num_epochs=None, + queue_capacity=10000, + reader_num_threads=1, + feature_queue_capacity=100, + num_queue_runners=2, + parse_fn=None, + name=None): + """Adds operations to read, queue, batch and parse `Example` protos. 
+ + Given file pattern (or list of files), will setup a shared queue for file + names, setup a worker queue that gets filenames from the shared queue, + read `Example` proto using provided `reader`, use batch queue to create + batches of examples of size `batch_size` and parse example given `features` + specification. + + All queue runners are added to the queue runners collection, and may be + started via `start_queue_runners`. + + All ops are added to the default graph. + + Args: + file_pattern: List of files or pattern of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. + batch_size: An int or scalar `Tensor` specifying the batch size to use. + features: A `dict` mapping feature keys to `FixedLenFeature` or + `VarLenFeature` values. + reader: A function or class that returns an object with + `read` method, (filename tensor) -> (example tensor). + randomize_input: Whether the input should be randomized. + num_epochs: Integer specifying the number of times to read through the + dataset. If None, cycles through the dataset forever. NOTE - If specified, + creates a variable that must be initialized, so call + tf.initialize_local_variables() as shown in the tests. + queue_capacity: Capacity for input queue. + reader_num_threads: The number of threads to read examples. + feature_queue_capacity: Capacity of the parsed features queue. + num_queue_runners: Number of queue runners to start for the feature queue. + Adding multiple queue runners for the parsed example queue helps maintain + a full queue when the subsequent computations overall are cheaper than + parsing. + parse_fn: Parsing function, takes `Example` Tensor returns parsed + representation. If `None`, no parsing is done. + name: Name of resulting op. + + Returns: + Returns tuple of: + - `Tensor` of string keys. + - A dict of `Tensor` or `SparseTensor` objects for each in `features`. + + Raises: + ValueError: for invalid inputs. + """ + + with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope: + keys, examples = _read_keyed_batch_examples_shared_queue( + file_pattern, + batch_size, + reader, + randomize_input=randomize_input, + num_epochs=num_epochs, + queue_capacity=queue_capacity, + num_threads=reader_num_threads, + read_batch_size=batch_size, + parse_fn=parse_fn, + name=scope) + # Parse the example. + feature_map = parsing_ops.parse_example(examples, features) return queue_parsed_features( feature_map, keys=keys, @@ -301,7 +544,8 @@ def read_keyed_batch_features(file_pattern, def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, - num_queue_runners=2, + num_queue_runners=None, + num_enqueue_threads=None, name=None): """Speeds up parsing by using queues to do it asynchronously. @@ -320,10 +564,17 @@ def queue_parsed_features(parsed_features, parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects. keys: `Tensor` of string keys. feature_queue_capacity: Capacity of the parsed features queue. - num_queue_runners: Number of queue runners to start for the feature queue, - Adding multiple queue runners for the parsed example queue helps maintain + num_queue_runners: Deprecated. Defaults to 2 if this and + `num_enqueue_threads` are both `None`. This is the number of queue + runners to start for the feature queue. Adding multiple queue runners for + the parsed example queue helps maintain a full queue when the subsequent + computations overall are cheaper than parsing.
This argument will be + deprecated and replaced with `num_enqueue_threads`. + num_enqueue_threads: Number of threads to enqueue the parsed example queue. + Using multiple threads to enqueue the parsed example queue helps maintain a full queue when the subsequent computations overall are cheaper than - parsing. + parsing. This argument will replace `num_queue_runners`. This and + `num_queue_runners` can not both be set. name: Name of resulting op. Returns: @@ -331,7 +582,12 @@ def queue_parsed_features(parsed_features, - `Tensor` corresponding to `keys` if provided, otherwise `None`. - A dict of string key to `Tensor` or `SparseTensor` objects corresponding to `parsed_features`. + Raises: + ValueError: for invalid inputs. """ + num_queue_runners, num_enqueue_threads = _check_enqueue_params( + num_queue_runners, num_enqueue_threads) + args = list(parsed_features.values()) if keys is not None: args += [keys] @@ -370,12 +626,31 @@ def queue_parsed_features(parsed_features, # Add multiple queue runners so that the queue is always full. Adding more # than two queue-runners may hog the cpu on the worker to fill up the queue. - for _ in range(num_queue_runners): - queue_runner.add_queue_runner( - queue_runner.QueueRunner( - input_queue, [input_queue.enqueue(tensors_to_enqueue)], - queue_closed_exception_types=(errors.OutOfRangeError, - errors.CancelledError))) + # + # Note: this can result in large last batch being lost as the multiple queue + # runner threads do not coordinate with each other. Please use + # `num_enqueue_threads` instead. + if num_queue_runners is not None: + for _ in range(num_queue_runners): + queue_runner.add_queue_runner( + queue_runner.QueueRunner( + input_queue, [input_queue.enqueue(tensors_to_enqueue)], + queue_closed_exception_types=(errors.OutOfRangeError, + errors.CancelledError))) + # Use a single QueueRunner with multiple threads to enqueue so the queue is + # always full. The threads are coordinated so the last batch will not be + # lost. 
+ elif num_enqueue_threads is not None: + enqueue_ops = [input_queue.enqueue(tensors_to_enqueue) + for _ in range(num_enqueue_threads)] + queue_runner.add_queue_runner(queue_runner.QueueRunner( + input_queue, enqueue_ops, + queue_closed_exception_types=(errors.OutOfRangeError, + errors.CancelledError))) + else: + raise AssertionError( + 'Either `num_queue_runners` or `num_enqueue_threads` should have ' + 'been set.') dequeued_tensors = input_queue.dequeue() diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py index c1b1c409454..a0c143e9bb5 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io_test.py @@ -19,13 +19,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import base64 import os import random import tempfile +from six.moves import xrange # pylint: disable=redefined-builtin import tensorflow as tf +from tensorflow.contrib.learn.python.learn.learn_io.graph_io import _read_keyed_batch_examples_shared_queue from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.platform import gfile @@ -243,6 +247,63 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() + def test_read_keyed_batch_features_mutual_exclusive_args(self): + filename = self._create_temp_file("abcde") + features = {"sequence": tf.FixedLenFeature([], tf.string)} + with self.assertRaisesRegexp(ValueError, "can not both be set"): + _, _ = tf.contrib.learn.read_keyed_batch_features( + filename, 1, features, tf.TextLineReader, randomize_input=False, + num_queue_runners=2, num_enqueue_threads=2) + + def test_queue_parsed_features_mutual_exclusive_args(self): + parsed_features = {"a": tf.constant([10, 20, 30])} + with self.assertRaisesRegexp(ValueError, "can not both be set"): + _, _ = tf.contrib.learn.queue_parsed_features( + parsed_features, num_queue_runners=2, num_enqueue_threads=2) + + def test_read_text_lines_large(self): + gfile.Glob = self._orig_glob + sequence_prefix = "abcdefghijklmnopqrstuvwxyz123456789" + num_records = 49999 + lines = ["".join([sequence_prefix, str(l)]).encode("ascii") + for l in xrange(num_records)] + json_lines = ["".join(['{"features": { "feature": { "sequence": {', + '"bytes_list": { "value": ["', + base64.b64encode(l).decode("ascii"), + '"]}}}}}\n']) for l in lines] + filename = self._create_temp_file("".join(json_lines)) + batch_size = 10000 + queue_capacity = 10000 + name = "my_large_batch" + + features = {"sequence": tf.FixedLenFeature([], tf.string)} + + with tf.Graph().as_default() as g, self.test_session(graph=g) as session: + _, result = tf.contrib.learn.read_keyed_batch_features( + filename, batch_size, features, tf.TextLineReader, + randomize_input=False, num_epochs=1, queue_capacity=queue_capacity, + num_enqueue_threads=2, parse_fn=tf.decode_json_example, name=name) + session.run(tf.initialize_local_variables()) + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(session, coord=coord) + + data = [] + try: + while not coord.should_stop(): + data.append(session.run(result)) + except errors.OutOfRangeError: + pass + finally: + coord.request_stop() + + coord.join(threads) + parsed_records = [item for sublist in [d["sequence"] for d in data] + for item in sublist] + # Check that the number of records matches expected and 
all records + # are present. + self.assertEqual(len(parsed_records), num_records) + self.assertEqual(set(parsed_records), set(lines)) + def test_read_text_lines_multifile(self): gfile.Glob = self._orig_glob filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"]) @@ -261,6 +322,18 @@ class GraphIOTest(tf.test.TestCase): coord = tf.train.Coordinator() tf.train.start_queue_runners(session, coord=coord) + self.assertEqual("%s:1" % name, inputs.name) + file_name_queue_name = "%s/file_name_queue" % name + file_names_name = "%s/input" % file_name_queue_name + example_queue_name = "%s/fifo_queue" % name + test_util.assert_ops_in_graph({ + file_names_name: "Const", + file_name_queue_name: "FIFOQueue", + "%s/read/TextLineReader" % name: "TextLineReader", + example_queue_name: "FIFOQueue", + name: "QueueDequeueUpTo" + }, g) + self.assertAllEqual(session.run(inputs), [b"ABC"]) self.assertAllEqual(session.run(inputs), [b"DEF"]) self.assertAllEqual(session.run(inputs), [b"GHK"]) @@ -269,6 +342,120 @@ class GraphIOTest(tf.test.TestCase): coord.request_stop() + def test_read_text_lines_multifile_with_shared_queue(self): + gfile.Glob = self._orig_glob + filenames = self._create_sorted_temp_files(["ABC\n", "DEF\nGHK\n"]) + + batch_size = 1 + queue_capacity = 5 + name = "my_batch" + + with tf.Graph().as_default() as g, self.test_session(graph=g) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + name=name) + session.run(tf.initialize_local_variables()) + + coord = tf.train.Coordinator() + tf.train.start_queue_runners(session, coord=coord) + + self.assertEqual("%s:1" % name, inputs.name) + shared_file_name_queue_name = "%s/file_name_queue" % name + file_names_name = "%s/input" % shared_file_name_queue_name + example_queue_name = "%s/fifo_queue" % name + worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name + test_util.assert_ops_in_graph({ + file_names_name: "Const", + shared_file_name_queue_name: "FIFOQueue", + "%s/read/TextLineReader" % name: "TextLineReader", + example_queue_name: "FIFOQueue", + worker_file_name_queue_name: "FIFOQueue", + name: "QueueDequeueUpTo" + }, g) + + self.assertAllEqual(session.run(inputs), [b"ABC"]) + self.assertAllEqual(session.run(inputs), [b"DEF"]) + self.assertAllEqual(session.run(inputs), [b"GHK"]) + with self.assertRaises(errors.OutOfRangeError): + session.run(inputs) + + coord.request_stop() + + def _get_qr(self, name): + for qr in ops.get_collection(ops.GraphKeys.QUEUE_RUNNERS): + if qr.name == name: + return qr + + def _run_queue(self, name, session): + qr = self._get_qr(name) + for op in qr.enqueue_ops: + session.run(op) + + def test_multiple_workers_with_shared_queue(self): + gfile.Glob = self._orig_glob + filenames = self._create_sorted_temp_files([ + "ABC\n", "DEF\n", "GHI\n", "JKL\n", "MNO\n", "PQR\n", "STU\n", "VWX\n", + "YZ\n" + ]) + + batch_size = 1 + queue_capacity = 5 + name = "my_batch" + shared_file_name_queue_name = "%s/file_name_queue" % name + example_queue_name = "%s/fifo_queue" % name + worker_file_name_queue_name = "%s/file_name_queue/fifo_queue" % name + + server = tf.train.Server.create_local_server() + + with tf.Graph().as_default() as g1, tf.Session( + server.target, graph=g1) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + 
name=name) + session.run(tf.initialize_local_variables()) + + # Run the three queues once manually. + self._run_queue(shared_file_name_queue_name, session) + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"ABC"]) + + # Run the worker and the example queue. + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"DEF"]) + + with tf.Graph().as_default() as g2, tf.Session( + server.target, graph=g2) as session: + _, inputs = _read_keyed_batch_examples_shared_queue( + filenames, + batch_size, + reader=tf.TextLineReader, + randomize_input=False, + num_epochs=1, + queue_capacity=queue_capacity, + name=name) + + # Run the worker and the example queue. + self._run_queue(worker_file_name_queue_name, session) + self._run_queue(example_queue_name, session) + + self.assertAllEqual(session.run(inputs), [b"GHI"]) + + self.assertTrue(g1 is not g2) + def test_batch_text_lines(self): gfile.Glob = self._orig_glob filename = self._create_temp_file("A\nB\nC\nD\nE\n") diff --git a/tensorflow/contrib/learn/python/learn/tests/io_test.py b/tensorflow/contrib/learn/python/learn/learn_io/io_test.py similarity index 97% rename from tensorflow/contrib/learn/python/learn/tests/io_test.py rename to tensorflow/contrib/learn/python/learn/learn_io/io_test.py index 80732337f97..a299cadaae9 100644 --- a/tensorflow/contrib/learn/python/learn/tests/io_test.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/io_test.py @@ -45,7 +45,7 @@ class IOTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(data), n_classes=3) classifier.fit(data, labels, steps=100) - score = accuracy_score(labels[0], classifier.predict(data)) + score = accuracy_score(labels[0], list(classifier.predict(data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) else: print("No pandas installed. 
pandas-related tests are skipped.") @@ -61,7 +61,7 @@ class IOTest(tf.test.TestCase): feature_columns=learn.infer_real_valued_columns_from_input(data), n_classes=3) classifier.fit(data, labels, steps=100) - score = accuracy_score(labels, classifier.predict(data)) + score = accuracy_score(labels, list(classifier.predict(data))) self.assertGreater(score, 0.5, "Failed with score = {0}".format(score)) def test_string_data_formats(self): diff --git a/tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py b/tensorflow/contrib/learn/python/learn/learn_runner_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/learn_runner_test.py rename to tensorflow/contrib/learn/python/learn/learn_runner_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py b/tensorflow/contrib/learn/python/learn/metric_spec_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/metric_spec_test.py rename to tensorflow/contrib/learn/python/learn/metric_spec_test.py diff --git a/tensorflow/contrib/learn/python/learn/tests/monitors_test.py b/tensorflow/contrib/learn/python/learn/monitors_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/tests/monitors_test.py rename to tensorflow/contrib/learn/python/learn/monitors_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index 7662191a5e2..f1163ee02cb 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -33,7 +33,7 @@ def mean_squared_error_regressor(tensor_in, labels, weights, biases, name=None): predictions = nn.xw_plus_b(tensor_in, weights, biases) if len(labels.get_shape()) == 1 and len(predictions.get_shape()) == 2: predictions = array_ops_.squeeze(predictions, squeeze_dims=[1]) - return predictions, loss_ops.sum_of_squares(predictions, labels) + return predictions, loss_ops.mean_squared_error(predictions, labels) def softmax_classifier(tensor_in, diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/ops_test.py b/tensorflow/contrib/learn/python/learn/ops/ops_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/ops/tests/ops_test.py rename to tensorflow/contrib/learn/python/learn/ops/ops_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/seq2seq_ops_test.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops_test.py similarity index 100% rename from tensorflow/contrib/learn/python/learn/ops/tests/seq2seq_ops_test.py rename to tensorflow/contrib/learn/python/learn/ops/seq2seq_ops_test.py diff --git a/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py b/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py deleted file mode 100644 index 7376211abfe..00000000000 --- a/tensorflow/contrib/learn/python/learn/ops/tests/__init__.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Sequence-to-sequence tests.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function diff --git a/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py b/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py deleted file mode 100644 index 73988c01234..00000000000 --- a/tensorflow/contrib/learn/python/learn/tests/basic_session_run_hooks_test.py +++ /dev/null @@ -1,100 +0,0 @@ -# pylint: disable=g-bad-file-header -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for basic_session_run_hooks.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import shutil -import tempfile -import time - -import tensorflow as tf - -from tensorflow.contrib import testing -from tensorflow.python.training import monitored_session - - -class StepCounterHookTest(tf.test.TestCase): - - def setUp(self): - self.log_dir = tempfile.mkdtemp() - - def tearDown(self): - shutil.rmtree(self.log_dir, ignore_errors=True) - - def test_step_counter(self): - with tf.Graph().as_default() as g, tf.Session() as sess: - global_step = tf.contrib.framework.get_or_create_global_step() - train_op = tf.assign_add(global_step, 1) - summary_writer = testing.FakeSummaryWriter(self.log_dir, g) - hook = tf.train.StepCounterHook( - summary_writer=summary_writer, every_n_steps=10) - hook.begin() - sess.run(tf.initialize_all_variables()) - mon_sess = monitored_session._HookedSession(sess, [hook]) - for _ in range(30): - time.sleep(0.01) - mon_sess.run(train_op) - hook.end(sess) - summary_writer.assert_summaries( - test_case=self, - expected_logdir=self.log_dir, - expected_graph=g, - expected_summaries={}) - for step in [11, 21]: - summary_value = summary_writer.summaries[step][0].value[0] - self.assertTrue(summary_value.tag, 'global_step/sec') - # check at least 10 steps per sec is recorded. 
- self.assertGreater(summary_value.simple_value, 10) - - -class SummarySaverHookTest(tf.test.TestCase): - - def test_summary_saver(self): - with tf.Graph().as_default() as g, tf.Session() as sess: - log_dir = 'log/dir' - summary_writer = testing.FakeSummaryWriter(log_dir, g) - var = tf.Variable(0.0) - tensor = tf.assign_add(var, 1.0) - summary_op = tf.scalar_summary('my_summary', tensor) - global_step = tf.contrib.framework.get_or_create_global_step() - train_op = tf.assign_add(global_step, 1) - hook = tf.train.SummarySaverHook( - summary_op=summary_op, save_steps=8, summary_writer=summary_writer) - hook.begin() - sess.run(tf.initialize_all_variables()) - mon_sess = monitored_session._HookedSession(sess, [hook]) - for i in range(30): - _ = i - mon_sess.run(train_op) - hook.end(sess) - summary_writer.assert_summaries( - test_case=self, - expected_logdir=log_dir, - expected_graph=g, - expected_summaries={ - 1: {'my_summary': 1.0}, - 9: {'my_summary': 2.0}, - 17: {'my_summary': 3.0}, - 25: {'my_summary': 4.0}, - }) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py b/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py deleted file mode 100644 index 2df47a75ba5..00000000000 --- a/tensorflow/contrib/learn/python/learn/tests/summary_writer_cache_test.py +++ /dev/null @@ -1,78 +0,0 @@ -# pylint: disable=g-bad-file-header -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for Runner.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import glob -import os - -import tensorflow as tf - -from tensorflow.contrib.learn.python.learn import summary_writer_cache - - -class SummaryWriterCacheTest(tf.test.TestCase): - """SummaryWriterCache tests.""" - - def _test_dir(self, test_name): - """Create an empty dir to use for tests. - - Args: - test_name: Name of the test. - - Returns: - Absolute path to the test directory. 
- """ - test_dir = os.path.join(self.get_temp_dir(), test_name) - if os.path.isdir(test_dir): - for f in glob.glob('%s/*' % test_dir): - os.remove(f) - else: - os.makedirs(test_dir) - return test_dir - - def test_cache(self): - with tf.Graph().as_default(): - dir1 = self._test_dir('test_cache_1') - dir2 = self._test_dir('test_cache_2') - sw1 = summary_writer_cache.SummaryWriterCache.get(dir1) - sw2 = summary_writer_cache.SummaryWriterCache.get(dir2) - sw3 = summary_writer_cache.SummaryWriterCache.get(dir1) - self.assertEqual(sw1, sw3) - self.assertFalse(sw1 == sw2) - sw1.close() - sw2.close() - events1 = glob.glob(os.path.join(dir1, 'event*')) - self.assertTrue(events1) - events2 = glob.glob(os.path.join(dir2, 'event*')) - self.assertTrue(events2) - events3 = glob.glob(os.path.join('nowriter', 'event*')) - self.assertFalse(events3) - - def test_clear(self): - with tf.Graph().as_default(): - dir1 = self._test_dir('test_clear') - sw1 = summary_writer_cache.SummaryWriterCache.get(dir1) - summary_writer_cache.SummaryWriterCache.clear() - sw2 = summary_writer_cache.SummaryWriterCache.get(dir1) - self.assertFalse(sw1 == sw2) - - -if __name__ == '__main__': - tf.test.main() diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index 00ad08dc110..5313dd3a4ea 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -19,7 +19,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib import layers from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_arg_values from tensorflow.contrib.framework.python.ops import variables as contrib_variables @@ -312,21 +311,10 @@ def _export_estimator(estimator, predictions) else: try: - # Some estimators provide a target_column of known type - target_column = estimator._get_target_column() - problem_type = target_column.problem_type - - if problem_type == layers.ProblemType.CLASSIFICATION: - signature_fn = classification_signature_fn - elif problem_type == layers.ProblemType.LINEAR_REGRESSION: - signature_fn = regression_signature_fn - elif problem_type == layers.ProblemType.LOGISTIC_REGRESSION: - signature_fn = logistic_regression_signature_fn - else: - raise ValueError( - 'signature_fn must be provided because the TargetColumn is a %s, ' - 'which does not have a standard problem type and so cannot use a ' - 'standard export signature.' % type(target_column).__name__) + # Some estimators provide a signature function. 
+ # TODO(zakaria): check if the estimator has this function, + # raise helpful error if not + signature_fn = estimator._create_signature_fn() default_signature, named_graph_signatures = ( signature_fn(examples, features, predictions)) diff --git a/tensorflow/contrib/learn/python/learn/utils/export_test.py b/tensorflow/contrib/learn/python/learn/utils/export_test.py index 54e3e8962f7..0f1c7e6d807 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export_test.py +++ b/tensorflow/contrib/learn/python/learn/utils/export_test.py @@ -47,6 +47,28 @@ class ExportTest(tf.test.TestCase): default_signature = signatures.default_signature return default_signature + def testExportMonitor_EstimatorProvidesSignature(self): + random.seed(42) + x = np.random.rand(1000) + y = 2 * x + 3 + cont_features = [tf.contrib.layers.real_valued_column('', dimension=1)] + regressor = learn.LinearRegressor(feature_columns=cont_features) + export_dir = tempfile.mkdtemp() + 'export/' + export_monitor = learn.monitors.ExportMonitor( + every_n_steps=1, export_dir=export_dir, exports_to_keep=2) + regressor.fit(x, y, steps=10, + monitors=[export_monitor]) + + self.assertTrue(tf.gfile.Exists(export_dir)) + # Only the written checkpoints are exported. + self.assertTrue(tf.gfile.Exists(export_dir + '00000001/export')) + self.assertTrue(tf.gfile.Exists(export_dir + '00000010/export')) + self.assertEquals(export_monitor.last_export_dir, os.path.join(export_dir, + '00000010')) + # Validate the signature + signature = self._get_default_signature(export_dir + '00000010/export.meta') + self.assertTrue(signature.HasField('regression_signature')) + def testExportMonitor(self): random.seed(42) x = np.random.rand(1000) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 466d1aac51e..b749cd18664 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -189,6 +189,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. # regularization and still have a sum that is quite close to the @@ -248,6 +249,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): for t in threads: t.join() + lr.update_weights(train_op).run() # The high tolerance in unregularized_loss comparisons is due to the # fact that it's possible to trade off unregularized_loss vs. @@ -294,6 +296,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # There is neither L1 nor L2 loss, so regularized and unregularized # losses should be exactly the same. 
@@ -346,6 +349,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) @@ -416,6 +420,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.226487 + 0.102902, unregularized_loss.eval(), @@ -456,6 +461,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.284860, unregularized_loss.eval(), atol=0.08) self.assertAllClose(0.408044, loss.eval(), atol=0.012) @@ -494,6 +500,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() self.assertAllClose(0.411608, unregularized_loss.eval(), atol=0.05) self.assertAllClose(0.525457, loss.eval(), atol=0.01) predicted_labels = get_binary_predictions_for_logistic(predictions) @@ -580,6 +587,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be 2/3 of label due to minimizing regularized loss: # (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 @@ -626,6 +634,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be 1/5 of label due to minimizing regularized loss: # (label - 2 * weight)^2 + L2 * 16 * weight^2 @@ -661,6 +670,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # Predictions should be -4.0, 48/5 due to minimizing regularized loss: # (label - 2 * weight)^2 / 2 + L2 * 2 * weight^2 + L1 * 4 * weight @@ -696,6 +706,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # There are 4 (sparse) variable weights to be learned. 2 for age and 2 for # gender. Let w_1, w_2 be age weights, w_3, w_4 be gender weights, y_1, @@ -729,6 +740,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2(label_1-w_1)^2 + 1/2(label_2-w_2)^2 + \lambda/2 (w_1^2 + w_2^2). 
So, @@ -759,6 +771,7 @@ class SdcaWithLinearLossTest(SdcaModelTest): train_op = lr.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + lr.update_weights(train_op).run() # The loss function for these particular features is given by: # 1/2 s_1 (label_1-w_1)^2 + 1/2 s_2(label_2-w_2)^2 + @@ -816,6 +829,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() binary_predictions = get_binary_predictions_for_hinge(predictions) self.assertAllEqual([-1.0, 1.0], predictions.eval()) @@ -841,6 +855,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() self.assertAllClose([1.0, -1.0], predictions.eval(), atol=0.05) self.assertAllEqual([1, 0], binary_predictions.eval()) @@ -871,6 +886,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() # (1.0, 0.5) and (1.0, -0.5) are separable by x-axis but the datapoints # are within the margins so there is unregularized loss (1/2 per example). @@ -899,6 +915,7 @@ class SdcaWithHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() # Point (1.0, 0.5) has higher weight than (1.0, -0.5) so the model will # try to increase the margin from (1.0, 0.5). Due to regularization, @@ -953,6 +970,7 @@ class SdcaWithSmoothHingeLossTest(SdcaModelTest): train_op = model.minimize() for _ in range(_MAX_ITERATIONS): train_op.run() + model.update_weights(train_op).run() binary_predictions = get_binary_predictions_for_hinge(predictions) self.assertAllClose([-0.67, 0.67], predictions.eval(), atol=0.05) diff --git a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py index ad7c09e18f3..6c3886d247e 100644 --- a/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py +++ b/tensorflow/contrib/linear_optimizer/python/ops/sdca_ops.py @@ -278,7 +278,8 @@ class SdcaModel(object): ```python # Create a solver with the desired parameters. lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options) - opt_op = lr.minimize() + min_op = lr.minimize() + opt_op = lr.update_weights(min_op) predictions = lr.predictions(examples) # Primal loss + L1 loss + L2 loss. @@ -565,35 +566,46 @@ class SdcaModel(object): for w, u in zip(self._slots['unshrinked_dense_features_weights'], dfw): update_ops.append(w.assign_add(u)) - with ops.control_dependencies(update_ops): - update_ops = [] - # Copy over unshrinked weights to user provided variables. - for i, name in enumerate( - ['sparse_features_weights', 'dense_features_weights']): - for var, slot_var in zip(self._variables[name], - self._slots['unshrinked_' + name]): - update_ops.append(var.assign(slot_var)) - - update_group = control_flow_ops.group(*update_ops) - - # Apply proximal step. 
- with ops.control_dependencies([update_group]): - shrink_ops = [] - for name in ['sparse_features_weights', 'dense_features_weights']: - for var in self._variables[name]: - with ops.device(var.device): - shrink_ops.append( - sdca_shrink_l1( - self._convert_n_to_tensor( - [var], as_ref=True), - l1=self._symmetric_l1_regularization(), - l2=self._symmetric_l2_regularization())) - shrink_l1 = control_flow_ops.group(*shrink_ops) if not global_step: - return shrink_l1 - with ops.control_dependencies([shrink_l1]): + return control_flow_ops.group(*update_ops) + with ops.control_dependencies(update_ops): return state_ops.assign_add(global_step, 1, name=name).op + def update_weights(self, train_op): + """Updates the model weights. + + This function must be called on at least one worker after `minimize`. + In distributed training this call can be omitted on non-chief workers to + speed up training. + + Args: + train_op: The operation returned by the `minimize` call. + + Returns: + An Operation that updates the model weights. + """ + with ops.control_dependencies([train_op]): + update_ops = [] + # Copy over unshrinked weights to user provided variables. + for name in ['sparse_features_weights', 'dense_features_weights']: + for var, slot_var in zip(self._variables[name], + self._slots['unshrinked_' + name]): + update_ops.append(var.assign(slot_var)) + + # Apply proximal step. + with ops.control_dependencies(update_ops): + update_ops = [] + for name in ['sparse_features_weights', 'dense_features_weights']: + for var in self._variables[name]: + with ops.device(var.device): + update_ops.append( + sdca_shrink_l1( + self._convert_n_to_tensor( + [var], as_ref=True), + l1=self._symmetric_l1_regularization(), + l2=self._symmetric_l2_regularization())) + return control_flow_ops.group(*update_ops) + def approximate_duality_gap(self): """Add operations to compute the approximate duality gap. diff --git a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py index 334acf7a041..6ff4bf31753 100644 --- a/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py +++ b/tensorflow/contrib/linear_optimizer/python/sdca_optimizer.py @@ -49,7 +49,7 @@ class SDCAOptimizer(object): as `key` whose value is a `Tensor` of shape [batch_size] and dtype string. num_loss_partitions defines the number of partitions of the global loss function and should be set to (#concurrent train ops/per worker) x (#workers). - Convergence of (global) loss is guranteed if num_loss_partitions is larger or + Convergence of (global) loss is guaranteed if num_loss_partitions is larger or equal to the above product. Larger values for num_loss_partitions lead to slower convergence. 
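(Editor's note.) The `sdca_ops.py` hunk above, together with the `sdca_optimizer.py` change that follows (which now returns both the model and the train op), splits the old single training op in two: `minimize()` performs only the dual updates, while the new `update_weights()` copies the unshrunk slot weights into the user-visible variables and applies the proximal L1/L2 shrink step. A minimal sketch of the resulting usage, mirroring the updated `SdcaModel` docstring and the `lr.update_weights(train_op).run()` calls added to the tests; the `examples`, `variables`, and `options` dicts are placeholders, exactly as in the docstring:

```python
import tensorflow as tf

# Sketch only: `examples`, `variables` and `options` are the usual SdcaModel
# input dicts, left abstract here just as in the updated class docstring.
lr = tf.contrib.linear_optimizer.SdcaModel(examples, variables, options)

train_op = lr.minimize()                  # dual updates only
update_op = lr.update_weights(train_op)   # copy weights + L1/L2 shrink step

num_iterations = 10  # stand-in for _MAX_ITERATIONS in the tests above

with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  for _ in range(num_iterations):
    sess.run(train_op)
  # Per the new docstring: required on at least one (chief) worker after
  # minimize(); non-chief workers in distributed training may skip it.
  sess.run(update_op)
```

Splitting the shrink step out of `minimize()` is what allows non-chief workers to skip it in distributed training, as the new `update_weights` docstring notes.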
The recommended value for num_loss_partitions in tf.learn (where currently there is one process per worker) is the number of workers @@ -181,4 +181,5 @@ class SDCAOptimizer(object): num_loss_partitions=self._num_loss_partitions, num_table_shards=self._num_table_shards, loss_type=loss_type)) - return sdca_model.minimize(global_step=global_step) + train_op = sdca_model.minimize(global_step=global_step) + return sdca_model, train_op diff --git a/tensorflow/contrib/losses/python/losses/loss_ops.py b/tensorflow/contrib/losses/python/losses/loss_ops.py index 704c9135656..023efd125d8 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops.py @@ -21,7 +21,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework.python.ops import add_arg_scope from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops @@ -43,9 +42,7 @@ __all__ = ["absolute_difference", "mean_squared_error", "sigmoid_cross_entropy", "softmax_cross_entropy", - "sparse_softmax_cross_entropy", - "sum_of_pairwise_squares", - "sum_of_squares"] + "sparse_softmax_cross_entropy"] def _scale_losses(losses, weight): @@ -486,8 +483,7 @@ def hinge_loss(logits, target, scope=None): return losses -@deprecated("2016-10-01", "Use mean_squared_error.") -def sum_of_squares(predictions, targets, weight=1.0, scope=None): +def mean_squared_error(predictions, targets, weight=1.0, scope=None): """Adds a Sum-of-Squares loss to the training procedure. `weight` acts as a coefficient for the loss. If a scalar is provided, then the @@ -512,7 +508,7 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None): ValueError: If the shape of `predictions` doesn't match that of `targets` or if the shape of `weight` is invalid. """ - with ops.name_scope(scope, "sum_of_squares_loss", + with ops.name_scope(scope, "mean_squared_error", [predictions, targets]) as scope: predictions.get_shape().assert_is_compatible_with(targets.get_shape()) if weight is None: @@ -523,17 +519,13 @@ def sum_of_squares(predictions, targets, weight=1.0, scope=None): return compute_weighted_loss(losses, weight) -mean_squared_error = sum_of_squares - - -@deprecated("2016-10-01", "Use mean_pairwise_squared_error.") -def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): +def mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None): """Adds a pairwise-errors-squared loss to the training procedure. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. + Unlike `mean_squared_error`, which is a measure of the differences between + corresponding elements of `predictions` and `targets`, + `mean_pairwise_squared_error` is a measure of the differences between pairs of + corresponding elements of `predictions` and `targets`. For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are three pairs of differences are summed to compute the loss: @@ -566,7 +558,7 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): ValueError: If the shape of `predictions` doesn't match that of `targets` or if the shape of `weight` is invalid. 
""" - with ops.name_scope(scope, "sum_of_pairwise_squares_loss", + with ops.name_scope(scope, "mean_pairwise_squared_error", [predictions, targets]) as scope: predictions.get_shape().assert_is_compatible_with(targets.get_shape()) if weight is None: @@ -607,9 +599,6 @@ def sum_of_pairwise_squares(predictions, targets, weight=1.0, scope=None): return mean_loss -mean_pairwise_squared_error = sum_of_pairwise_squares - - def cosine_distance(predictions, targets, dim, weight=1.0, scope=None): """Adds a cosine-distance loss to the training procedure. diff --git a/tensorflow/contrib/losses/python/losses/loss_ops_test.py b/tensorflow/contrib/losses/python/losses/loss_ops_test.py index 88648bad26a..363caf4f3d5 100644 --- a/tensorflow/contrib/losses/python/losses/loss_ops_test.py +++ b/tensorflow/contrib/losses/python/losses/loss_ops_test.py @@ -779,12 +779,6 @@ class MeanSquaredErrorTest(tf.test.TestCase): self._predictions = tf.constant([4, 8, 12, 8, 1, 3], shape=(2, 3)) self._targets = tf.constant([1, 9, 2, -5, -2, 6], shape=(2, 3)) - def testDeprecatedName(self): - loss = tf.contrib.losses.sum_of_squares( - self._predictions, self._predictions) - with self.test_session(): - self.assertAlmostEqual(0.0, loss.eval(), 3) - def testValueErrorThrownWhenWeightIsNone(self): with self.test_session(): with self.assertRaises(ValueError): @@ -875,13 +869,6 @@ class MeanPairwiseSquaresErrorTest(tf.test.TestCase): self._expected_losses = np.divide(total, 9.0) - def testDeprecatedName(self): - loss = tf.contrib.losses.sum_of_pairwise_squares( - predictions=tf.constant(self._predictions), - targets=tf.constant(self._targets)) - with self.test_session(): - self.assertAlmostEqual(np.sum(self._expected_losses), loss.eval(), 3) - def testValueErrorThrownWhenWeightIsNone(self): with self.test_session(): with self.assertRaises(ValueError): diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 59eabe2bbb3..3f1cd91a5b0 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -136,7 +136,7 @@ $(shell mkdir -p $(DEPDIR) >/dev/null) # Settings for the target compiler. CXX := $(CC_PREFIX) gcc -OPTFLAGS := -O0 +OPTFLAGS := -O2 CXXFLAGS := --std=c++11 -DIS_SLIM_BUILD -fno-exceptions -DNDEBUG $(OPTFLAGS) LDFLAGS := \ -L/usr/local/lib @@ -229,6 +229,8 @@ ifeq ($(TARGET),ANDROID) --sysroot $(NDK_ROOT)/platforms/android-21/arch-arm \ -Wno-narrowing \ -march=armv7-a \ +-mfloat-abi=softfp \ +-mfpu=neon \ -fPIE INCLUDES = \ diff --git a/tensorflow/contrib/makefile/build_all_android.sh b/tensorflow/contrib/makefile/build_all_android.sh index bf36be23f7c..80cc54539c1 100755 --- a/tensorflow/contrib/makefile/build_all_android.sh +++ b/tensorflow/contrib/makefile/build_all_android.sh @@ -66,6 +66,7 @@ if [[ "${USE_HEXAGON}" == "true" ]]; then HEXAGON_INCLUDE="${HEXAGON_PARENT_DIR}/include" fi +# Recommend make -j<#jobs> e.g. 
-j8 to speed up build on multi-core machine if [[ -z "${BUILD_TARGET}" ]]; then make -f tensorflow/contrib/makefile/Makefile \ TARGET=ANDROID NDK_ROOT="${NDK_ROOT}" CC_PREFIX="${CC_PREFIX}" \ diff --git a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py index a4469029c1e..1820f6bf17d 100644 --- a/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py +++ b/tensorflow/contrib/metrics/python/ops/confusion_matrix_ops.py @@ -33,27 +33,34 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32, Calculate the Confusion Matrix for a pair of prediction and label 1-D int arrays. - Considering a prediction array such as: `[1, 2, 3]` - And a label array such as: `[2, 2, 3]` - - The confusion matrix returned would be the following one: - - ```python - [[0, 0, 0] - [0, 1, 0] - [0, 1, 0] - [0, 0, 1]] - ``` - - If `weights` is not None, then the confusion matrix elements are the - corresponding `weights` elements. - - Where the matrix rows represent the prediction labels and the columns + The matrix rows represent the prediction labels and the columns represents the real labels. The confusion matrix is always a 2-D array - of shape [n, n], where n is the number of valid labels for a given + of shape `[n, n]`, where `n` is the number of valid labels for a given classification task. Both prediction and labels must be 1-D arrays of the same shape in order for this function to work. + If `num_classes` is None, then `num_classes` will be set to the one plus + the maximum value in either predictions or labels. + Class labels are expected to start at 0. E.g., if `num_classes` was + three, then the possible labels would be `[0, 1, 2]`. + + If `weights` is not `None`, then each prediction contributes its + corresponding weight to the total value of the confusion matrix cell. + + For example: + + ```python + tf.contrib.metrics.confusion_matrix([1, 2, 4], [2, 2, 4]) ==> + [[0 0 0 0 0] + [0 0 1 0 0] + [0 0 1 0 0] + [0 0 0 0 0] + [0 0 0 0 1]] + ``` + + Note that the possible labels are assumed to be `[0, 1, 2, 3, 4]`, + resulting in a 5x5 confusion matrix. + Args: predictions: A 1-D array representing the predictions for a given classification. diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 6cbe01e6713..2359fbd5691 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -22,6 +22,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.framework import deprecated from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework import tensor_util from tensorflow.contrib.framework.python.ops import variables as contrib_variables @@ -113,13 +114,15 @@ def _safe_scalar_div(numerator, denominator, name): name=name) -def _create_local(name, shape=None, collections=None, dtype=dtypes.float32): +def _create_local(name, shape, collections=None, validate_shape=True, + dtype=dtypes.float32): """Creates a new local variable. Args: name: The name of the new or existing variable. shape: Shape of the new or existing variable. collections: A list of collection names to which the Variable will be added. + validate_shape: Whether to validate the shape of the variable. dtype: Data type of the variables. 
Returns: @@ -132,7 +135,8 @@ def _create_local(name, shape=None, collections=None, dtype=dtypes.float32): initial_value=array_ops.zeros(shape, dtype=dtype), name=name, trainable=False, - collections=collections) + collections=collections, + validate_shape=validate_shape) def _count_condition(values, weights=None, metrics_collections=None, @@ -1225,6 +1229,8 @@ def _at_k_name(name, k, class_id=None): return name +@deprecated('2016-11-08', 'Please use `streaming_sparse_recall_at_k`, ' + 'and reshape labels from [batch_size] to [batch_size, 1].') @deprecated_args(IGNORE_MASK_DATE, IGNORE_MASK_INSTRUCTIONS, 'ignore_mask') def streaming_recall_at_k(predictions, labels, k, ignore_mask=None, weights=None, metrics_collections=None, @@ -1328,7 +1334,7 @@ def streaming_sparse_recall_at_k(predictions, labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. @@ -1429,7 +1435,7 @@ def streaming_sparse_precision_at_k(predictions, [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be @@ -1596,7 +1602,7 @@ def sparse_average_precision_at_k(predictions, labels, k): [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1698,7 +1704,7 @@ def streaming_sparse_average_precision_at_k(predictions, [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1770,9 +1776,8 @@ def _select_class_id(ids, selected_id): selected_id: Int id to select. Returns: - `SparseTensor` of same dimensions as `ids`, except for the last dimension, - which might be smaller. This contains only the entries equal to - `selected_id`. + `SparseTensor` of same dimensions as `ids`. This contains only the entries + equal to `selected_id`. 
""" if isinstance(ids, (ops.SparseTensor, ops.SparseTensorValue)): return sparse_ops.sparse_retain( @@ -1782,7 +1787,7 @@ def _select_class_id(ids, selected_id): # tf.equal and tf.reduce_any? # Shape of filled IDs is the same as `ids` with the last dim collapsed to 1. - ids_shape = array_ops.shape(ids) + ids_shape = array_ops.shape(ids, out_type=dtypes.int64) ids_last_dim = array_ops.size(ids_shape) - 1 filled_selected_id_shape = math_ops.reduced_shape( ids_shape, array_ops.reshape(ids_last_dim, [1])) @@ -1790,7 +1795,9 @@ def _select_class_id(ids, selected_id): # Intersect `ids` with the selected ID. filled_selected_id = array_ops.fill( filled_selected_id_shape, math_ops.to_int64(selected_id)) - return set_ops.set_intersection(filled_selected_id, ids) + result = set_ops.set_intersection(filled_selected_id, ids) + return ops.SparseTensor( + indices=result.indices, values=result.values, shape=ids_shape) def _maybe_select_class_id(labels, predictions_idx, selected_id=None): @@ -2827,7 +2834,8 @@ def streaming_concat(values, # applied to contiguous slices init_size = 0 if max_size is None else max_size init_shape = [init_size] + fixed_shape - array = _create_local('array', shape=init_shape, dtype=values.dtype) + array = _create_local( + 'array', shape=init_shape, validate_shape=False, dtype=values.dtype) size = _create_local('size', shape=[], dtype=dtypes.int32) perm = [0 if n == axis else n + 1 if n < axis else n for n in range(ndim)] @@ -2900,6 +2908,7 @@ def aggregate_metric_map(names_to_tuples): This function is useful for pairing metric names with their associated value and update ops when the list of metrics is long. For example: + ```python metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({ 'Mean Absolute Error': new_slim.metrics.streaming_mean_absolute_error( predictions, labels, weights), @@ -2910,6 +2919,7 @@ def aggregate_metric_map(names_to_tuples): 'RMSE Log': new_slim.metrics.streaming_root_mean_squared_error( predictions, labels, weights), }) + ``` Args: names_to_tuples: a map of metric names to tuples, each of which contain the diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index efcd1de4fe6..40a68794563 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -132,6 +132,10 @@ def _binary_3d_label_to_sparse(labels): return tf.SparseTensor.from_value(_binary_3d_label_to_sparse_value(labels)) +def _assert_nan(test_case, actual): + test_case.assertTrue(math.isnan(actual), 'Expected NAN, got %s.' % actual) + + class StreamingMeanTest(tf.test.TestCase): def setUp(self): @@ -1603,6 +1607,9 @@ class StreamingPrecisionRecallThresholdsTest(tf.test.TestCase): self.assertAlmostEqual(expected_rec, rec.eval(), 2) +# TODO(ptucker): Remove when we remove `streaming_recall_at_k`. +# This op will be deprecated soon in favor of `streaming_sparse_recall_at_k`. +# Until then, this test validates that both ops yield the same results. 
class StreamingRecallAtKTest(tf.test.TestCase): def setUp(self): @@ -1639,57 +1646,78 @@ class StreamingRecallAtKTest(tf.test.TestCase): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=1) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=1) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(0.25, sess.run(update_op)) self.assertEqual(0.25, recall.eval()) + self.assertEqual(0.25, sess.run(sp_update_op)) + self.assertEqual(0.25, sp_recall.eval()) def testSingleUpdateKIs2(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=2) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=2) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(0.5, sess.run(update_op)) self.assertEqual(0.5, recall.eval()) + self.assertEqual(0.5, sess.run(sp_update_op)) + self.assertEqual(0.5, sp_recall.eval()) def testSingleUpdateKIs3(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=3) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=3) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(1.0, sess.run(update_op)) self.assertEqual(1.0, recall.eval()) + self.assertEqual(1.0, sess.run(sp_update_op)) + self.assertEqual(1.0, sp_recall.eval()) def testSingleUpdateSomeMissingKIs2(self): predictions = tf.constant(self._np_predictions, shape=(self._batch_size, self._num_classes), dtype=tf.float32) - labels = tf.constant(self._np_labels, shape=(self._batch_size,)) + labels = tf.constant( + self._np_labels, shape=(self._batch_size,), dtype=tf.int64) weights = tf.constant([0, 1, 1, 1], shape=(self._batch_size,), dtype=tf.float32) mask = tf.constant([False, False, True, False], shape=(self._batch_size,), dtype=tf.bool) recall, update_op = metrics.streaming_recall_at_k( predictions, labels, k=2, ignore_mask=mask, weights=weights) + sp_recall, sp_update_op = metrics.streaming_sparse_recall_at_k( + predictions, tf.reshape(labels, (self._batch_size, 1)), k=2, + ignore_mask=mask, weights=weights) with self.test_session() as sess: sess.run(tf.initialize_local_variables()) self.assertEqual(1.0, sess.run(update_op)) self.assertEqual(1.0, recall.eval()) + self.assertEqual(1.0, sess.run(sp_update_op)) + self.assertEqual(1.0, sp_recall.eval()) class StreamingSparsePrecisionTest(tf.test.TestCase): @@ -1718,8 +1746,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): # Run per-step op and assert expected values. 
if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertEqual(expected, update.eval()) self.assertEqual(expected, metric.eval()) @@ -1735,7 +1763,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): ignore_mask = tf.constant(ignore_mask, tf.bool) predictions = tf.constant(predictions, tf.float32) metric = metric_ops.sparse_average_precision_at_k( - predictions=predictions, labels=labels, k=k) + predictions, labels, k) self.assertAllEqual(expected, metric.eval()) def _test_streaming_sparse_average_precision_at_k( @@ -1745,7 +1773,7 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): weights = tf.constant(weights, tf.float32) predictions = tf.constant(predictions, tf.float32) metric, update = metrics.streaming_sparse_average_precision_at_k( - predictions=predictions, labels=labels, k=k, weights=weights) + predictions, labels, k, weights=weights) # Fails without initialized vars. self.assertRaises(tf.OpError, metric.eval) @@ -1755,8 +1783,8 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): # Run per-step op and assert expected values. if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertAlmostEqual(expected, update.eval()) self.assertAlmostEqual(expected, metric.eval()) @@ -1849,89 +1877,97 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): predictions, labels, k, expected=streaming_average_precision[i], weights=weights) - def test_one_label_at_k1_no_predictions(self): + def test_one_label_at_k1_nan(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Classes 0,1,2 have 0 predictions, class 4 is out of range. - for class_id in [0, 1, 2, 4]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,1,2 have 0 predictions, class 4 is out of range. + for class_id in (0, 1, 2, 4): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=NAN, class_id=class_id) def test_one_label_at_k1(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=1.0 / 2, class_id=3) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2, class_id=3) - # All classes: 2 labels, 2 predictions, 1 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=1, expected=1.0 / 2) + # All classes: 2 labels, 2 predictions, 1 correct. 
+ self._test_streaming_sparse_precision_at_k( + predictions, labels, k=1, expected=1.0 / 2) def test_three_labels_at_k5_no_predictions(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 1,3,8 have 0 predictions, class 10 is out of range. - for class_id in [1, 3, 8, 10]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 1,3,8 have 0 predictions, class 10 is out of range. + for class_id in (1, 3, 8, 10): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_three_labels_at_k5_no_labels(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 0,4,6,9: 0 labels, >=1 prediction. - for class_id in [0, 4, 6, 9]: - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,4,6,9: 0 labels, >=1 prediction. + for class_id in (0, 4, 6, 9): + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_three_labels_at_k5(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] ] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] - ] - sp_labels = _binary_2d_label_to_sparse_value(labels) + ]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 2: 2 labels, 2 correct predictions. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, 2 correct predictions. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=2.0 / 2, + class_id=2) - # Class 5: 1 label, 1 correct prediction. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1, class_id=5) + # Class 5: 1 label, 1 correct prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) - # Class 7: 1 label, 1 incorrect prediction. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1, class_id=7) + # Class 7: 1 label, 1 incorrect prediction. + self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) - # All classes: 10 predictions, 3 correct. - self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=3.0 / 10) + # All classes: 10 predictions, 3 correct. 
+ self._test_streaming_sparse_precision_at_k( + predictions, labels, k=5, expected=3.0 / 10) - def test_3d_no_predictions(self): + def test_3d_nan(self): predictions = [[ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] @@ -1939,19 +1975,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Classes 1,3,8 have 0 predictions, class 10 is out of range. - for class_id in [1, 3, 8, 10]: + for class_id in (1, 3, 8, 10): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_3d_no_labels(self): predictions = [[ @@ -1961,19 +1996,18 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Classes 0,4,6,9: 0 labels, >=1 prediction. - for class_id in [0, 4, 6, 9]: + for class_id in (0, 4, 6, 9): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_3d(self): predictions = [[ @@ -1983,30 +2017,29 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 4 predictions, all correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2) + predictions, labels, k=5, expected=4.0 / 4, class_id=2) # Class 5: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5) + predictions, labels, k=5, expected=2.0 / 2, class_id=5) # Class 7: 2 predictions, 1 correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7) + predictions, labels, k=5, expected=1.0 / 2, class_id=7) # All classes: 20 predictions, 7 correct. 
self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=7.0 / 20) + predictions, labels, k=5, expected=7.0 / 20) def test_3d_ignore_all(self): predictions = [[ @@ -2016,28 +2049,26 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) for class_id in xrange(10): self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0], [0]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, - ignore_mask=[[False], [True]], weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], + weights=[[0], [1]]) self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, - weights=[[0, 0], [0, 0]]) + predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) def test_3d_ignore_some(self): predictions = [[ @@ -2047,43 +2078,42 @@ class StreamingSparsePrecisionTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[1], [0]]) # Class 2: 2 predictions, both correct. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[0], [1]]) # Class 7: 1 incorrect prediction. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, ignore_mask=[[False], [True]], weights=[[1], [1]]) # Class 7: 1 correct prediction. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, ignore_mask=[[True], [False]], weights=[[1], [1]]) # Class 7: no predictions. self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=7, + predictions, labels, k=5, expected=NAN, class_id=7, weights=[[1, 0], [0, 1]]) # Class 7: 2 predictions, 1 correct. 
self._test_streaming_sparse_precision_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, weights=[[0, 1], [1, 0]]) def test_sparse_tensor_value(self): @@ -2127,177 +2157,172 @@ class StreamingSparseRecallTest(tf.test.TestCase): # Run per-step op and assert expected values. if math.isnan(expected): - self.assertTrue(math.isnan(update.eval())) - self.assertTrue(math.isnan(metric.eval())) + _assert_nan(self, update.eval()) + _assert_nan(self, metric.eval()) else: self.assertEqual(expected, update.eval()) self.assertEqual(expected, metric.eval()) - def test_one_label_at_k1_empty_classes(self): + def test_one_label_at_k1_nan(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) # Classes 0,1 have 0 labels, 0 predictions, class 4 is out of range. - for class_id in [0, 1, 4]: - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=class_id) + for labels in (sparse_labels, dense_labels): + for class_id in (0, 1, 4): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, + class_id=class_id) def test_one_label_at_k1_no_predictions(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 2: 0 predictions. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.0, - class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 0 predictions. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0, + class_id=2) def test_one_label_at_k1(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, + class_id=3) - # All classes: 2 labels, 2 predictions, 1 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2) + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2) def test_one_label_at_k1_weighted(self): predictions = [[0.1, 0.3, 0.2, 0.4], [0.1, 0.2, 0.3, 0.4]] - labels = [[0, 0, 0, 1], [0, 0, 1, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + sparse_labels = _binary_2d_label_to_sparse_value( + [[0, 0, 0, 1], [0, 0, 1, 0]]) + dense_labels = np.array([[3], [2]], dtype=np.int64) - # Class 3: 1 label, 2 predictions, 1 correct. 
- self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(2.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - class_id=3, weights=(0.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - class_id=3, weights=(1.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 2, - class_id=3, weights=(2.0, 3.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 3, - class_id=3, weights=(3.0, 2.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.3, - class_id=3, weights=(0.3, 0.6)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.6, - class_id=3, weights=(0.6, 0.3)) + for labels in (sparse_labels, dense_labels): + # Class 3: 1 label, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, class_id=3, + weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, class_id=3, + weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 2, class_id=3, + weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 3, class_id=3, + weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.3, class_id=3, + weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.6, class_id=3, + weights=(0.6, 0.3)) - # All classes: 2 labels, 2 predictions, 1 correct. 
- self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=NAN, - weights=(0.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(1.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(2.0,)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 1, - weights=(1.0, 0.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.0 / 1, - weights=(0.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=1.0 / 2, - weights=(1.0, 1.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=2.0 / 5, - weights=(2.0, 3.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=3.0 / 5, - weights=(3.0, 2.0)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.3 / 0.9, - weights=(0.3, 0.6)) - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=1, expected=0.6 / 0.9, - weights=(0.6, 0.3)) + # All classes: 2 labels, 2 predictions, 1 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=NAN, weights=(0.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(2.0,)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 1, weights=(1.0, 0.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.0 / 1, weights=(0.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=1.0 / 2, weights=(1.0, 1.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=2.0 / 5, weights=(2.0, 3.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=3.0 / 5, weights=(3.0, 2.0)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.3 / 0.9, weights=(0.3, 0.6)) + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=1, expected=0.6 / 0.9, weights=(0.6, 0.3)) - def test_three_labels_at_k5_no_labels(self): + def test_three_labels_at_k5_nan(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. - for class_id in [0, 3, 4, 6, 9, 10]: - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=NAN, - class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. 
+ for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_three_labels_at_k5_no_predictions(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 8: 1 label, no predictions. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1, - class_id=8) + for labels in (sparse_labels, dense_labels): + # Class 8: 1 label, no predictions. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=8) def test_three_labels_at_k5(self): predictions = [ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6]] - labels = [ + sparse_labels = _binary_2d_label_to_sparse_value([ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]] - sp_labels = _binary_2d_label_to_sparse_value(labels) + [0, 1, 1, 0, 0, 1, 0, 0, 0, 0]]) + dense_labels = np.array([[2, 7, 8], [1, 2, 5]], dtype=np.int64) - # Class 2: 2 labels, both correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=2.0 / 2, - class_id=2) + for labels in (sparse_labels, dense_labels): + # Class 2: 2 labels, both correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=2.0 / 2, class_id=2) - # Class 5: 1 label, incorrect. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=1.0 / 1, - class_id=5) + # Class 5: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=1.0 / 1, class_id=5) - # Class 7: 1 label, incorrect. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=0.0 / 1, - class_id=7) + # Class 7: 1 label, incorrect. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0 / 1, class_id=7) - # All classes: 6 labels, 3 correct. - self._test_streaming_sparse_recall_at_k( - predictions=predictions, labels=sp_labels, k=5, expected=3.0 / 6) + # All classes: 6 labels, 3 correct. + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=3.0 / 6) - def test_3d_no_labels(self): + def test_3d_nan(self): predictions = [[ [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9], [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6] @@ -2305,19 +2330,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + sparse_labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) - # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. 
- for class_id in [0, 3, 4, 6, 9, 10]: - self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 0,3,4,6,9 have 0 labels, class 10 is out of range. + for class_id in (0, 3, 4, 6, 9, 10): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=NAN, class_id=class_id) def test_3d_no_predictions(self): predictions = [[ @@ -2327,19 +2359,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + sparse_labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 1, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) + dense_labels = np.array([[ + [2, 7, 8], + [1, 2, 5] + ], [ + [1, 2, 5], + [2, 7, 8], + ]], dtype=np.int64) - # Classes 1,8 have 0 predictions, >=1 label. - for class_id in [1, 8]: - self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=0.0, class_id=class_id) + for labels in (sparse_labels, dense_labels): + # Classes 1,8 have 0 predictions, >=1 label. + for class_id in (1, 8): + self._test_streaming_sparse_recall_at_k( + predictions, labels, k=5, expected=0.0, class_id=class_id) def test_3d(self): predictions = [[ @@ -2349,30 +2388,29 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 4 labels, all correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=4.0 / 4, class_id=2) + predictions, labels, k=5, expected=4.0 / 4, class_id=2) # Class 5: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2, class_id=5) + predictions, labels, k=5, expected=2.0 / 2, class_id=5) # Class 7: 2 labels, 1 incorrect. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2, class_id=7) + predictions, labels, k=5, expected=1.0 / 2, class_id=7) # All classes: 12 labels, 7 correct. 
self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=7.0 / 12) + predictions, labels, k=5, expected=7.0 / 12) def test_3d_ignore_all(self): predictions = [[ @@ -2382,27 +2420,26 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) for class_id in xrange(10): self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0], [0]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=class_id, + predictions, labels, k=5, expected=NAN, class_id=class_id, weights=[[0, 0], [0, 0]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, - ignore_mask=[[False], [True]], weights=[[0], [1]]) + predictions, labels, k=5, expected=NAN, ignore_mask=[[False], [True]], + weights=[[0], [1]]) self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) + predictions, labels, k=5, expected=NAN, weights=[[0, 0], [0, 0]]) def test_3d_ignore_some(self): predictions = [[ @@ -2412,43 +2449,42 @@ class StreamingSparseRecallTest(tf.test.TestCase): [0.3, 0.0, 0.7, 0.2, 0.4, 0.9, 0.5, 0.8, 0.1, 0.6], [0.5, 0.1, 0.6, 0.3, 0.8, 0.0, 0.7, 0.2, 0.4, 0.9] ]] - labels = [[ + labels = _binary_3d_label_to_sparse_value([[ [0, 0, 1, 0, 0, 0, 0, 1, 1, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 0] ], [ [0, 1, 1, 0, 0, 1, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 1, 0] - ]] - sp_labels = _binary_3d_label_to_sparse_value(labels) + ]]) # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[1], [0]]) # Class 2: 2 labels, both correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=2.0 / 2.0, class_id=2, + predictions, labels, k=5, expected=2.0 / 2.0, class_id=2, ignore_mask=[[False], [False]], weights=[[0], [1]]) # Class 7: 1 label, correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 1.0, class_id=7, ignore_mask=[[True], [False]], weights=[[1], [1]]) # Class 7: 1 label, incorrect. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=0.0 / 1.0, class_id=7, + predictions, labels, k=5, expected=0.0 / 1.0, class_id=7, ignore_mask=[[False], [True]], weights=[[1], [1]]) # Class 7: 2 labels, 1 correct. self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=1.0 / 2.0, class_id=7, + predictions, labels, k=5, expected=1.0 / 2.0, class_id=7, weights=[[1, 0], [1, 0]]) # Class 7: No labels. 
self._test_streaming_sparse_recall_at_k( - predictions, sp_labels, k=5, expected=NAN, class_id=7, + predictions, labels, k=5, expected=NAN, class_id=7, weights=[[0, 1], [0, 1]]) def test_sparse_tensor_value(self): @@ -3678,6 +3714,22 @@ class StreamingConcatTest(tf.test.TestCase): with self.assertRaises(ValueError): metrics.streaming_concat(tf.placeholder(tf.float32, [None, None])) + def testStreamingConcatReset(self): + with self.test_session() as sess: + values = tf.placeholder(tf.int32, [None]) + concatenated, update_op = metrics.streaming_concat(values) + sess.run(tf.initialize_local_variables()) + + self.assertAllEqual([], concatenated.eval()) + + sess.run([update_op], feed_dict={values: [0, 1, 2]}) + self.assertAllEqual([0, 1, 2], concatenated.eval()) + + sess.run(tf.initialize_local_variables()) + + sess.run([update_op], feed_dict={values: [3, 4]}) + self.assertAllEqual([3, 4], concatenated.eval()) + class AggregateMetricsTest(tf.test.TestCase): @@ -3928,7 +3980,8 @@ class ExpandAndTileTest(tf.test.TestCase): indices=[[0, i[0], i[1]] for i in x.indices], values=x.values, shape=[1, 3, 3]) self._assert_sparse_tensors_equal( - expected_result_dim0, metric_ops.expand_and_tile(x, multiple=1).eval()) + expected_result_dim0, + metric_ops.expand_and_tile(x, multiple=1).eval()) for dim in (-2, 0): self._assert_sparse_tensors_equal( expected_result_dim0, diff --git a/tensorflow/contrib/quantization/kernels/hexagon/BUILD b/tensorflow/contrib/quantization/kernels/hexagon/BUILD index b57a2ac1b59..df0dbf94ce3 100644 --- a/tensorflow/contrib/quantization/kernels/hexagon/BUILD +++ b/tensorflow/contrib/quantization/kernels/hexagon/BUILD @@ -11,6 +11,7 @@ licenses(["notice"]) # Apache 2.0 load( "//tensorflow:tensorflow.bzl", "tf_cc_test", + "tf_kernel_library", ) filegroup( @@ -43,3 +44,36 @@ tf_cc_test( "//tensorflow/core/kernels:ops_util", ], ) + +tf_cc_test( + name = "graph_transferer_test", + size = "small", + srcs = ["graph_transferer_test.cc"], + deps = [ + "//tensorflow/cc:cc_ops", + "//tensorflow/contrib/quantization/kernels/hexagon:graph_transferer", + "//tensorflow/core:core_cpu", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + +tf_kernel_library( + name = "graph_transferer", + srcs = [ + "graph_transferer.cc", + ], + hdrs = [ + "graph_transferer.h", + ], + deps = [ + "//tensorflow/core", + "//tensorflow/core:framework", + "//third_party/eigen3", + ], +) diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc new file mode 100644 index 00000000000..2bcb6ac652c --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.cc @@ -0,0 +1,23 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h" + +namespace tensorflow { +void GraphTransferer::LoadGraphFromProto( + ::tensorflow::protobuf::MessageLite* proto) { + // TODO(satok): implement +} +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h new file mode 100644 index 00000000000..5d83283c1b9 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h @@ -0,0 +1,40 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ + +#include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { + +// GraphTransferer transfers graph definitions into SoC memory. +// This functionality is effective only if the SoC is capable of running +// the graph on that chip. +// TODO(satok): support transferring subgraphs to be able to split graphs +// to avoid unsupported ops in SoC. +class GraphTransferer { + public: + GraphTransferer() = default; + void LoadGraphFromProto(::tensorflow::protobuf::MessageLite* proto); + + private: + TF_DISALLOW_COPY_AND_ASSIGN(GraphTransferer); +}; + +} // namespace tensorflow + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_QUANTIZATION_KERNELS_HEXAGON_GRAPH_TRANSFERER_H_ diff --git a/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc new file mode 100644 index 00000000000..21d53816559 --- /dev/null +++ b/tensorflow/contrib/quantization/kernels/hexagon/graph_transferer_test.cc @@ -0,0 +1,57 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#include "tensorflow/contrib/quantization/kernels/hexagon/graph_transferer.h" +#include "tensorflow/cc/ops/const_op.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/graph/graph_def_builder.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session.h" +#include "tensorflow/core/public/session_options.h" + +namespace tensorflow { + +class GraphTransfererTest : public ::testing::Test { + protected: + void SetUp() final { + SessionOptions session_options; + session_options.env = Env::Default(); + _session = std::unique_ptr<Session>(NewSession(session_options)); + } + + std::unique_ptr<Session> _session; +}; + +static GraphDef CreateSmallGraphDef() { + Scope root = Scope::NewRootScope(); + ops::Output node_a = ops::Const(root.WithOpName("a"), 1); + ops::Output node_b = ops::Const(root.WithOpName("b"), 2); + ops::Add(root.WithOpName("a_plus_b"), node_a, node_b); + + GraphDef def; + TF_CHECK_OK(root.ToGraphDef(&def)); + return def; +} + +TEST_F(GraphTransfererTest, LoadGraph) { + GraphDef def = CreateSmallGraphDef(); + _session->Create(def); + + GraphTransferer gt; + gt.LoadGraphFromProto(&def); +} + +} // namespace tensorflow diff --git a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc index 3d139fbe0a0..f5b7f482e27 100644 --- a/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc +++ b/tensorflow/contrib/quantization/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc @@ -30,7 +30,7 @@ limitations under the License. #include "tensorflow/core/platform/test.h" #ifdef USE_HEXAGON_LIBS -#include "tensorflow/core/platform/hexagon/gemm_wrapper.h" +#include "tensorflow/core/platform/hexagon/soc_interface.h" #include "tensorflow/core/platform/profile_utils/cpu_utils.h" #endif @@ -42,9 +42,9 @@ class QuantizedMatMulOpForHexagonTest : public OpsTestBase { #ifdef USE_HEXAGON_LIBS profile_utils::CpuUtils::EnableClockCycleProfiling(true); LOG(INFO) << "Hexagon libs are linked (wrapper version = " - << hexagon_gemm_wrapper_GetWrapperVersion() + << soc_interface_GetWrapperVersion() << ", hexagon binary version = " - << hexagon_gemm_wrapper_GetHexagonBinaryVersion() << ")"; + << soc_interface_GetHexagonBinaryVersion() << ")"; LOG(INFO) << "Cpu frequency = " << profile_utils::CpuUtils::GetCycleCounterFrequency(); #else @@ -58,15 +58,14 @@ class QuantizedMatMulOpForHexagonTest : public OpsTestBase { TEST_F(QuantizedMatMulOpForHexagonTest, EvaluateSharedLibOverhead) { const uint64 overhead_shared_lib_start = profile_utils::CpuUtils::GetCurrentClockCycle(); - const int wrapper_version = hexagon_gemm_wrapper_GetWrapperVersion(); + const int wrapper_version = soc_interface_GetWrapperVersion(); const uint64 overhead_shared_lib_end = profile_utils::CpuUtils::GetCurrentClockCycle(); const uint64 overhead_shared_lib_diff = (overhead_shared_lib_end - overhead_shared_lib_start); const uint64 overhead_hexagon_rpc_start = profile_utils::CpuUtils::GetCurrentClockCycle(); - const int hexagon_binary_version = - hexagon_gemm_wrapper_GetHexagonBinaryVersion(); + const int hexagon_binary_version = soc_interface_GetHexagonBinaryVersion(); const uint64 overhead_hexagon_rpc_end = profile_utils::CpuUtils::GetCurrentClockCycle(); const uint64 overhead_hexagon_rpc_diff = diff --git
a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py index 5fd9653b50b..30134016845 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/lstm_ops_test.py @@ -359,7 +359,7 @@ class LSTMBlockCellTest(tf.test.TestCase): inp = tf.convert_to_tensor( np.random.randn(batch_size, input_size), dtype=tf.float32) inputs.append(inp) - seq_lengths = [3, 4, 5] + seq_lengths = tf.constant([3, 4, 5]) initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890213) with tf.variable_scope("basic", initializer=initializer): @@ -400,7 +400,7 @@ class LSTMBlockCellTest(tf.test.TestCase): outputs = [] state = None for i, inp in enumerate(inputs): - lengths = [int(i < l) for l in seq_lengths] + lengths = [int(i < l) for l in seq_lengths.eval()] output, state = cell( [inp], initial_state=state, diff --git a/tensorflow/contrib/rnn/python/ops/lstm_ops.py b/tensorflow/contrib/rnn/python/ops/lstm_ops.py index 329016e71e0..2ca5c039e27 100644 --- a/tensorflow/contrib/rnn/python/ops/lstm_ops.py +++ b/tensorflow/contrib/rnn/python/ops/lstm_ops.py @@ -532,7 +532,7 @@ class LSTMBlockWrapper(fused_rnn_cell.FusedRNNCell): dtype = initial_state[0].dtype # create the actual cell - if sequence_length: + if sequence_length is not None: sequence_length = ops.convert_to_tensor(sequence_length) initial_cell_state, initial_output = initial_state # pylint: disable=unpacking-non-sequence cell_states, outputs = self._call_cell(inputs, initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index fc0a02429d8..ce4f2c8780c 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -106,20 +106,12 @@ filegroup( cc_library( name = "session_bundle", - srcs = ["session_bundle.cc"], hdrs = ["session_bundle.h"], - copts = if_ios(["-DGOOGLE_LOGGING"]), visibility = ["//visibility:public"], deps = [ + ":session_bundle_lite", ":signature", - ] + if_not_mobile([ - ":manifest_proto_cc", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - # mobile not supported yet - ]), + ], ) # This is a lite version of the session_bundle target that does not link in any @@ -139,6 +131,7 @@ cc_library( "//tensorflow/core:framework", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", + "//tensorflow/core/util/tensor_bundle:naming", # mobile not supported yet ]), ) @@ -198,21 +191,6 @@ py_test( ], ) -cc_library( - name = "signature", - srcs = ["signature.cc"], - hdrs = ["signature.h"], - visibility = ["//visibility:public"], - deps = if_not_mobile([ - ":manifest_proto_cc", - "//tensorflow/core:core_cpu", - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:tensorflow_opensource", - # mobile not supported yet - ]), -) - # This is a lite version of the signature target that does not link in any # Tensorflow ops in order to minimize its size. Clients using this should # link any required ops manually. 
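For context on the lstm_ops.py hunk above: the guard changes from `if sequence_length:` to `if sequence_length is not None:` because the argument may now arrive as a `Tensor` (the updated test passes `tf.constant([3, 4, 5])`), and truth-testing a `Tensor` is ambiguous (recent TensorFlow builds raise a TypeError), while an empty Python list would make the old check silently skip the conversion. A minimal sketch of the distinction, using only the TF 0.x-style Python API already seen in this diff; the helper name below is illustrative and not part of the patch:

import tensorflow as tf

def maybe_convert(sequence_length=None):
  # `is not None` distinguishes "argument omitted" from "argument supplied".
  # Truth-testing the value instead would be ambiguous for a Tensor argument
  # and would wrongly skip the conversion for an empty list.
  if sequence_length is not None:
    sequence_length = tf.convert_to_tensor(sequence_length)
  return sequence_length

maybe_convert()                        # None is returned; the branch is skipped.
maybe_convert([3, 4, 5])               # Python list is converted to a Tensor.
maybe_convert(tf.constant([3, 4, 5]))  # Tensor is passed through convert_to_tensor.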
@@ -230,6 +208,17 @@ cc_library( ]), ) +cc_library( + name = "signature", + hdrs = ["signature.h"], + visibility = ["//visibility:public"], + deps = [ + ":signature_lite", + ] + if_not_mobile([ + "//tensorflow/core:tensorflow_opensource", + ]), +) + cc_test( name = "signature_test", size = "small", diff --git a/tensorflow/contrib/session_bundle/session_bundle.cc b/tensorflow/contrib/session_bundle/session_bundle.cc index 7e0242c9981..577370dc471 100644 --- a/tensorflow/contrib/session_bundle/session_bundle.cc +++ b/tensorflow/contrib/session_bundle/session_bundle.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/protobuf/meta_graph.pb.h" #include "tensorflow/core/protobuf/saver.pb.h" #include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/util/tensor_bundle/naming.h" namespace tensorflow { namespace serving { @@ -85,7 +86,7 @@ void AddAssetsTensorsToInputs(const StringPiece export_dir, io::JoinPath(export_dir, kAssetsDirectory, asset.filename())); inputs->push_back( {asset.tensor_binding().tensor_name(), assets_file_tensor}); - } + } } // Historically, model exporter(exporter.py) takes only saver with @@ -104,7 +105,7 @@ void AddAssetsTensorsToInputs(const StringPiece export_dir, // distributed among the export.index and export.data-?????-of-????? files. string GetVariablesFilename(const StringPiece export_dir) { const char kVariablesFilename[] = "export"; - const char kVariablesIndexFilename[] = "export.index"; // V2 ckpts + const string kVariablesIndexFilename = MetaFilename("export"); // V2 ckpts const char kVariablesFilenamePattern[] = "export-\?\?\?\?\?-of-\?\?\?\?\?"; if (Env::Default()->FileExists( io::JoinPath(export_dir, kVariablesFilename)) || @@ -158,8 +159,7 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal( // Use serving graph_def in MetaGraphDef collection_def. if (graph_collection_def.any_list().value_size() != 1) { return errors::FailedPrecondition( - "Expected exactly one serving GraphDef in : ", - DebugStringIfAvailable(bundle->meta_graph_def)); + "Expected exactly one serving GraphDef in : ", export_dir); } const auto& any = graph_collection_def.any_list().value(0); GraphDef graph_def; @@ -194,9 +194,8 @@ Status LoadSessionBundleFromPathUsingRunOptionsInternal( const auto init_op_it = collection_def_map.find(kInitOpKey); if (init_op_it != collection_def_map.end()) { if (init_op_it->second.node_list().value_size() != 1) { - return errors::FailedPrecondition( - strings::StrCat("Expected exactly one serving init op in : ", - DebugStringIfAvailable(bundle->meta_graph_def))); + return errors::FailedPrecondition(strings::StrCat( + "Expected exactly one serving init op in : ", export_dir)); } TF_RETURN_IF_ERROR(RunInitOp(run_options, export_dir, asset_files, init_op_it->second.node_list().value(0), diff --git a/tensorflow/contrib/slim/python/slim/evaluation.py b/tensorflow/contrib/slim/python/slim/evaluation.py index 9f702bfdea0..5c28a265381 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation.py +++ b/tensorflow/contrib/slim/python/slim/evaluation.py @@ -379,7 +379,8 @@ def evaluation_loop(master, variables_to_restore=None, eval_interval_secs=60, max_number_of_evaluations=None, - session_config=None): + session_config=None, + timeout=None): """Runs TF-Slim's Evaluation Loop. Args: @@ -406,6 +407,8 @@ def evaluation_loop(master, If the value is left as 'None', the evaluation continues indefinitely. session_config: An instance of `tf.ConfigProto` that will be used to configure the `Session`. 
If left as `None`, the default will be used. + timeout: The maximum amount of time to wait between checkpoints. If left as + `None`, then the process will wait indefinitely. Returns: The value of `final_op` or `None` if `final_op` is `None`. @@ -429,7 +432,8 @@ def evaluation_loop(master, number_of_evaluations = 0 for checkpoint_path in checkpoints_iterator(checkpoint_dir, - eval_interval_secs): + eval_interval_secs, + timeout): logging.info('Starting evaluation at ' + time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime())) @@ -457,7 +461,9 @@ number_of_evaluations >= max_number_of_evaluations): logging.info('Reached max_number_of_evaluations=%s. Exit', max_number_of_evaluations) - break + return final_op_value + logging.info( + 'Timed-out waiting for new checkpoint file. Exiting evaluation loop.') return final_op_value diff --git a/tensorflow/contrib/slim/python/slim/evaluation_test.py b/tensorflow/contrib/slim/python/slim/evaluation_test.py index f78de7ad658..d72a0296ec8 100644 --- a/tensorflow/contrib/slim/python/slim/evaluation_test.py +++ b/tensorflow/contrib/slim/python/slim/evaluation_test.py @@ -255,6 +255,36 @@ class EvaluationTest(tf.test.TestCase): '/non-existent-dir', timeout=0)) self.assertEqual(ret, []) + def testEvaluationLoopTimeout(self): + _, update_op = slim.metrics.streaming_accuracy( + self._predictions, self._labels) + init_op = tf.group(tf.initialize_all_variables(), + tf.initialize_local_variables()) + + # Create checkpoint and log directories. + chkpt_dir = os.path.join(self.get_temp_dir(), 'tmp_logs/') + gfile.MakeDirs(chkpt_dir) + logdir = os.path.join(self.get_temp_dir(), 'tmp_logs2/') + gfile.MakeDirs(logdir) + + # Save initialized variables to checkpoint directory. + saver = tf.train.Saver() + with self.test_session() as sess: + init_op.run() + saver.save(sess, os.path.join(chkpt_dir, 'chkpt')) + + # Run the evaluation loop with a timeout. + with self.test_session() as sess: + start = time.time() + slim.evaluation.evaluation_loop( + '', chkpt_dir, logdir, eval_op=update_op, + eval_interval_secs=2.0, timeout=6.0) + end = time.time() + # Check we've waited for the timeout. + self.assertGreater(end - start, 6.0) + # Then the timeout kicked in and stopped the loop. + self.assertLess(end - start, 7.5) + class SingleEvaluationTest(tf.test.TestCase): diff --git a/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc b/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc index 4a81d6d6c67..77d7f4290d0 100644 --- a/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc +++ b/tensorflow/contrib/tensor_forest/core/ops/tree_predictions_op.cc @@ -185,10 +185,6 @@ class TreePredictions : public OpKernel { }; } else { num_data = static_cast<int32>(input_data.shape().dim_size(0)); - int32 num_features = 0; - if (num_data > 0) { - num_features = input_data.NumElements() / num_data; - } decide_function = [&input_data]( int32 i, int32 feature, float bias, DataColumnTypes type) { const auto input_matrix = input_data.matrix<float>(); diff --git a/tensorflow/contrib/training/__init__.py b/tensorflow/contrib/training/__init__.py index fc0e324bcf7..1290854260c 100644 --- a/tensorflow/contrib/training/__init__.py +++ b/tensorflow/contrib/training/__init__.py @@ -30,10 +30,12 @@ like to store state in the forward direction across segments of an example. ## Online data resampling To resample data with replacement on a per-example basis, use -['resample_at_rate'](#resample_at_rate), providing the desired rate -for each example.
If you wish to specify relative rates, rather than -absolute ones, use ['weighted_resample'](#weighted_resample) (which -also returns the actual resampling rate used for each output example). +['rejection_sample'](#rejection_sample) or +['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide +a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, +providing the desired rate for each example. If you wish to specify relative +rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) +(which also returns the actual resampling rate used for each output example). Use ['stratified_sample'](#stratified_sample) or ['stratified_sample_unknown_dist'](#stratified_sample_unknown_dist) to @@ -43,6 +45,7 @@ have a binary classification dataset that is 99.9% class 1, a common approach is to resample from the data so that the data is more balanced. +@@rejection_sample @@resample_at_rate @@stratified_sample @@stratified_sample_unknown_dist diff --git a/tensorflow/contrib/training/python/training/sampling_ops.py b/tensorflow/contrib/training/python/training/sampling_ops.py index c703e22e24d..05f5ec6b39c 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops.py +++ b/tensorflow/contrib/training/python/training/sampling_ops.py @@ -27,14 +27,96 @@ from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner -__all__ = ['stratified_sample', + +__all__ = ['rejection_sample', + 'stratified_sample', 'stratified_sample_unknown_dist',] +def rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, + enqueue_many=False, prebatch_capacity=16, + prebatch_threads=1, runtime_checks=False, name=None): + """Stochastically creates batches by rejection sampling. + + Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce + a scalar tensor between 0 and 1. This tensor corresponds to the probability of + being accepted. When `batch_size` tensor groups have been accepted, the batch + queue will return a mini-batch. + + Args: + tensors: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. + accept_prob_fn: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. + batch_size: Size of batch to be returned. + queue_threads: The number of threads for the queue that will hold the final + batch. + enqueue_many: Bool. If true, interpret input tensors as having a batch + dimension. + prebatch_capacity: Capacity for the large queue that is used to convert + batched tensors to single examples. + prebatch_threads: Number of threads for the large queue that is used to + convert batched tensors to single examples. + runtime_checks: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. + name: Optional prefix for ops created by this function. + Raises: + ValueError: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. + ValueError: enqueue_many is True, and batch dimension on data and labels + don't match. 
+ ValueError: if a zero initial probability class has a nonzero target + probability. + Returns: + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + + Example: + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... + """ + with variable_scope.variable_scope(name, 'rejection_sample', tensors): + tensor_list = ops.convert_n_to_tensor_or_indexed_slices(tensors) + # Reduce the case of a batched example to that of a batch of a single + # example by taking a batch of size one. + if enqueue_many: + # Validate that batch dimension of the input is consistent. + tensor_list = _verify_data_inputs(tensor_list) + + # Make a single queue to hold input examples. Reshape output so examples + # don't have singleton batch dimension. + batched = input_ops.batch(tensor_list, + batch_size=1, + num_threads=prebatch_threads, + capacity=prebatch_capacity, + enqueue_many=True) + tensor_list = [array_ops.squeeze(x, [0]) for x in batched] + + # Set up a queue containing batches that have the distribution. + cur_prob = accept_prob_fn(tensor_list) + if runtime_checks: + cur_prob = array_ops.identity(control_flow_ops.with_dependencies( + [check_ops.assert_less_equal(0.0, cur_prob), + check_ops.assert_less_equal(cur_prob, 1.0)], + cur_prob), name='prob_with_checks') + keep_input = random_ops.random_uniform([]) < cur_prob + return _conditional_batch( + tensor_list, keep_input, batch_size, num_threads=queue_threads) + + def stratified_sample(tensors, labels, target_probs, batch_size, init_probs=None, enqueue_many=False, queue_capacity=16, threads_per_queue=1, name=None): @@ -145,8 +227,12 @@ def stratified_sample(tensors, labels, target_probs, batch_size, # Set up second queue containing batches that have the desired class # proportions. cur_prob = array_ops.gather(accept_probs, label) + keep_input = random_ops.random_uniform([]) < cur_prob batched = _conditional_batch( - val_list + [label], cur_prob, batch_size, threads_per_queue) + val_list + [label], + keep_input, + batch_size, + num_threads=threads_per_queue) return batched[:-1], batched[-1] @@ -260,6 +346,18 @@ def _estimate_data_distribution(labels, num_classes, smoothing_constant=10): return math_ops.cast(init_prob_estimate, dtypes.float32) +def _verify_data_inputs(tensor_list): + """Verify that batched data inputs are well-formed.""" + for tensor in tensor_list: + # Data tensor should have a batch dimension. + tensor_shape = tensor.get_shape().with_rank_at_least(1) + + # Data batch dimensions must be compatible. + tensor_shape[0].assert_is_compatible_with(tensor_list[0].get_shape()[0]) + + return tensor_list + + def _verify_input(tensor_list, labels, probs_list): """Verify that batched inputs are well-formed.""" checked_probs_list = [] @@ -374,16 +472,16 @@ def _calculate_acceptance_probabilities(init_probs, target_probs): return ratio_l / max_ratio -def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): +def _conditional_batch(tensors, keep_input, batch_size, num_threads=10): """Conditionally enqueue tensors based on accept_prob. Specifically, enqueue the element if accept_prob > rand_unif([0, 1]). Args: tensors: List of tensors to enqueue. - accept_prob: Acceptance probability per example. + keep_input: Bool. 
Whether to enqueue or not. batch_size: Size of batch. - queue_threads: Number of threads enqueuing in the final queue. + num_threads: Number of enqueueing threads. Returns: List of batched tensors. @@ -391,7 +489,7 @@ def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): Raises: ValueError: `accept_prob` isn't 0D. """ - accept_prob.get_shape().assert_has_rank(0) + keep_input.get_shape().assert_has_rank(0) # Determine shapes and types of to-be-enqueued-tensors. shapes_list = [] dtypes_list = [] @@ -409,13 +507,12 @@ def _conditional_batch(tensors, accept_prob, batch_size, queue_threads=10): # Conditionally enqueue. # Reshape enqueue op to match no_op's shape. - eq_tf = math_ops.less(random_ops.random_uniform([]), accept_prob) conditional_enqueue = control_flow_ops.cond( - eq_tf, + keep_input, lambda: final_q.enqueue(tensors), control_flow_ops.no_op) queue_runner.add_queue_runner(queue_runner.QueueRunner( - final_q, [conditional_enqueue] * queue_threads)) + final_q, [conditional_enqueue] * num_threads)) out_tensor = final_q.dequeue_many(batch_size) # Queues return a single tensor if the list of enqued tensors is one. Since we diff --git a/tensorflow/contrib/training/python/training/sampling_ops_test.py b/tensorflow/contrib/training/python/training/sampling_ops_test.py index 2d663d7954f..bbc0a284cd1 100644 --- a/tensorflow/contrib/training/python/training/sampling_ops_test.py +++ b/tensorflow/contrib/training/python/training/sampling_ops_test.py @@ -24,7 +24,7 @@ from tensorflow.contrib.training.python.training import sampling_ops from tensorflow.python.platform import tf_logging as logging -class SamplingOpsTest(tf.test.TestCase): +class StratifiedSampleTest(tf.test.TestCase): def testGraphBuildAssertionFailures(self): val = [tf.zeros([1, 3]), tf.ones([1, 5])] @@ -383,17 +383,79 @@ class SamplingOpsTest(tf.test.TestCase): self.normalBehaviorHelper(curried_sampler) + +class RejectionSampleTest(tf.test.TestCase): + + def testGraphConstructionFailures(self): + accept_prob_fn = lambda _: tf.constant(1.0) + batch_size = 32 + # Data must have batch dimension if `enqueue_many` is `True`. + with self.assertRaises(ValueError): + tf.contrib.training.rejection_sample( + [tf.zeros([])], accept_prob_fn, batch_size, enqueue_many=True) + + # Batch dimensions should be equal if `enqueue_many` is `True`. + with self.assertRaises(ValueError): + tf.contrib.training.rejection_sample( + [tf.zeros([5, 1]), tf.zeros([4, 1])], accept_prob_fn, batch_size, + enqueue_many=True) + + def testRuntimeFailures(self): + prob_ph = tf.placeholder(tf.float32, []) + accept_prob_fn = lambda _: prob_ph + batch_size = 32 + + # Set up graph. + tf.set_random_seed(1234) + tf.contrib.training.rejection_sample( + [tf.zeros([])], accept_prob_fn, batch_size, runtime_checks=True, + name='rejection_sample') + prob_tensor = tf.get_default_graph().get_tensor_by_name( + 'rejection_sample/prob_with_checks:0') + + # Run session that should fail. + with self.test_session() as sess: + for illegal_prob in [-0.1, 1.1]: + with self.assertRaises(tf.errors.InvalidArgumentError): + sess.run(prob_tensor, feed_dict={prob_ph: illegal_prob}) + + def testNormalBehavior(self): + tensor_list = [tf.cond( + tf.greater(.5, tf.random_uniform([])), + lambda: tf.constant(1.0), + lambda: tf.constant(2.0))] + accept_prob_fn = lambda x: x[0] - 1.0 + batch_size = 10 + + # Set up graph. 
+ sample = tf.contrib.training.rejection_sample( + tensor_list, accept_prob_fn, batch_size) + + with self.test_session() as sess: + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(coord=coord) + + for _ in range(5): + sample_np = sess.run(sample)[0] + self.assertListEqual([2.0] * batch_size, list(sample_np)) + + coord.request_stop() + coord.join(threads) + + +class ConditionalBatchTest(tf.test.TestCase): + def testConditionallyEnqueueAndBatch(self): tf.set_random_seed(1234) tensor = tf.cond( tf.greater(.5, tf.random_uniform([])), lambda: tf.constant(1.0), lambda: tf.constant(2.0)) - accept_prob = tensor - 1 + keep_input = tf.equal(tensor, 2.0) batch_size = 4 # Set up the test graph. - [batch] = sampling_ops._conditional_batch([tensor], accept_prob, batch_size) # pylint: disable=protected-access + [batch] = sampling_ops._conditional_batch([tensor], keep_input, batch_size) # pylint: disable=protected-access # Check conditional operation. with self.test_session(): @@ -411,13 +473,13 @@ class SamplingOpsTest(tf.test.TestCase): def testConditionallyEnqueueAndBatchTypes(self): tensor = tf.constant(1.0) - accept_prob = tensor - 1 + keep_input = tf.constant(True) batch_size = 4 # Check that output types are the same for 1 and 2-length input lists. - output1 = sampling_ops._conditional_batch([tensor], accept_prob, batch_size) # pylint: disable=protected-access + output1 = sampling_ops._conditional_batch([tensor], keep_input, batch_size) # pylint: disable=protected-access output2 = sampling_ops._conditional_batch( # pylint: disable=protected-access - [tensor, tensor], accept_prob, batch_size) + [tensor, tensor], keep_input, batch_size) self.assertEqual(type(output1), type(output2)) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index afae52dd3d3..f32cd0e6fc8 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -712,6 +712,7 @@ cc_library( name = "android_tensorflow_lib_lite", srcs = if_android(["//tensorflow/core:android_srcs"]), copts = tf_copts() + ["-Os"], + linkopts = ["-lz"], tags = [ "manual", "notap", @@ -769,7 +770,6 @@ cc_library( name = "android_tensorflow_lib", srcs = if_android([":android_op_registrations_and_gradients"]), copts = tf_copts(), - linkopts = ["-lz"], tags = [ "manual", "notap", @@ -1093,12 +1093,12 @@ tf_cuda_library( ], ) + select({ "//tensorflow:windows": [], - "//conditions:default": glob([ + "//conditions:default": [ "util/memmapped_file_system.h", "util/memmapped_file_system.cc", "util/memmapped_file_system_writer.h", "util/memmapped_file_system_writer.cc", - ]), + ], }), hdrs = [ "framework/op_segment.h", @@ -1107,6 +1107,7 @@ tf_cuda_library( "framework/tracking_allocator.h", # only needed for tests "framework/unique_tensor_references.h", "util/command_line_flags.h", + "util/env_var.h", "util/presized_cuckoo_map.h", "util/tensor_slice_set.h", "util/tensor_slice_util.h", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 848a71c4746..59fa09bd8db 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -435,9 +435,19 @@ Status DirectSession::Run(const RunOptions& run_options, } const bool do_trace = (run_options.trace_level() > RunOptions::NO_TRACE); - const int64 build_cost_model = - options_.config.graph_options().build_cost_model(); - if (do_trace || build_cost_model > 0) { + + bool update_cost_model = false; + if (options_.config.graph_options().build_cost_model() > 0) { + const int64 
build_cost_model_every = + options_.config.graph_options().build_cost_model(); + const int64 build_cost_model_after = + options_.config.graph_options().build_cost_model_after(); + update_cost_model = + ((executors_and_keys->step_count + 1 - build_cost_model_after) % + build_cost_model_every == + 0); + } + if (do_trace || update_cost_model) { run_state.collector.reset( new StepStatsCollector(run_metadata->mutable_step_stats())); args.stats_collector = run_state.collector.get(); @@ -479,7 +489,7 @@ Status DirectSession::Run(const RunOptions& run_options, // Build and return the cost model as instructed. mutex_lock l(executor_lock_); ++executors_and_keys->step_count; - if (executors_and_keys->step_count == build_cost_model) { + if (update_cost_model) { // Build the cost model std::unordered_map device_to_graph; for (const PerPartitionExecutorsAndLib& partition : diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index ccc962927b9..a6445fd0403 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -646,7 +646,7 @@ class ExecutorState { int64 iteration_count GUARDED_BY(mu) = 0; // The number of outstanding iterations. - int num_outstanding_iterations GUARDED_BY(mu); + int num_outstanding_iterations GUARDED_BY(mu) = 1; // The active iteration states of this frame. gtl::InlinedVector iterations; @@ -1193,8 +1193,8 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) { (first_input + i)->ClearVal(); } FrameState* input_frame = state->tagged_node.input_frame; - int64 input_iter = state->tagged_node.input_iter; - int id = state->tagged_node.node->id(); + const int64 input_iter = state->tagged_node.input_iter; + const int id = state->tagged_node.node->id(); MaybeMarkCompleted(input_frame, input_iter, id); TaggedNodeSeq ready; if (s.ok()) { @@ -1554,120 +1554,6 @@ void ExecutorState::PropagateOutputs(const TaggedNode& tagged_node, } } -void ExecutorState::FrameState::ActivateNodes(const Node* node, - const bool is_dead, int64 iter, - const EntryVector& outputs, - TaggedNodeSeq* ready) { - const NodeItem* nodes = executor->nodes_; - IterationState* iter_state = GetIteration(iter); - for (const Edge* e : node->out_edges()) { - const Node* dst_node = e->dst(); - const int dst_id = dst_node->id(); - const int src_slot = e->src_output(); - - bool dst_dead = false; - bool dst_ready = false; - // True iff this input for dst is needed. We only set this input for - // dst if this flag is true. This is needed to make the thread safety - // analysis happy. - bool dst_need_input = !e->IsControlEdge(); - if (IsMerge(dst_node)) { - // A merge node is ready if all control inputs have arrived and either - // a) a live data input becomes available or b) all data inputs are dead. - // For Merge, pending's LSB is set iff a live data input has arrived. - if (e->IsControlEdge()) { - iter_state->decrement_pending(dst_id, 2); - int count = iter_state->pending(dst_id); - dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs()); - dst_ready = (count == 0) || ((count == 1) && dst_dead); - } else { - if (outputs[src_slot].has_value) { - // This is a live data input. - int count = iter_state->pending(dst_id); - iter_state->mark_live(dst_id); - // Only the first live edge sets the input and (potentially) - // triggers execution. The low bit of count is set if and - // only if no live input has been used yet (mark_live clears - // it). 
The node should be started if and only if this is - // the first live input and there are no pending control - // edges, i.e. count == 1. - dst_ready = (count == 1); - dst_need_input = ((count & 0x1) == 1); - } else { - // This is a dead data input. Note that dst_node is dead if node is - // a dead enter. We need this to handle properly a while loop on - // the untaken branch of a conditional. - // TODO(yuanbyu): This is a bit hacky, but a good solution for now. - iter_state->increment_dead_count(dst_id); - const int dead_cnt = iter_state->dead_count(dst_id); - dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node); - dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead; - dst_need_input = false; - } - } - } else { - // A non-merge node is ready if all its inputs are ready. We wait - // for all inputs to come in even if we know the node is dead. This - // ensures that all input tensors get cleaned up. - if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) { - iter_state->increment_dead_count(dst_id); - } - dst_dead = iter_state->dead_count(dst_id) > 0; - dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0); - } - - if (dst_need_input) { - const NodeItem& dst_item = nodes[dst_id]; - const int dst_slot = e->dst_input(); - Entry* input_tensors = iter_state->input_tensors; - int dst_loc = dst_item.input_start + dst_slot; - input_tensors[dst_loc] = outputs[src_slot]; - } - - // Add dst to the ready queue if it's ready - if (dst_ready) { - dst_dead = dst_dead && !IsControlTrigger(dst_node); - ready->push_back(TaggedNode(dst_node, this, iter, dst_dead)); - iter_state->outstanding_ops++; - } - } -} - -void ExecutorState::FrameState::ActivateNexts(int64 iter, - TaggedNodeSeq* ready) { - // Propagate the deferred NextIteration nodes to the new iteration. - for (auto& node_entry : next_iter_roots) { - const Node* node = node_entry.first; - const Entry& entry = node_entry.second; - const bool is_dead = !entry.has_value; - ActivateNodes(node, is_dead, iter, {entry}, ready); - } - next_iter_roots.clear(); -} - -void ExecutorState::FrameState::ActivateLoopInvs(int64 iter, - TaggedNodeSeq* ready) { - // Propagate loop invariants to the new iteration. - for (auto& node_entry : inv_values) { - const Node* node = node_entry.first; - const Entry& entry = node_entry.second; - const bool is_dead = !entry.has_value; - ActivateNodes(node, is_dead, iter, {entry}, ready); - } -} - -void ExecutorState::FrameState::AddLoopInv(const Node* node, const Entry& entry, - TaggedNodeSeq* ready) { - // Store this value. - inv_values.push_back({node, entry}); - - // Make this value available to all iterations. - bool is_dead = !entry.has_value; - for (int i = 0; i <= iteration_count; ++i) { - ActivateNodes(node, is_dead, i, {entry}, ready); - } -} - bool ExecutorState::NodeDone(const Status& s, const Node* node, const TaggedNodeSeq& ready, NodeExecStats* stats, TaggedNodeReadyQueue* inline_ready) { @@ -1905,57 +1791,6 @@ void ExecutorState::Finish() { runner([=]() { done_cb(status); }); } -bool ExecutorState::FrameState::IsIterationDone(int64 iter) { - IterationState* iter_state = GetIteration(iter); - if (iter_state->outstanding_ops == 0 && - iter_state->outstanding_frame_count == 0) { - if (iter == 0) { - // The enclosing frame has no pending input. - return num_pending_inputs == 0; - } else { - // The preceding iteration is deleted (and therefore done). 
- return (GetIteration(iter - 1) == nullptr); - } - } - return false; -} - -void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) { - iteration_count++; - int64 next_iter = iteration_count; - - // Initialize the next iteration. - IterationState* iter_state = new IterationState(executor); - SetIteration(next_iter, iter_state); - num_outstanding_iterations++; - dead_exits.clear(); - - // Activate the successors of the deferred roots in the new iteration. - ActivateNexts(next_iter, ready); - - // Activate the loop invariants in the new iteration. - ActivateLoopInvs(next_iter, ready); -} - -bool ExecutorState::FrameState::CleanupIterations(int64 iter, - TaggedNodeSeq* ready) { - int64 curr_iter = iter; - while (curr_iter <= iteration_count && IsIterationDone(curr_iter)) { - // Delete the iteration curr_iter. - delete GetIteration(curr_iter); - SetIteration(curr_iter, nullptr); - --num_outstanding_iterations; - ++curr_iter; - - // When one iteration is completed, we check for deferred iteration, - // and start it if there is one. - if (!next_iter_roots.empty()) { - IncrementIteration(ready); - } - } - return IsFrameDone(); -} - void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, const Node* node, FrameState** child) { @@ -2002,14 +1837,15 @@ void ExecutorState::FindOrCreateChildFrame(FrameState* frame, int64 iter, auto it = outstanding_frames_.find(child_name); if (it != outstanding_frames_.end()) { *child = it->second; - delete temp; // Not used so delete it. } else { mutex_lock frame_lock(frame->mu); frame->GetIteration(iter)->outstanding_frame_count++; outstanding_frames_[child_name] = temp; *child = temp; + temp = nullptr; } } + delete temp; // Not used so delete it. } void ExecutorState::DeleteFrame(FrameState* frame, TaggedNodeSeq* ready) { @@ -2084,6 +1920,171 @@ void ExecutorState::CleanupFramesIterations(FrameState* frame, int64 iter, } } +void ExecutorState::FrameState::ActivateNodes(const Node* node, + const bool is_dead, int64 iter, + const EntryVector& outputs, + TaggedNodeSeq* ready) { + const NodeItem* nodes = executor->nodes_; + IterationState* iter_state = GetIteration(iter); + for (const Edge* e : node->out_edges()) { + const Node* dst_node = e->dst(); + const int dst_id = dst_node->id(); + const int src_slot = e->src_output(); + + bool dst_dead = false; + bool dst_ready = false; + // True iff this input for dst is needed. We only set this input for + // dst if this flag is true. This is needed to make the thread safety + // analysis happy. + bool dst_need_input = !e->IsControlEdge(); + if (IsMerge(dst_node)) { + // A merge node is ready if all control inputs have arrived and either + // a) a live data input becomes available or b) all data inputs are dead. + // For Merge, pending's LSB is set iff a live data input has arrived. + if (e->IsControlEdge()) { + iter_state->decrement_pending(dst_id, 2); + int count = iter_state->pending(dst_id); + dst_dead = (iter_state->dead_count(dst_id) == dst_node->num_inputs()); + dst_ready = (count == 0) || ((count == 1) && dst_dead); + } else { + if (outputs[src_slot].has_value) { + // This is a live data input. + int count = iter_state->pending(dst_id); + iter_state->mark_live(dst_id); + // Only the first live edge sets the input and (potentially) + // triggers execution. The low bit of count is set if and + // only if no live input has been used yet (mark_live clears + // it). 
The node should be started if and only if this is + // the first live input and there are no pending control + // edges, i.e. count == 1. + dst_ready = (count == 1); + dst_need_input = ((count & 0x1) == 1); + } else { + // This is a dead data input. Note that dst_node is dead if node is + // a dead enter. We need this to handle properly a while loop on + // the untaken branch of a conditional. + // TODO(yuanbyu): This is a bit hacky, but a good solution for now. + iter_state->increment_dead_count(dst_id); + const int dead_cnt = iter_state->dead_count(dst_id); + dst_dead = (dead_cnt == dst_node->num_inputs()) || IsEnter(node); + dst_ready = (iter_state->pending(dst_id) == 1) && dst_dead; + dst_need_input = false; + } + } + } else { + // A non-merge node is ready if all its inputs are ready. We wait + // for all inputs to come in even if we know the node is dead. This + // ensures that all input tensors get cleaned up. + if (is_dead || (!e->IsControlEdge() && !outputs[src_slot].has_value)) { + iter_state->increment_dead_count(dst_id); + } + dst_dead = iter_state->dead_count(dst_id) > 0; + dst_ready = (iter_state->decrement_pending(dst_id, 1) == 0); + } + + if (dst_need_input) { + const NodeItem& dst_item = nodes[dst_id]; + const int dst_slot = e->dst_input(); + Entry* input_tensors = iter_state->input_tensors; + int dst_loc = dst_item.input_start + dst_slot; + input_tensors[dst_loc] = outputs[src_slot]; + } + + // Add dst to the ready queue if it's ready + if (dst_ready) { + dst_dead = dst_dead && !IsControlTrigger(dst_node); + ready->push_back(TaggedNode(dst_node, this, iter, dst_dead)); + iter_state->outstanding_ops++; + } + } +} + +void ExecutorState::FrameState::ActivateNexts(int64 iter, + TaggedNodeSeq* ready) { + // Propagate the deferred NextIteration nodes to the new iteration. + for (auto& node_entry : next_iter_roots) { + const Node* node = node_entry.first; + const Entry& entry = node_entry.second; + const bool is_dead = !entry.has_value; + ActivateNodes(node, is_dead, iter, {entry}, ready); + } + next_iter_roots.clear(); +} + +void ExecutorState::FrameState::ActivateLoopInvs(int64 iter, + TaggedNodeSeq* ready) { + // Propagate loop invariants to the new iteration. + for (auto& node_entry : inv_values) { + const Node* node = node_entry.first; + const Entry& entry = node_entry.second; + const bool is_dead = !entry.has_value; + ActivateNodes(node, is_dead, iter, {entry}, ready); + } +} + +void ExecutorState::FrameState::AddLoopInv(const Node* node, const Entry& entry, + TaggedNodeSeq* ready) { + // Store this value. + inv_values.push_back({node, entry}); + + // Make this value available to all iterations. + bool is_dead = !entry.has_value; + for (int i = 0; i <= iteration_count; ++i) { + ActivateNodes(node, is_dead, i, {entry}, ready); + } +} + +bool ExecutorState::FrameState::IsIterationDone(int64 iter) { + IterationState* iter_state = GetIteration(iter); + if (iter_state->outstanding_ops == 0 && + iter_state->outstanding_frame_count == 0) { + if (iter == 0) { + // The enclosing frame has no pending input. + return num_pending_inputs == 0; + } else { + // The preceding iteration is deleted (and therefore done). + return (GetIteration(iter - 1) == nullptr); + } + } + return false; +} + +void ExecutorState::FrameState::IncrementIteration(TaggedNodeSeq* ready) { + iteration_count++; + int64 next_iter = iteration_count; + + // Initialize the next iteration. 
+ IterationState* iter_state = new IterationState(executor); + SetIteration(next_iter, iter_state); + num_outstanding_iterations++; + dead_exits.clear(); + + // Activate the successors of the deferred roots in the new iteration. + ActivateNexts(next_iter, ready); + + // Activate the loop invariants in the new iteration. + ActivateLoopInvs(next_iter, ready); +} + +bool ExecutorState::FrameState::CleanupIterations(int64 iter, + TaggedNodeSeq* ready) { + int64 curr_iter = iter; + while (curr_iter <= iteration_count && IsIterationDone(curr_iter)) { + // Delete the iteration curr_iter. + delete GetIteration(curr_iter); + SetIteration(curr_iter, nullptr); + --num_outstanding_iterations; + ++curr_iter; + + // When one iteration is completed, we check for deferred iteration, + // and start it if there is one. + if (!next_iter_roots.empty()) { + IncrementIteration(ready); + } + } + return IsFrameDone(); +} + void ExecutorImpl::RunAsync(const Args& args, DoneCallback done) { (new ExecutorState(args, this))->RunAsync(done); } diff --git a/tensorflow/core/common_runtime/gpu/process_state.cc b/tensorflow/core/common_runtime/gpu/process_state.cc index 60da115988e..77dabdf4d16 100644 --- a/tensorflow/core/common_runtime/gpu/process_state.cc +++ b/tensorflow/core/common_runtime/gpu/process_state.cc @@ -29,6 +29,7 @@ limitations under the License. #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/env_var.h" // If these flags need to be runtime configurable, consider adding // options to ConfigProto. @@ -203,8 +204,17 @@ Allocator* ProcessState::GetCUDAHostAllocator(int numa_node) { Allocator* allocator = nullptr; static constexpr bool kCudaHostMemoryUseBFC = true; if (kCudaHostMemoryUseBFC) { + // TODO(zheng-xq): evaluate whether 64GB by default is the best choice. + int64 cuda_host_mem_limit_in_mb = -1; + Status status = ReadInt64FromEnvVar("TF_CUDA_HOST_MEM_LIMIT_IN_MB", + 1LL << 16 /*64GB max by default*/, + &cuda_host_mem_limit_in_mb); + if (!status.ok()) { + LOG(ERROR) << "GetCUDAHostAllocator: " << status.error_message(); + } + int64 cuda_host_mem_limit = cuda_host_mem_limit_in_mb * (1LL << 20); allocator = - new BFCAllocator(new CUDAHostAllocator(se), 1LL << 36 /*64GB max*/, + new BFCAllocator(new CUDAHostAllocator(se), cuda_host_mem_limit, true /*allow_growth*/, "cuda_host_bfc" /*name*/); } else { allocator = new PoolAllocator( diff --git a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc index c568896de7f..eef32e799eb 100644 --- a/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc +++ b/tensorflow/core/common_runtime/kernel_benchmark_testlib.cc @@ -18,6 +18,7 @@ limitations under the License. #include #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/local_device.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_segment.h" @@ -46,6 +47,9 @@ Benchmark::Benchmark(const string& device, Graph* g, testing::StopTiming(); string t = str_util::Uppercase(device); + // Allow NewDevice to allocate a new threadpool with different number of + // threads for each new benchmark. 
+ LocalDevice::set_use_global_threadpool(false); device_ = DeviceFactory::NewDevice(t, *options, "/job:localhost/replica:0/task:0"); CHECK(device_) << "Could not create a " << device << " device"; diff --git a/tensorflow/core/common_runtime/local_device.cc b/tensorflow/core/common_runtime/local_device.cc index bbd04e2dbbd..e55456c039a 100644 --- a/tensorflow/core/common_runtime/local_device.cc +++ b/tensorflow/core/common_runtime/local_device.cc @@ -26,42 +26,60 @@ limitations under the License. namespace tensorflow { -namespace { +/* static */ +bool LocalDevice::use_global_threadpool_ = true; -DeviceBase::CpuWorkerThreads eigen_worker_threads; -Eigen::ThreadPoolInterface* eigen_thread_pool = nullptr; -Eigen::ThreadPoolDevice* eigen_device = nullptr; - -static bool InitModule(const SessionOptions& options) { - int32 intra_op_parallelism_threads = - options.config.intra_op_parallelism_threads(); - if (intra_op_parallelism_threads == 0) { - intra_op_parallelism_threads = port::NumSchedulableCPUs(); +struct LocalDevice::EigenThreadPoolInfo { + EigenThreadPoolInfo(const SessionOptions& options) { + int32 intra_op_parallelism_threads = + options.config.intra_op_parallelism_threads(); + if (intra_op_parallelism_threads == 0) { + intra_op_parallelism_threads = port::NumSchedulableCPUs(); + } + VLOG(1) << "Local device intra op parallelism threads: " + << intra_op_parallelism_threads; + eigen_worker_threads_.num_threads = intra_op_parallelism_threads; + eigen_worker_threads_.workers = new thread::ThreadPool( + options.env, "Eigen", intra_op_parallelism_threads); + eigen_threadpool_wrapper_.reset( + new EigenThreadPoolWrapper(eigen_worker_threads_.workers)); + eigen_device_.reset(new Eigen::ThreadPoolDevice( + eigen_threadpool_wrapper_.get(), eigen_worker_threads_.num_threads)); } - VLOG(1) << "Local device intra op parallelism threads: " - << intra_op_parallelism_threads; - eigen_worker_threads.num_threads = intra_op_parallelism_threads; - eigen_worker_threads.workers = new thread::ThreadPool( - options.env, "Eigen", intra_op_parallelism_threads); - eigen_thread_pool = new EigenThreadPoolWrapper(eigen_worker_threads.workers); - eigen_device = new Eigen::ThreadPoolDevice(eigen_thread_pool, - eigen_worker_threads.num_threads); - return true; -} -} // end namespace -// LocalDevice ---------------------------------------------------------------- + ~EigenThreadPoolInfo() { + eigen_threadpool_wrapper_.reset(); + eigen_device_.reset(); + delete eigen_worker_threads_.workers; + } + + DeviceBase::CpuWorkerThreads eigen_worker_threads_; + std::unique_ptr eigen_threadpool_wrapper_; + std::unique_ptr eigen_device_; +}; LocalDevice::LocalDevice(const SessionOptions& options, const DeviceAttributes& attributes, Allocator* device_allocator) - : Device(options.env, attributes, device_allocator) { - // All ThreadPoolDevices in the process will use this single fixed - // sized threadpool for numerical computations. - static bool init = InitModule(options); - CHECK(init); // Avoids compiler warning that init is unused. - set_tensorflow_cpu_worker_threads(&eigen_worker_threads); - set_eigen_cpu_device(eigen_device); + : Device(options.env, attributes, device_allocator), + owned_tp_info_(nullptr) { + LocalDevice::EigenThreadPoolInfo* tp_info; + if (use_global_threadpool_) { + // All ThreadPoolDevices in the process will use this single fixed + // sized threadpool for numerical computations. 
+ static LocalDevice::EigenThreadPoolInfo* global_tp_info = + new LocalDevice::EigenThreadPoolInfo(options); + tp_info = global_tp_info; + } else { + // Each LocalDevice owns a separate ThreadPoolDevice for numerical + // computations. + owned_tp_info_.reset(new LocalDevice::EigenThreadPoolInfo(options)); + tp_info = owned_tp_info_.get(); + } + set_tensorflow_cpu_worker_threads(&tp_info->eigen_worker_threads_); + set_eigen_cpu_device(tp_info->eigen_device_.get()); } +LocalDevice::~LocalDevice() {} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/local_device.h b/tensorflow/core/common_runtime/local_device.h index 147cd10ea9c..d1c27c62481 100644 --- a/tensorflow/core/common_runtime/local_device.h +++ b/tensorflow/core/common_runtime/local_device.h @@ -22,6 +22,9 @@ limitations under the License. namespace tensorflow { +namespace test { +class Benchmark; +} struct SessionOptions; // This class is shared by ThreadPoolDevice and GPUDevice and @@ -32,9 +35,20 @@ class LocalDevice : public Device { public: LocalDevice(const SessionOptions& options, const DeviceAttributes& attributes, Allocator* device_allocator); - ~LocalDevice() override {} + ~LocalDevice() override; private: + static bool use_global_threadpool_; + + static void set_use_global_threadpool(bool use_global_threadpool) { + use_global_threadpool_ = use_global_threadpool; + } + + struct EigenThreadPoolInfo; + std::unique_ptr owned_tp_info_; + + friend class test::Benchmark; + TF_DISALLOW_COPY_AND_ASSIGN(LocalDevice); }; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index c11f7c2c4b9..4752be41ff1 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -72,7 +72,6 @@ Status ShapeRefiner::AddNode(const Node* node) { // Create the inference context for this node with the existing input shapes. std::unique_ptr c( new shape_inference::InferenceContext(&node->def(), node->op_def(), - {} /* input_shapes_string */, input_shapes, input_tensors)); if (!c->construction_status().ok()) { return c->construction_status(); diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index c603c3c9dc3..74279e53309 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -264,22 +264,6 @@ Status GraphMgr::DeregisterAll() { return Status::OK(); } -Status GraphMgr::Execute(const string& handle, const int64 step_id, - const ExecutorOpts& opts, - StepStatsCollector* collector, - CancellationManager* cancellation_manager, - const NamedTensors& in, NamedTensors* out) { - Notification n; - Status status; - ExecuteAsync(handle, step_id, opts, collector, cancellation_manager, in, out, - [&n, &status](const Status& s) { - status = s; - n.Notify(); - }); - n.WaitForNotification(); - return status; -} - void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, const ExecutorOpts& opts, StepStatsCollector* collector, diff --git a/tensorflow/core/distributed_runtime/graph_mgr.h b/tensorflow/core/distributed_runtime/graph_mgr.h index 9e9c97215e7..87499995ab5 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.h +++ b/tensorflow/core/distributed_runtime/graph_mgr.h @@ -77,13 +77,6 @@ class GraphMgr { const NamedTensors& in, NamedTensors* out, StatusCallback done); - // Synchronous wrapper. 
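Note: the LocalDevice change above replaces the single process-wide static Eigen threadpool with a choice between a lazily created global EigenThreadPoolInfo and a per-device one held in owned_tp_info_; set_use_global_threadpool(false) is private and exposed only to test::Benchmark through the friend declaration, so benchmarks can give each new device its own pool. A minimal sketch of the same global-versus-owned pattern, using hypothetical names (Dev, PoolInfo), not the real classes:

    #include <memory>

    struct PoolInfo { /* worker threads, Eigen device, ... */ };

    class Dev {
     public:
      Dev() {
        if (use_global_) {
          static PoolInfo* global = new PoolInfo;  // shared by all Dev instances, never freed
          pool_ = global;
        } else {
          owned_.reset(new PoolInfo);              // lifetime tied to this Dev
          pool_ = owned_.get();
        }
      }
      static void set_use_global(bool b) { use_global_ = b; }

     private:
      static bool use_global_;
      std::unique_ptr<PoolInfo> owned_;
      PoolInfo* pool_ = nullptr;
    };
    bool Dev::use_global_ = true;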
- Status Execute(const string& handle, const int64 step_id, - const ExecutorOpts& opts, - StepStatsCollector* step_stats_collector, - CancellationManager* cancellation_manager, - const NamedTensors& in, NamedTensors* out); - // Deregisters a graph. Status Deregister(const string& handle); diff --git a/tensorflow/core/framework/common_shape_fns.cc b/tensorflow/core/framework/common_shape_fns.cc index c1249474110..6183e9fe26b 100644 --- a/tensorflow/core/framework/common_shape_fns.cc +++ b/tensorflow/core/framework/common_shape_fns.cc @@ -457,7 +457,7 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("strides", &strides)); if (strides.size() != 4) { return errors::InvalidArgument( - "AvgPool requires the stride attribute to contain 4 values, but " + "MaxPool requires the stride attribute to contain 4 values, but " "got: ", strides.size()); } @@ -466,7 +466,7 @@ Status MaxPoolShape(shape_inference::InferenceContext* c) { TF_RETURN_IF_ERROR(c->GetAttr("ksize", &kernel_sizes)); if (kernel_sizes.size() != 4) { return errors::InvalidArgument( - "AvgPool requires the ksize attribute to contain 4 values, but got: ", + "MaxPool requires the ksize attribute to contain 4 values, but got: ", kernel_sizes.size()); } diff --git a/tensorflow/core/framework/common_shape_fns_test.cc b/tensorflow/core/framework/common_shape_fns_test.cc index 68b1e6cbc1d..a4efc04467c 100644 --- a/tensorflow/core/framework/common_shape_fns_test.cc +++ b/tensorflow/core/framework/common_shape_fns_test.cc @@ -24,6 +24,24 @@ limitations under the License. namespace tensorflow { namespace shape_inference { +namespace { + +TensorShapeProto S(std::initializer_list dims) { + PartialTensorShape shape(dims); + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +TensorShapeProto Unknown() { + PartialTensorShape shape; + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +} // namespace + TEST(CommonShapeFnsTest, NoOutputShapeTest) { OpRegistrationData op_reg_data; TF_CHECK_OK(OpDefBuilder("Assert") @@ -38,7 +56,7 @@ TEST(CommonShapeFnsTest, NoOutputShapeTest) { .Input({{"data", 0, DT_FLOAT}}) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({}), S({10})}, {}); TF_EXPECT_OK(NoOutputs(&c)); EXPECT_EQ(0, c.num_outputs()); } @@ -56,14 +74,14 @@ TEST(CommonShapeFnsTest, ScalarShapeTest) { NodeDefBuilder("test", "L2Loss").Input("t", 0, DT_FLOAT).Finalize(&def)); { - InferenceContext c(&def, op_def, {"[]"}, {}); + InferenceContext c(&def, op_def, {S({})}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); } { - InferenceContext c(&def, op_def, {"[1,23,4,4,2]"}, {}); + InferenceContext c(&def, op_def, {S({1, 23, 4, 4, 2})}, {}); TF_EXPECT_OK(ScalarShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(0, c.Rank(output)); @@ -90,7 +108,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,3]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, 4})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -99,7 +117,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown inner dimension for one - InferenceContext c(&def, op_def, {"[2,?]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, -1}), S({3, 4})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -108,7 
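Note: the S() and Unknown() helpers introduced above replace the old string shape specs with TensorShapeProto values built via PartialTensorShape, where a dimension of -1 stands for "unknown". The subtle distinction worth keeping in mind is that S({}) is a known scalar shape (rank 0) while Unknown() has unknown rank. For reference, a few equivalences between the old specs and the new calls:

    old spec    new call          meaning
    "?"         Unknown()         unknown rank
    "[]"        S({})             scalar, rank 0
    "[2,?]"     S({2, -1})        rank 2, second dimension unknown
    "[1,?,3]"   S({1, -1, 3})     rank 3, middle dimension unknown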
+126,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Invalid rank. - InferenceContext c(&def, op_def, {"[2]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2}), S({3, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -118,7 +136,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Unknown outer dimension - InferenceContext c(&def, op_def, {"[2,3]", "[3,?]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3, -1})}, {}); TF_EXPECT_OK(MatMulShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -127,7 +145,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {"[2,5]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 5}), S({3, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -138,7 +156,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { { // Inner shapes not compatible - InferenceContext c(&def, op_def, {"[2,5,3]", "[3,5,4]"}, {}); + InferenceContext c(&def, op_def, {S({2, 5, 3}), S({3, 5, 4})}, {}); auto s = MatMulShape(&c); EXPECT_FALSE(s.ok()); EXPECT_TRUE( @@ -156,7 +174,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[3,2]", "[3,4]"}, {}); + InferenceContext c(&def, op_def, {S({3, 2}), S({3, 4})}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -173,7 +191,7 @@ TEST(CommonShapeFnsTest, MatMulShapeTest) { .Attr("type", DT_FLOAT) .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3]", "[4,3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({4, 3})}, {}); auto s = MatMulShape(&c); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -197,7 +215,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,10]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({2, 10}), S({10})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(2, c.Value(c.Dim(output, 0))); @@ -206,7 +224,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Unknown ranks. 
- InferenceContext c(&def, op_def, {"?", "?"}, {}); + InferenceContext c(&def, op_def, {Unknown(), Unknown()}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_FALSE(c.RankKnown(output)); @@ -214,7 +232,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {"[4,3,4,2,15]", "[15]"}, {}); + InferenceContext c(&def, op_def, {S({4, 3, 4, 2, 15}), S({15})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[4,3,4,2,15]", c.DebugString(output)); @@ -227,7 +245,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3,4,5]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5}), S({3})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[2,3,4,5]", c.DebugString(output)); @@ -240,7 +258,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[8,6,4,2,3,4,5]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5}), S({3})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[8,6,4,2,3,4,5]", c.DebugString(output)); @@ -253,7 +271,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Input("b", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[10,11,12]", "[10]"}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12}), S({10})}, {}); TF_EXPECT_OK(BiasAddShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ("[10,11,12]", c.DebugString(output)); @@ -261,7 +279,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {"[3]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({3}), S({3})}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } @@ -273,7 +291,7 @@ TEST(CommonShapeFnsTest, BiasAddShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {"[2,3]", "[3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3}), S({3})}, {}); EXPECT_FALSE(BiasAddShape(&c).ok()); } } @@ -292,7 +310,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Finalize(&def)); { - InferenceContext c(&def, op_def, {"[2,10]"}, {}); + InferenceContext c(&def, op_def, {S({2, 10})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -300,7 +318,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Rank > 2 - InferenceContext c(&def, op_def, {"[5,7,2,10]"}, {}); + InferenceContext c(&def, op_def, {S({5, 7, 2, 10})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -312,7 +330,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[2,3,4,5]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3, 4, 5})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -324,7 +342,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[8,6,4,2,3,4,5]"}, {}); + InferenceContext c(&def, op_def, {S({8, 6, 4, 2, 3, 4, 5})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle 
output = c.output(0); EXPECT_EQ(3, c.Value(c.Dim(output, 0))); @@ -336,7 +354,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Input("a", 0, DT_FLOAT) .Attr("data_format", "NCHW") .Finalize(&def)); - InferenceContext c(&def, op_def, {"[10,11,12]"}, {}); + InferenceContext c(&def, op_def, {S({10, 11, 12})}, {}); TF_EXPECT_OK(BiasAddGradShape(&c)); ShapeHandle output = c.output(0); EXPECT_EQ(10, c.Value(c.Dim(output, 0))); @@ -344,7 +362,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { { // Input rank not high enough - InferenceContext c(&def, op_def, {"[3]"}, {}); + InferenceContext c(&def, op_def, {S({3})}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } @@ -355,7 +373,7 @@ TEST(CommonShapeFnsTest, BiasAddGradShapeTest) { .Attr("data_format", "NCHW") .Finalize(&def)); // NCHW format - InferenceContext c(&def, op_def, {"[2,3]"}, {}); + InferenceContext c(&def, op_def, {S({2, 3})}, {}); EXPECT_FALSE(BiasAddGradShape(&c).ok()); } } diff --git a/tensorflow/core/framework/shape_inference.cc b/tensorflow/core/framework/shape_inference.cc index 8dca86f5abb..77a433ddcb5 100644 --- a/tensorflow/core/framework/shape_inference.cc +++ b/tensorflow/core/framework/shape_inference.cc @@ -29,26 +29,6 @@ constexpr int64 InferenceContext::kUnknownDim; InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes, - const std::vector& input_tensors) - : node_def_(*CHECK_NOTNULL(node_def)) { - PreInputInit(op_def, input_tensors); - - for (const string& spec : input_shapes) { - ShapeHandle shape; - construction_status_.Update(MakeShapeFromString(spec, &shape)); - if (!construction_status_.ok()) { - return; - } - inputs_.push_back(shape); - } - - PostInputInit(); -} - -InferenceContext::InferenceContext( - const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors) : node_def_(*CHECK_NOTNULL(node_def)) { @@ -67,7 +47,6 @@ InferenceContext::InferenceContext( InferenceContext::InferenceContext( const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors) : node_def_(*CHECK_NOTNULL(node_def)) { @@ -78,8 +57,6 @@ InferenceContext::InferenceContext( } InferenceContext::~InferenceContext() { - for (auto* s : all_shapes_) delete s; - for (auto* d : all_dims_) delete d; } Status InferenceContext::set_output(StringPiece output_name, @@ -209,11 +186,9 @@ Status InferenceContext::WithRank(ShapeHandle shape, int32 rank, std::vector dims; dims.reserve(rank); for (int i = 0; i < rank; ++i) { - all_dims_.push_back(new Dimension()); - dims.push_back(all_dims_.back()); + dims.push_back(UnknownDim()); } - all_shapes_.push_back(new Shape(dims)); - *out = all_shapes_.back(); + *out = shape_manager_.MakeShape(dims); return Status::OK(); } *out = nullptr; @@ -260,8 +235,7 @@ Status InferenceContext::WithValue(DimensionHandle dim, int64 value, return Status::OK(); } if (existing == kUnknownDim) { - all_dims_.push_back(new Dimension(value)); - *out = all_dims_.back(); + *out = MakeDim(value); return Status::OK(); } *out = nullptr; @@ -454,8 +428,7 @@ Status InferenceContext::ReplaceDim(ShapeHandle s, int dim_index_in, ShapeHandle InferenceContext::MakeShape( const std::vector& dims) { - all_shapes_.push_back(new Shape(dims)); - return all_shapes_.back(); + return shape_manager_.MakeShape(dims); } ShapeHandle InferenceContext::MakeShape( @@ -465,12 +438,12 @@ ShapeHandle 
InferenceContext::MakeShape( for (const DimensionOrConstant& d : dims) { dims_actual.push_back(MakeDim(d)); } - return MakeShape(dims_actual); + + return shape_manager_.MakeShape(dims_actual); } ShapeHandle InferenceContext::UnknownShape() { - all_shapes_.push_back(new Shape()); - return all_shapes_.back(); + return shape_manager_.UnknownShape(); } ShapeHandle InferenceContext::UnknownShapeOfRank(int32 rank) { @@ -718,43 +691,6 @@ Status InferenceContext::Max(DimensionHandle first, DimensionOrConstant second, return Status::OK(); } -Status InferenceContext::MakeShapeFromString(const string& spec, - ShapeHandle* output) { - if (spec == "?") { - *output = UnknownShape(); - return Status::OK(); - } - - std::vector dims; - strings::Scanner scanner(spec); - scanner.OneLiteral("["); - while (scanner.Peek() != ']') { - if (scanner.Peek() == '?') { - scanner.OneLiteral("?"); - dims.push_back(UnknownDim()); - } else { - scanner.RestartCapture().Many(strings::Scanner::DIGIT); - StringPiece match; - int64 dim_size = 0; - CHECK(scanner.GetResult(nullptr, &match) && - strings::safe_strto64(match, &dim_size)) - << spec; - dims.push_back(MakeDim(dim_size)); - } - - if (scanner.Peek() == ',') { - scanner.OneLiteral(","); - } else if (scanner.Peek() != ']') { - return errors::InvalidArgument( - "Invalid input spec (] not found in dim shape): ", spec); - } - } - CHECK(scanner.OneLiteral("]").Eos().GetResult()); - *output = MakeShape(dims); - - return Status::OK(); -} - Status InferenceContext::AttachContext(const Status& status) { std::vector input_shapes; for (const ShapeHandle& input_shape : inputs_) { @@ -768,5 +704,25 @@ Status InferenceContext::AttachContext(const Status& status) { strings::StrCat(status.error_message(), error_context)); } +// ----------------------------------------------------------------------------- +// ShapeManager +// ----------------------------------------------------------------------------- +InferenceContext::ShapeManager::ShapeManager() {} +InferenceContext::ShapeManager::~ShapeManager() { + for (auto* s : all_shapes_) delete s; + for (auto* d : all_dims_) delete d; +} + +ShapeHandle InferenceContext::ShapeManager::MakeShape( + const std::vector& dims) { + all_shapes_.push_back(new Shape(dims)); + return all_shapes_.back(); +} + +ShapeHandle InferenceContext::ShapeManager::UnknownShape() { + all_shapes_.push_back(new Shape()); + return all_shapes_.back(); +} + } // namespace shape_inference } // namespace tensorflow diff --git a/tensorflow/core/framework/shape_inference.h b/tensorflow/core/framework/shape_inference.h index 7ed4a85b5dd..ccb0dc91d0e 100644 --- a/tensorflow/core/framework/shape_inference.h +++ b/tensorflow/core/framework/shape_inference.h @@ -41,6 +41,7 @@ class Dimension { const int64 value_; friend class InferenceContext; + friend class ShapeManager; TF_DISALLOW_COPY_AND_ASSIGN(Dimension); }; @@ -61,6 +62,7 @@ class DimensionHandle { friend class InferenceContext; friend class ShapeInferenceTest; friend class ShapeInferenceTestutil; + friend class ShapeManager; // Intentionally copyable. }; @@ -76,6 +78,7 @@ class Shape { const std::vector dims_; friend class InferenceContext; + friend class ShapeManager; TF_DISALLOW_COPY_AND_ASSIGN(Shape); }; @@ -95,6 +98,7 @@ class ShapeHandle { friend class InferenceContext; friend class ShapeInferenceTest; friend class ShapeInferenceTestutil; + friend class ShapeManager; // Intentionally copyable. }; @@ -134,43 +138,17 @@ class InferenceContext { // is NULL-padded to be the same size as . 
// // REQUIRES: is not NULL, and must outlive the InferenceContext. - // - // TODO(vrv): Remove 'input_shapes_string' once we can move the - // creation of Shapes from strings out of this class (or hide it). InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors); // is NULL-padded to be the same size as . // // REQUIRES: is not NULL, and must outlive the InferenceContext. - // - // TODO(cwhipkey): Remove 'input_shapes_string' once we can move the creation - // of Shapes from strings out of this class (or hide it). InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes_string, const std::vector& input_shapes, const std::vector& input_tensors); - // This is a temporary constructor used for initial testing. - // - // TODO(cwhipkey): remove this temporary constructor. - // - // Each input shape describes the input shape as follows: - // * "?" : the shape's rank and dimensions are unknown - // * "[1,?,3]" : the shape's rank is known, and dimensions can be known or - // unknown (? for unknown #1 - multiple dimensions can be - // labeled with the same unknown number, and are deduplicated to - // the same Dimension*. - // - // is NULL-padded to be the same size as . - // - // REQUIRES: is not NULL, and must outlive the InferenceContext. - InferenceContext(const NodeDef* node_def, const OpDef& op_def, - const std::vector& input_shapes, - const std::vector& input_tensors); - ~InferenceContext(); // Runs the shape inference function 'fn' with 'this' as the @@ -340,13 +318,9 @@ class InferenceContext { // Returns a new dimension of the given size. The returned value is owned by // this context. inline DimensionHandle MakeDim(DimensionOrConstant d) { - if (d.dim.IsSet()) { - return d.dim; - } else { - all_dims_.push_back(new Dimension(d.val)); - return all_dims_.back(); - } + return shape_manager_.MakeDim(d); } + inline DimensionHandle UnknownDim() { return MakeDim(kUnknownDim); } // Returns a new dimension whose value is given by a scalar input tensor. @@ -436,15 +410,43 @@ class InferenceContext { } private: + // Creates and stores shapes for use in InferenceContext. + class ShapeManager { + public: + ShapeManager(); + ~ShapeManager(); + + // Returns a new shape with the given dims. The returned value is owned by + // this class. + ShapeHandle MakeShape(const std::vector& dims); + + // Returns a new unknown shape. + ShapeHandle UnknownShape(); + + // Returns a new dimension of the given size. The returned value + // is owned by this class. + inline DimensionHandle MakeDim(DimensionOrConstant d) { + if (d.dim.IsSet()) { + return d.dim; + } else { + all_dims_.push_back(new Dimension(d.val)); + return all_dims_.back(); + } + } + + private: + std::vector all_shapes_; // values are owned. + std::vector all_dims_; // values are owned. + }; + + friend class ShapeInferenceTestutil; // For testing shapes. + // Shared initialization across the two constructors. Remove // once we get rid of one of them. void PreInputInit(const OpDef& op_def, const std::vector& input_tensors); void PostInputInit(); - // Returns a shape from 'shape_string'. - Status MakeShapeFromString(const string& shape_string, ShapeHandle* output); - DimensionHandle GetDimension(const DimensionOrConstant& d); Status ReturnUnknownShape(ShapeHandle* out) { @@ -460,10 +462,9 @@ class InferenceContext { // Adds additional context to the given status. 
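Note: the new InferenceContext::ShapeManager above takes over ownership of the Shape and Dimension objects that were previously appended to all_shapes_/all_dims_ on the context and deleted in ~InferenceContext; MakeShape/MakeDim hand out handles that stay valid for as long as the manager lives. A minimal sketch of the same owning-arena idea, with hypothetical names (ShapeArena, Dim, Shape), not the real classes:

    #include <memory>
    #include <utility>
    #include <vector>

    // Handles are raw pointers whose lifetime is tied to the arena, mirroring
    // ShapeHandle/DimensionHandle pointing into ShapeManager-owned storage.
    struct Dim { long long value; };
    struct Shape { std::vector<const Dim*> dims; };

    class ShapeArena {
     public:
      const Dim* MakeDim(long long v) {
        dims_.emplace_back(new Dim{v});
        return dims_.back().get();
      }
      const Shape* MakeShape(std::vector<const Dim*> dims) {
        shapes_.emplace_back(new Shape{std::move(dims)});
        return shapes_.back().get();
      }

     private:
      std::vector<std::unique_ptr<Dim>> dims_;      // owned
      std::vector<std::unique_ptr<Shape>> shapes_;  // owned
    };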
Status AttachContext(const Status& status); - std::vector all_shapes_; // values are owned. - std::vector all_dims_; // values are owned. + ShapeManager shape_manager_; - // inputs_ and outputs_ refer to values from all_shapes_. + // inputs_ and outputs_ refer to values from `shape_manager_`. std::vector inputs_; std::vector input_tensors_; std::vector requested_input_tensor_; diff --git a/tensorflow/core/framework/shape_inference_test.cc b/tensorflow/core/framework/shape_inference_test.cc index 2cd58638723..76a485c678f 100644 --- a/tensorflow/core/framework/shape_inference_test.cc +++ b/tensorflow/core/framework/shape_inference_test.cc @@ -36,6 +36,20 @@ OpDef MakeOpDefWithLists() { return op_reg_data.op_def; } +TensorShapeProto S(std::initializer_list dims) { + PartialTensorShape shape(dims); + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + +TensorShapeProto Unknown() { + PartialTensorShape shape; + TensorShapeProto ret; + shape.AsProto(&ret); + return ret; +} + } // namespace class ShapeInferenceTest : public ::testing::Test { @@ -57,7 +71,7 @@ TEST_F(ShapeInferenceTest, InputOutputByName) { .Attr("N", 3) .Input(FakeInput(DT_FLOAT)) .Finalize(&def); - InferenceContext c(&def, op_def, {"[1,5]", "[2,5]", "[1,3]"}, {}); + InferenceContext c(&def, op_def, {S({1, 5}), S({2, 5}), S({1, 3})}, {}); EXPECT_EQ("5", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("10", c.DebugString(c.NumElements(c.input(1)))); @@ -93,7 +107,7 @@ static OpDef MakeOpDef(int num_inputs, int num_outputs) { TEST_F(ShapeInferenceTest, DimensionOrConstant) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 1), {"?"}, {}); + InferenceContext c(&def, MakeOpDef(1, 1), {Unknown()}, {}); EXPECT_EQ(InferenceContext::kUnknownDim, c.Value(InferenceContext::kUnknownDim)); EXPECT_EQ(1, c.Value(1)); @@ -108,7 +122,7 @@ TEST_F(ShapeInferenceTest, Run) { NodeDef def; def.set_name("foo"); def.set_op("foo_op"); - InferenceContext c(&def, MakeOpDef(3, 2), {"[1]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), {S({1})}, {}); { auto fn = [](InferenceContext* c) { @@ -139,7 +153,8 @@ TEST_F(ShapeInferenceTest, Run) { TEST_F(ShapeInferenceTest, RankAndDimInspection) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"?", "[1,?,3]", "[]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), {Unknown(), S({1, -1, 3}), S({})}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(2, c.num_outputs()); @@ -179,7 +194,8 @@ TEST_F(ShapeInferenceTest, RankAndDimInspection) { TEST_F(ShapeInferenceTest, NumElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"?", "[1,?,3]", "[5,4,3,2]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), + {Unknown(), S({1, -1, 3}), S({5, 4, 3, 2})}, {}); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(0)))); EXPECT_EQ("?", c.DebugString(c.NumElements(c.input(1)))); @@ -192,7 +208,7 @@ TEST_F(ShapeInferenceTest, NumElements) { TEST_F(ShapeInferenceTest, WithRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -230,7 +246,7 @@ TEST_F(ShapeInferenceTest, WithRank) { TEST_F(ShapeInferenceTest, WithRankAtMost) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -268,7 +284,7 @@ TEST_F(ShapeInferenceTest, WithRankAtMost) { TEST_F(ShapeInferenceTest, 
WithRankAtLeast) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"?", "[1,?,3]"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {Unknown(), S({1, -1, 3})}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -306,7 +322,7 @@ TEST_F(ShapeInferenceTest, WithRankAtLeast) { TEST_F(ShapeInferenceTest, WithValue) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, -1})}, {}); auto d0 = c.Dim(c.input(0), 0); auto d1 = c.Dim(c.input(0), 1); @@ -347,7 +363,7 @@ TEST_F(ShapeInferenceTest, WithValue) { TEST_F(ShapeInferenceTest, MergeDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[2,?,2,1,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({2, -1, 2, 1, -1})}, {}); auto d2 = c.Dim(c.input(0), 0); auto d_unknown = c.Dim(c.input(0), 1); @@ -394,7 +410,9 @@ TEST_F(ShapeInferenceTest, MergeDim) { TEST_F(ShapeInferenceTest, MergeShape) { NodeDef def; InferenceContext c(&def, MakeOpDef(7, 2), - {"?", "[1,2]", "[?,2]", "[1,?]", "[1,3]", "?", "[1]"}, {}); + {Unknown(), S({1, 2}), S({-1, 2}), S({1, -1}), S({1, 3}), + Unknown(), S({1})}, + {}); auto s_unknown = c.input(0); auto s_1_2 = c.input(1); @@ -461,7 +479,10 @@ TEST_F(ShapeInferenceTest, MergeShape) { TEST_F(ShapeInferenceTest, MergePrefix) { NodeDef def; - InferenceContext c(&def, MakeOpDef(4, 2), {"?", "[?,2]", "[1,?,3]", "[2,4]"}, + InferenceContext c(&def, MakeOpDef(4, 2), + { + Unknown(), S({-1, 2}), S({1, -1, 3}), S({2, 4}), + }, {}); auto s_unknown = c.input(0); @@ -514,7 +535,8 @@ TEST_F(ShapeInferenceTest, MergePrefix) { TEST_F(ShapeInferenceTest, Subshape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"[1,2,3,?,5]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(2, 2), {S({1, 2, 3, -1, 5}), Unknown()}, + {}); ShapeHandle unknown = c.input(1); ShapeHandle out; @@ -588,7 +610,8 @@ TEST_F(ShapeInferenceTest, Subshape) { TEST_F(ShapeInferenceTest, Concatenate) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"[1,?,3]", "[4,5]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(3, 2), + {S({1, -1, 3}), S({4, 5}), Unknown()}, {}); auto in0 = c.input(0); auto in1 = c.input(1); @@ -614,7 +637,7 @@ TEST_F(ShapeInferenceTest, Concatenate) { TEST_F(ShapeInferenceTest, ReplaceDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 0), {"[1,2,3]", "?"}, {}); + InferenceContext c(&def, MakeOpDef(2, 0), {S({1, 2, 3}), Unknown()}, {}); auto in = c.input(0); auto unknown = c.input(1); @@ -645,7 +668,7 @@ TEST_F(ShapeInferenceTest, ReplaceDim) { TEST_F(ShapeInferenceTest, MakeShape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,3,?,5]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, 3, -1, 5})}, {}); std::vector dims; auto in0 = c.input(0); @@ -669,7 +692,8 @@ TEST_F(ShapeInferenceTest, MakeShape) { TEST_F(ShapeInferenceTest, UnknownShape) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto u0 = c.UnknownShape(); auto u1 = c.UnknownShape(); @@ -680,7 +704,8 @@ TEST_F(ShapeInferenceTest, UnknownShape) { TEST_F(ShapeInferenceTest, Scalar) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Scalar(); EXPECT_EQ("[]", c.DebugString(s0)); @@ -690,7 +715,8 @@ TEST_F(ShapeInferenceTest, Scalar) { TEST_F(ShapeInferenceTest, Vector) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, 
{}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Vector(1); EXPECT_EQ("[1]", c.DebugString(s0)); @@ -705,7 +731,8 @@ TEST_F(ShapeInferenceTest, Vector) { TEST_F(ShapeInferenceTest, Matrix) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto s0 = c.Matrix(1, 2); EXPECT_EQ("[1,2]", c.DebugString(s0)); @@ -727,7 +754,7 @@ TEST_F(ShapeInferenceTest, Matrix) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { auto create = [&](Tensor* t) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {"?"}, {t}); + InferenceContext c(&def, MakeOpDef(1, 0), {Unknown()}, {t}); ShapeHandle out; Status s = c.MakeShapeFromShapeTensor(0, &out); if (s.ok()) { @@ -766,7 +793,7 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { // Test when the input shape is wrong. { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 0), {"[1,?]"}, {nullptr}); + InferenceContext c(&def, MakeOpDef(1, 0), {S({1, -1})}, {nullptr}); ShapeHandle out; EXPECT_EQ("Shape must be rank 1 but is rank 2", c.MakeShapeFromShapeTensor(0, &out).error_message()); @@ -775,7 +802,8 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeTensor) { TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); TensorShapeProto proto; // With a set unknown rank. @@ -810,7 +838,8 @@ TEST_F(ShapeInferenceTest, MakeShapeFromShapeProto) { TEST_F(ShapeInferenceTest, MakeDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto d0 = c.MakeDim(1); auto d1 = c.MakeDim(1); @@ -823,7 +852,8 @@ TEST_F(ShapeInferenceTest, MakeDim) { TEST_F(ShapeInferenceTest, UnknownDim) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto d0 = c.UnknownDim(); auto d1 = c.UnknownDim(); @@ -834,7 +864,8 @@ TEST_F(ShapeInferenceTest, UnknownDim) { TEST_F(ShapeInferenceTest, UnknownShapeOfRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); auto unknown_shape_of_rank_3 = c.UnknownShapeOfRank(3); EXPECT_EQ("[?,?,?]", c.DebugString(unknown_shape_of_rank_3)); @@ -847,7 +878,8 @@ TEST_F(ShapeInferenceTest, InputTensors) { const Tensor t1 = tensorflow::test::AsTensor({10}); const Tensor t2 = tensorflow::test::AsTensor({20, 30}); NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 2), {"[1]", "[2]", "[3]"}, {&t1, &t2}); + InferenceContext c(&def, MakeOpDef(3, 2), {S({1}), S({2}), S({3})}, + {&t1, &t2}); EXPECT_TRUE(c.input_tensor(0) == &t1); EXPECT_TRUE(c.input_tensor(1) == &t2); @@ -858,7 +890,7 @@ TEST_F(ShapeInferenceTest, MakeDimForScalarInput) { Tensor t1 = tensorflow::test::AsScalar(20); Tensor t2 = tensorflow::test::AsScalar(-1); NodeDef def; - InferenceContext c(&def, MakeOpDef(2, 2), {"[]", "[]"}, {&t1, &t2}); + InferenceContext c(&def, MakeOpDef(2, 2), {S({}), S({})}, {&t1, &t2}); DimensionHandle d; EXPECT_TRUE(c.MakeDimForScalarInput(0, &d).ok()); @@ -888,7 +920,8 @@ TEST_F(ShapeInferenceTest, GetAttr) { .Finalize(&def) .ok()); - InferenceContext c(&def, op_reg_data.op_def, {}, {}); + std::vector empty; + InferenceContext c(&def, op_reg_data.op_def, empty, {}); string value; EXPECT_TRUE(c.GetAttr("foo", 
&value).ok()); EXPECT_EQ("bar", value); @@ -896,7 +929,7 @@ TEST_F(ShapeInferenceTest, GetAttr) { TEST_F(ShapeInferenceTest, Divide) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -946,7 +979,7 @@ TEST_F(ShapeInferenceTest, Divide) { TEST_F(ShapeInferenceTest, Add) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -997,7 +1030,7 @@ TEST_F(ShapeInferenceTest, Add) { TEST_F(ShapeInferenceTest, Subtract) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0,5]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 5})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1046,7 +1079,7 @@ TEST_F(ShapeInferenceTest, Subtract) { TEST_F(ShapeInferenceTest, Multiply) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[6,?,0,1]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({6, -1, 0, 1})}, {}); auto s = c.input(0); auto d_6 = c.Dim(s, 0); @@ -1098,7 +1131,8 @@ TEST_F(ShapeInferenceTest, Multiply) { TEST_F(ShapeInferenceTest, FullyDefined) { NodeDef def; - InferenceContext c(&def, MakeOpDef(0, 2), {}, {}); + std::vector empty; + InferenceContext c(&def, MakeOpDef(0, 2), empty, {}); // No rank or missing dimension information should return false. EXPECT_FALSE(c.FullyDefined(c.UnknownShape())); @@ -1111,7 +1145,7 @@ TEST_F(ShapeInferenceTest, FullyDefined) { TEST_F(ShapeInferenceTest, Min) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,?,0]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1, 0})}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1159,7 +1193,7 @@ TEST_F(ShapeInferenceTest, Min) { TEST_F(ShapeInferenceTest, Max) { NodeDef def; - InferenceContext c(&def, MakeOpDef(1, 2), {"[1,2,?]"}, {}); + InferenceContext c(&def, MakeOpDef(1, 2), {S({1, 2, -1})}, {}); auto s = c.input(0); auto d_1 = c.Dim(s, 0); @@ -1196,7 +1230,8 @@ TEST_F(ShapeInferenceTest, Max) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"?", "?", "?"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {Unknown(), Unknown(), Unknown()}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1208,7 +1243,8 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapes) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?,?]", "[?]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, -1}), S({-1}), S({-1})}, + {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1220,7 +1256,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownDims) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?]", "[?]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1}), S({-1}), S({-1})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1233,7 +1269,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidIndicesRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[4]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({4}), S({3})}, {}); EXPECT_EQ(3, 
c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1246,7 +1282,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidNumElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[4]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({4})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1259,7 +1295,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_InvalidRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[?,3]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({-1, 3}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1271,7 +1307,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumIndexElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[?]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({-1}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1283,7 +1319,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownNumValueElements) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,?]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, -1}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1295,7 +1331,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownIndexRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[?]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({-1})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); @@ -1307,7 +1343,7 @@ TEST_F(ShapeInferenceTest, ValidateSparseTensor_UnknownShapeRank) { TEST_F(ShapeInferenceTest, ValidateSparseTensor) { NodeDef def; - InferenceContext c(&def, MakeOpDef(3, 1), {"[5,3]", "[5]", "[3]"}, {}); + InferenceContext c(&def, MakeOpDef(3, 1), {S({5, 3}), S({5}), S({3})}, {}); EXPECT_EQ(3, c.num_inputs()); EXPECT_EQ(1, c.num_outputs()); diff --git a/tensorflow/core/framework/shape_inference_testutil.cc b/tensorflow/core/framework/shape_inference_testutil.cc index 8d168620d0c..6cad1f8efaa 100644 --- a/tensorflow/core/framework/shape_inference_testutil.cc +++ b/tensorflow/core/framework/shape_inference_testutil.cc @@ -16,9 +16,9 @@ limitations under the License. 
#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" namespace tensorflow { @@ -35,8 +35,16 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op, std::vector ins_v = str_util::Split(ins, ';'); std::unique_ptr new_node_def; - shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def, ins_v, - op.input_tensors); + InferenceContext::ShapeManager manager; + std::vector in_shapes; + for (const string& spec : ins_v) { + ShapeHandle shape; + TF_RETURN_IF_ERROR(MakeShapeFromString(&manager, spec, &shape)); + in_shapes.push_back(shape); + } + + shape_inference::InferenceContext c(&op.node_def, op_reg_data->op_def, + in_shapes, op.input_tensors); TF_RETURN_IF_ERROR(c.construction_status()); if (op_reg_data->shape_inference_fn == nullptr) { return errors::InvalidArgument( @@ -199,5 +207,49 @@ Status ShapeInferenceTestutil::InferShapes(ShapeInferenceTestOp op, return Status::OK(); } +// static +Status ShapeInferenceTestutil::MakeShapeFromString( + InferenceContext::ShapeManager* manager, const string& spec, + ShapeHandle* output) { + if (spec == "?") { + *output = manager->UnknownShape(); + return Status::OK(); + } + + std::vector dims; + strings::Scanner scanner(spec); + scanner.OneLiteral("["); + while (scanner.Peek() != ']') { + if (scanner.Peek() == '?') { + scanner.OneLiteral("?"); + dims.push_back(manager->MakeDim(InferenceContext::kUnknownDim)); + } else { + scanner.RestartCapture().Many(strings::Scanner::DIGIT); + StringPiece match; + int64 dim_size = 0; + + if (!scanner.GetResult(nullptr, &match) || + !strings::safe_strto64(match, &dim_size)) { + return errors::InvalidArgument("Could not parse number in ", spec); + } + + dims.push_back(manager->MakeDim(dim_size)); + } + + if (scanner.Peek() == ',') { + scanner.OneLiteral(","); + } else if (scanner.Peek() != ']') { + return errors::InvalidArgument( + "Invalid input spec (] not found in dim shape): ", spec); + } + } + if (!scanner.OneLiteral("]").Eos().GetResult()) { + return errors::InvalidArgument("Malformed shape spec: did not end in ']'."); + } + *output = manager->MakeShape(dims); + + return Status::OK(); +} + } // namespace shape_inference } // namespace tensorflow diff --git a/tensorflow/core/framework/shape_inference_testutil.h b/tensorflow/core/framework/shape_inference_testutil.h index b5d187405ad..64067464fb9 100644 --- a/tensorflow/core/framework/shape_inference_testutil.h +++ b/tensorflow/core/framework/shape_inference_testutil.h @@ -17,6 +17,7 @@ limitations under the License. #include #include "tensorflow/core/framework/graph.pb.h" +#include "tensorflow/core/framework/shape_inference.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" @@ -65,6 +66,10 @@ class ShapeInferenceTestutil { private: ShapeInferenceTestutil() {} + + // Makes a shape out of 'spec'. 
+ static Status MakeShapeFromString(InferenceContext::ShapeManager* manager, + const string& spec, ShapeHandle* output); }; } // namespace shape_inference diff --git a/tensorflow/core/framework/summary.proto b/tensorflow/core/framework/summary.proto index 10ee1c8779a..3560b96dfcc 100644 --- a/tensorflow/core/framework/summary.proto +++ b/tensorflow/core/framework/summary.proto @@ -8,6 +8,13 @@ option java_package = "org.tensorflow.framework"; import "tensorflow/core/framework/tensor.proto"; +// Metadata associated with a series of Summary data +message SummaryDescription { + // Hint on how plugins should process the data in this series. + // Supported values include "scalar", "histogram", "image", "audio" + string type_hint = 1; +} + // Serialization format for histogram module in // core/lib/histogram/histogram.h message HistogramProto { diff --git a/tensorflow/core/framework/tensor_shape.cc b/tensorflow/core/framework/tensor_shape.cc index 4e1a99acd68..fde1916c088 100644 --- a/tensorflow/core/framework/tensor_shape.cc +++ b/tensorflow/core/framework/tensor_shape.cc @@ -33,7 +33,7 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector* vals) { } void TensorShape::CheckDimsEqual(int NDIMS) const { - CHECK_EQ(NDIMS, dims()) << "Asking for tensor of " << NDIMS << "dimensions" + CHECK_EQ(NDIMS, dims()) << "Asking for tensor of " << NDIMS << " dimensions" << " from a tensor of " << dims() << " dimensions"; } diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index f9a7b3ee52d..b41e438b2b0 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -139,6 +139,7 @@ TF_CALL_NUMBER_TYPES(REGISTER_ADDN_CPU); #if GOOGLE_CUDA REGISTER_ADDN(Eigen::half, GPU); REGISTER_ADDN(float, GPU); +REGISTER_ADDN(double, GPU); // A special GPU kernel for int32. // TODO(b/25387198): Also enable int32 in device memory. This kernel diff --git a/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc new file mode 100644 index 00000000000..03d2b2c4423 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_gpu_round.cu.cc @@ -0,0 +1,26 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#include "tensorflow/core/kernels/cwise_ops_gpu_common.cu.h" + +namespace tensorflow { +namespace functor { +DEFINE_UNARY5(round, Eigen::half, float, double, int32, int64); +} // namespace functor +} // namespace tensorflow + +#endif // GOOGLE_CUDA diff --git a/tensorflow/core/kernels/cwise_op_round.cc b/tensorflow/core/kernels/cwise_op_round.cc new file mode 100644 index 00000000000..0457f3931d8 --- /dev/null +++ b/tensorflow/core/kernels/cwise_op_round.cc @@ -0,0 +1,25 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/cwise_ops_common.h" + +namespace tensorflow { +REGISTER5(UnaryOp, CPU, "Round", functor::round, Eigen::half, float, double, + int32, int64); +#if GOOGLE_CUDA +REGISTER5(UnaryOp, GPU, "Round", functor::round, Eigen::half, float, double, + int32, int64); +#endif +} // namespace tensorflow diff --git a/tensorflow/core/kernels/cwise_ops.h b/tensorflow/core/kernels/cwise_ops.h index 24dc93629b4..766c7152b00 100644 --- a/tensorflow/core/kernels/cwise_ops.h +++ b/tensorflow/core/kernels/cwise_ops.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/tensor_types.h" @@ -236,6 +237,48 @@ struct functor_traits> { }; }; +#if EIGEN_COMP_GNUC && __cplusplus > 199711L +#define DISABLE_FLOAT_EQUALITY_WARNING \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") +#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop") +#else +#define DISABLE_FLOAT_EQUALITY_WARNING +#define ENABLE_FLOAT_EQUALITY_WARNING +#endif + +template +struct scalar_round_op_google { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& x) const { + EIGEN_STATIC_ASSERT((!NumTraits::IsComplex), + NUMERIC_TYPE_MUST_BE_REAL) + + Scalar round_val; + round_val = Eigen::numext::floor(x); + const Scalar fraction = x - round_val; + if (fraction > Scalar(.5)) { + round_val += Scalar(1.0); + } else if (fraction == Scalar(.5)) { + const Scalar nearest_even_int = + round_val - Scalar(2) * Eigen::numext::floor(Scalar(.5) * x); + bool is_odd = (nearest_even_int == Scalar(1)); + if (is_odd) { + round_val += Scalar(1); + } + } + return round_val; + } +}; + +template +struct functor_traits> { + enum { Cost = 4 * NumTraits::AddCost, PacketAccess = false }; +}; + +#undef ENABLE_FLOAT_EQUALITY_WARNING +#undef DISABLE_FLOAT_EQUALITY_WARNING + } // end namespace internal } // end namespace Eigen @@ -398,6 +441,9 @@ struct isfinite : base, bool> {}; template struct floor : base> {}; +template +struct round : base> {}; + template struct ceil : base> {}; diff --git a/tensorflow/core/kernels/nn_ops_test.cc b/tensorflow/core/kernels/nn_ops_test.cc index 5ff5c297fbd..54a9dd07b1f 100644 --- a/tensorflow/core/kernels/nn_ops_test.cc +++ b/tensorflow/core/kernels/nn_ops_test.cc @@ -192,6 +192,7 @@ static void BM_ConvFloat(int iters, int batch, int rows, int cols, int in_depth, TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); string device = use_gpu ? "gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g, &options).Run(iters); testing::ItemsProcessed(num_ops * iters); } @@ -557,6 +558,7 @@ static void BM_ConvFloatDepthwise(int iters, int batch, int rows, int cols, TF_CHECK_OK(ConvertGraphDefToGraph(opts, graph, g)); string device = use_gpu ? 
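Note: scalar_round_op_google above implements round-half-to-even (banker's rounding) rather than the half-away-from-zero behaviour of std::round: halfway cases go to the nearest even integer, so 2.5 -> 2, 3.5 -> 4, -2.5 -> -2. A small standalone check of the same rule using only the standard library (std::nearbyint under FE_TONEAREST rounds ties to even):

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    int main() {
      std::fesetround(FE_TONEAREST);          // round-to-nearest, ties to even
      assert(std::nearbyint(2.5) == 2.0);     // tie -> even
      assert(std::nearbyint(3.5) == 4.0);     // tie -> even
      assert(std::nearbyint(-2.5) == -2.0);   // tie -> even
      assert(std::nearbyint(2.4) == 2.0);
      assert(std::nearbyint(2.6) == 3.0);
      return 0;
    }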
"gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g, &options).Run(iters); testing::ItemsProcessed(num_ops * iters); } @@ -1075,6 +1077,7 @@ static void BM_MaxPoolBk(int iters, int batch_size, int rows, int cols, Graph* g = new Graph(OpRegistry::Global()); TF_CHECK_OK(root.ToGraph(g)); string device = use_gpu ? "gpu" : "cpu"; + testing::UseRealTime(); test::Benchmark(device, g).Run(iters); testing::ItemsProcessed(batch_size * rows * cols * depth * iters); diff --git a/tensorflow/core/lib/core/blocking_counter.h b/tensorflow/core/lib/core/blocking_counter.h index ebe7de6b3be..b2411f5951f 100644 --- a/tensorflow/core/lib/core/blocking_counter.h +++ b/tensorflow/core/lib/core/blocking_counter.h @@ -16,40 +16,49 @@ limitations under the License. #ifndef TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_ #define TENSORFLOW_LIB_CORE_BLOCKING_COUNTER_H_ +#include + #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/types.h" namespace tensorflow { class BlockingCounter { public: - BlockingCounter(int initial_count) : count_(initial_count) { - CHECK_GE(count_, 0); + BlockingCounter(int initial_count) + : state_(initial_count << 1), notified_(false) { + CHECK_GE(initial_count, 0); + DCHECK_EQ((initial_count << 1) >> 1, initial_count); } - ~BlockingCounter() {} + ~BlockingCounter() { DCHECK_EQ(state_ >> 1, 0); } inline void DecrementCount() { - mutex_lock l(mu_); - --count_; - CHECK(count_ >= 0); - if (count_ == 0) { - cond_var_.notify_all(); + unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2; + if (v != 1) { + DCHECK_NE(((v + 2) & ~1), 0); + return; // either count has not dropped to 0, or waiter is not waiting } + mutex_lock l(mu_); + DCHECK(!notified_); + notified_ = true; + cond_var_.notify_all(); } inline void Wait() { + unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel); + if ((v >> 1) == 0) return; mutex_lock l(mu_); - while (count_ > 0) { + while (!notified_) { cond_var_.wait(l); } } private: - int count_; mutex mu_; condition_variable cond_var_; + std::atomic state_; // low bit is waiter flag + bool notified_; }; } // namespace tensorflow diff --git a/tensorflow/core/lib/core/blocking_counter_test.cc b/tensorflow/core/lib/core/blocking_counter_test.cc index 12a30af8210..af56f624e55 100644 --- a/tensorflow/core/lib/core/blocking_counter_test.cc +++ b/tensorflow/core/lib/core/blocking_counter_test.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
==============================================================================*/ -#include "tensorflow/core/platform/test.h" - #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" namespace tensorflow { namespace { @@ -48,4 +48,28 @@ TEST(BlockingCounterTest, TestMultipleThread) { } } // namespace + +static void BM_BlockingCounter(int iters, int num_threads, + int shards_per_thread) { + testing::StopTiming(); + std::unique_ptr thread_pool( + new thread::ThreadPool(Env::Default(), "test", num_threads)); + const int num_shards = num_threads * shards_per_thread; + testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + BlockingCounter bc(num_shards); + for (int j = 0; j < num_threads; ++j) { + thread_pool->Schedule([&bc, shards_per_thread] { + for (int k = 0; k < shards_per_thread; ++k) { + bc.DecrementCount(); + } + }); + } + bc.Wait(); + } + testing::StopTiming(); +} + +BENCHMARK(BM_BlockingCounter)->RangePair(1, 12, 1, 1000); + } // namespace tensorflow diff --git a/tensorflow/core/lib/io/random_inputstream.cc b/tensorflow/core/lib/io/random_inputstream.cc index bb92f0f018d..8b8c1392a1d 100644 --- a/tensorflow/core/lib/io/random_inputstream.cc +++ b/tensorflow/core/lib/io/random_inputstream.cc @@ -19,8 +19,15 @@ limitations under the License. namespace tensorflow { namespace io { -RandomAccessInputStream::RandomAccessInputStream(RandomAccessFile* file) - : file_(file) {} +RandomAccessInputStream::RandomAccessInputStream(RandomAccessFile* file, + bool owns_file) + : file_(file), owns_file_(owns_file) {} + +RandomAccessInputStream::~RandomAccessInputStream() { + if (owns_file_) { + delete file_; + } +} Status RandomAccessInputStream::ReadNBytes(int64 bytes_to_read, string* result) { diff --git a/tensorflow/core/lib/io/random_inputstream.h b/tensorflow/core/lib/io/random_inputstream.h index 8ec64d3b2aa..09ebe9ba49e 100644 --- a/tensorflow/core/lib/io/random_inputstream.h +++ b/tensorflow/core/lib/io/random_inputstream.h @@ -26,8 +26,11 @@ namespace io { // RandomAccessInputStream is NOT safe for concurrent use by multiple threads. class RandomAccessInputStream : public InputStreamInterface { public: - // Does not take ownership of 'file'. 'file' must outlive *this. - explicit RandomAccessInputStream(RandomAccessFile* file); + // Does not take ownership of 'file' unless owns_file is set to true. 'file' + // must outlive *this. + RandomAccessInputStream(RandomAccessFile* file, bool owns_file = false); + + ~RandomAccessInputStream(); Status ReadNBytes(int64 bytes_to_read, string* result) override; @@ -43,6 +46,7 @@ class RandomAccessInputStream : public InputStreamInterface { private: RandomAccessFile* file_; // Not owned. int64 pos_ = 0; // Tracks where we are in the file. 
+ bool owns_file_ = false; }; } // namespace io diff --git a/tensorflow/core/ops/compat/ops_history.v0.pbtxt b/tensorflow/core/ops/compat/ops_history.v0.pbtxt index ec0e7801b1e..1238de51c2c 100644 --- a/tensorflow/core/ops/compat/ops_history.v0.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v0.pbtxt @@ -30770,6 +30770,43 @@ op { } } } +op { + name: "TensorSummary" + input_arg { + name: "tensor" + type_attr: "T" + } + output_arg { + name: "summary" + type: DT_STRING + } + attr { + name: "T" + type: "type" + } + attr { + name: "description" + type: "string" + default_value { + s: "" + } + } + attr { + name: "labels" + type: "list(string)" + default_value { + list { + } + } + } + attr { + name: "display_name" + type: "string" + default_value { + s: "" + } + } +} op { name: "TextLineReader" output_arg { diff --git a/tensorflow/core/ops/logging_ops.cc b/tensorflow/core/ops/logging_ops.cc index 83d02a4954e..42bd12a5b3f 100644 --- a/tensorflow/core/ops/logging_ops.cc +++ b/tensorflow/core/ops/logging_ops.cc @@ -69,23 +69,17 @@ REGISTER_OP("TensorSummary") .Input("tensor: T") .Output("summary: string") .Attr("T: type") - .Attr("display_name: string") .Attr("description: string = ''") .Attr("labels: list(string) = []") + .Attr("display_name: string = ''") .SetShapeFn(shape_inference::ScalarShape) .Doc(R"doc( Outputs a `Summary` protocol buffer with a tensor. tensor: A tensor to serialize. -display_name: A name to associate with the data series. -description: An optional long description of the data being output. -labels: a list of strings used to specify how the data can be interpreted, e.g. - a string tensor containing jpg images should have 'encoding:image/jpg'; a - string tensor with foo protos should have 'encoding:proto/X/Y/foo.proto'; - a numeric tensor containing bounding boxes may have - 'bounding_box:x1,y1,x2,y2,'. If the tensor is a part of a group of related - outputs, that can be encoded through a 'group:$groupName/$roleInGroup' label. - Labels may be formatted as 'prefix:value'. The prefix may be re-used. +description: A json-encoded SummaryDescription proto. +labels: An unused list of strings. +display_name: An unused string. )doc"); REGISTER_OP("ScalarSummary") diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 1f9c47172e9..cc478e33f02 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -18921,18 +18921,13 @@ op { name: "T" type: "type" } - attr { - name: "display_name" - type: "string" - description: "A name to associate with the data series." - } attr { name: "description" type: "string" default_value { s: "" } - description: "An optional long description of the data being output." + description: "A json-encoded SummaryDescription proto." } attr { name: "labels" @@ -18941,7 +18936,15 @@ op { list { } } - description: "a list of strings used to specify how the data can be interpreted, e.g.\na string tensor containing jpg images should have \'encoding:image/jpg\'; a\nstring tensor with foo protos should have \'encoding:proto/X/Y/foo.proto\';\na numeric tensor containing bounding boxes may have\n\'bounding_box:x1,y1,x2,y2,\'. If the tensor is a part of a group of related\noutputs, that can be encoded through a \'group:$groupName/$roleInGroup\' label.\nLabels may be formatted as \'prefix:value\'. The prefix may be re-used." + description: "An unused list of strings." + } + attr { + name: "display_name" + type: "string" + default_value { + s: "" + } + description: "An unused string." 
} summary: "Outputs a `Summary` protocol buffer with a tensor." } diff --git a/tensorflow/core/platform/default/logging.cc b/tensorflow/core/platform/default/logging.cc index 454fb64e2c3..e7808ca08d5 100644 --- a/tensorflow/core/platform/default/logging.cc +++ b/tensorflow/core/platform/default/logging.cc @@ -92,6 +92,11 @@ LogMessageFatal::~LogMessageFatal() { abort(); } +void LogString(const char* fname, int line, int severity, + const string& message) { + LogMessage(fname, line, severity) << message; +} + template <> void MakeCheckOpValueString(std::ostream* os, const char& v) { if (v >= 32 && v <= 126) { diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index e4bd54cbb61..a5dd7b45c4a 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -315,6 +315,7 @@ Status ReadBinaryProto(Env* env, const string& fname, Status ReadTextProto(Env* env, const string& fname, ::tensorflow::protobuf::Message* proto) { +#if !defined(TENSORFLOW_LITE_PROTOS) std::unique_ptr file; TF_RETURN_IF_ERROR(env->NewRandomAccessFile(fname, &file)); std::unique_ptr stream(new FileStream(file.get())); @@ -324,6 +325,9 @@ Status ReadTextProto(Env* env, const string& fname, return errors::DataLoss("Can't parse ", fname, " as text proto"); } return Status::OK(); +#else + return errors::Unimplemented("Can't parse text protos with protolite."); +#endif } } // namespace tensorflow diff --git a/tensorflow/core/platform/hexagon/gemm_wrapper.h b/tensorflow/core/platform/hexagon/soc_interface.h similarity index 79% rename from tensorflow/core/platform/hexagon/gemm_wrapper.h rename to tensorflow/core/platform/hexagon/soc_interface.h index b1c22bafdb4..26c0ca29b3d 100644 --- a/tensorflow/core/platform/hexagon/gemm_wrapper.h +++ b/tensorflow/core/platform/hexagon/soc_interface.h @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ -#define TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ +#ifndef TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ +#define TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ // Declaration of APIs provided by hexagon shared library. This header is shared // with both hexagon library built with qualcomm SDK and tensorflow. -// All functions defined here must have prefix "hexagon_gemm_wrapper" to avoid +// All functions defined here must have prefix "soc_interface" to avoid // naming conflicts. #ifdef __cplusplus extern "C" { @@ -26,14 +26,14 @@ extern "C" { // Returns the version of loaded hexagon wrapper shared library. // You should assert that the version matches the expected version before // calling APIs defined in this header. -int hexagon_gemm_wrapper_GetWrapperVersion(); +int soc_interface_GetWrapperVersion(); // Returns the version of hexagon binary. // You should assert that the version matches the expected version before // calling APIs defined in this header. 
-int hexagon_gemm_wrapper_GetHexagonBinaryVersion(); +int soc_interface_GetHexagonBinaryVersion(); // TODO(satok): Support gemm APIs via RPC #ifdef __cplusplus } #endif // __cplusplus -#endif // TENSORFLOW_PLATFORM_HEXAGON_GEMM_WRAPPER_H_ +#endif // TENSORFLOW_PLATFORM_HEXAGON_SOC_INTERFACE_H_ diff --git a/tensorflow/core/platform/logging.h b/tensorflow/core/platform/logging.h index 963dc798294..1ca36db548b 100644 --- a/tensorflow/core/platform/logging.h +++ b/tensorflow/core/platform/logging.h @@ -36,6 +36,14 @@ namespace port { void AdjustFilenameForLogging(string* filename); } // namespace port + +namespace internal { +// Emit "message" as a log message to the log for the specified +// "severity" as if it came from a LOG call at "fname:line" +void LogString(const char* fname, int line, int severity, + const string& message); +} // namespace internal + } // namespace tensorflow #endif // TENSORFLOW_PLATFORM_LOGGING_H_ diff --git a/tensorflow/core/platform/logging_test.cc b/tensorflow/core/platform/logging_test.cc index c82dc1b5fdb..f395f6419d1 100644 --- a/tensorflow/core/platform/logging_test.cc +++ b/tensorflow/core/platform/logging_test.cc @@ -88,4 +88,10 @@ TEST(LoggingDeathTest, FailedChecks) { #endif } +TEST(InternalLogString, Basic) { + // Just make sure that this code compiles (we don't actually verify + // the output) + internal::LogString(__FILE__, __LINE__, INFO, "Hello there"); +} + } // namespace tensorflow diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index dc90c17bc04..2be35eb4553 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -97,6 +97,10 @@ message GraphOptions { // no cost model. int64 build_cost_model = 4; + // The number of steps to skip before collecting statistics for the + // cost model. + int64 build_cost_model_after = 9; + // Annotate each Node with Op output shape data, to the extent it can // be statically inferred. bool infer_shapes = 5; diff --git a/tensorflow/core/util/env_var.cc b/tensorflow/core/util/env_var.cc new file mode 100644 index 00000000000..d4e89b966ef --- /dev/null +++ b/tensorflow/core/util/env_var.cc @@ -0,0 +1,63 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+#include "tensorflow/core/util/env_var.h"
+
+#include <stdlib.h>
+
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+                          bool* value) {
+  *value = default_val;
+  const char* tf_env_var_val = getenv(env_var_name.ToString().c_str());
+  if (tf_env_var_val == nullptr) {
+    return Status::OK();
+  }
+  string str_value = str_util::Lowercase(tf_env_var_val);
+  if (str_value == "0" || str_value == "false") {
+    *value = false;
+    return Status::OK();
+  } else if (str_value == "1" || str_value == "true") {
+    *value = true;
+    return Status::OK();
+  }
+  return errors::InvalidArgument(strings::StrCat(
+      "Failed to parse the env-var ${", env_var_name, "} into bool: ",
+      tf_env_var_val, ". Use the default value: ", default_val));
+}
+
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+                           int64* value) {
+  *value = default_val;
+  const char* tf_env_var_val = getenv(env_var_name.ToString().c_str());
+  if (tf_env_var_val == nullptr) {
+    return Status::OK();
+  }
+  if (strings::safe_strto64(tf_env_var_val, value)) {
+    return Status::OK();
+  }
+  return errors::InvalidArgument(strings::StrCat(
+      "Failed to parse the env-var ${", env_var_name, "} into int64: ",
+      tf_env_var_val, ". Use the default value: ", default_val));
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/util/env_var.h b/tensorflow/core/util/env_var.h
new file mode 100644
index 00000000000..ec661f1d81b
--- /dev/null
+++ b/tensorflow/core/util/env_var.h
@@ -0,0 +1,40 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_UTIL_ENV_VAR_H_
+#define TENSORFLOW_UTIL_ENV_VAR_H_
+
+#include "tensorflow/core/lib/core/status.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace tensorflow {
+
+// Reads a boolean from the environment variable "env_var_name" into "value".
+// If the variable is unset, "value" is set to "default_val".
+// The string "0" or a case-insensitive "false" is interpreted as false.
+// The string "1" or a case-insensitive "true" is interpreted as true.
+// Otherwise, an error status is returned.
+Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
+                          bool* value);
+
+// Reads an int64 from the environment variable "env_var_name" into "value".
+// If the variable is unset, "value" is set to "default_val".
+// If the string cannot be parsed into int64, an error status is returned.
+Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
+                           int64* value);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_UTIL_ENV_VAR_H_
diff --git a/tensorflow/core/util/tensor_bundle/BUILD b/tensorflow/core/util/tensor_bundle/BUILD
index 493de9721ce..d6db84277f7 100644
--- a/tensorflow/core/util/tensor_bundle/BUILD
+++ b/tensorflow/core/util/tensor_bundle/BUILD
@@ -15,6 +15,8 @@ load("//tensorflow:tensorflow.bzl", "tf_copts")
 filegroup(
     name = "android_srcs",
     srcs = [
+        "naming.cc",
+        "naming.h",
         "tensor_bundle.cc",
         "tensor_bundle.h",
     ],
@@ -26,6 +28,7 @@ cc_library(
     hdrs = ["tensor_bundle.h"],
     copts = tf_copts(),
     deps = [
+        ":naming",
        "//tensorflow/core:core_cpu_internal",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_headers_lib",
@@ -37,6 +40,13 @@ cc_library(
     ],
 )
+cc_library(
+    name = "naming",
+    srcs = ["naming.cc"],
+    hdrs = ["naming.h"],
+    deps = ["//tensorflow/core:lib"],
+)
+
 cc_test(
     name = "tensor_bundle_test",
     srcs = ["tensor_bundle_test.cc"],
diff --git a/tensorflow/core/util/tensor_bundle/naming.cc b/tensorflow/core/util/tensor_bundle/naming.cc
new file mode 100644
index 00000000000..db3d7ec3acc
--- /dev/null
+++ b/tensorflow/core/util/tensor_bundle/naming.cc
@@ -0,0 +1,36 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/tensor_bundle/naming.h"
+
+#include "tensorflow/core/lib/strings/stringprintf.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+
+string MetaFilename(StringPiece prefix) {
+  return strings::Printf("%.*s.index", static_cast<int>(prefix.size()),
+                         prefix.data());
+}
+
+string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) {
+  DCHECK_GT(num_shards, 0);
+  DCHECK_LT(shard_id, num_shards);
+  return strings::Printf("%.*s.data-%05d-of-%05d",
+                         static_cast<int>(prefix.size()), prefix.data(),
+                         shard_id, num_shards);
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/util/tensor_bundle/naming.h b/tensorflow/core/util/tensor_bundle/naming.h
new file mode 100644
index 00000000000..3d21570c742
--- /dev/null
+++ b/tensorflow/core/util/tensor_bundle/naming.h
@@ -0,0 +1,46 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// A minimal library exposing the naming logic used in tensor_bundle.
+// +// A tensor bundle contains a metadata file and sharded data files, which all +// share a common pathname prefix. +// +// Given the prefix, the actual pathnames of the files can be queried via: +// +// MetaFilename(prefix): pathname of the metadata file. +// DataFilename(prefix, shard_id, num_shards): pathname of a data file. +// +// Typical usage includes forming a filepattern to match files on disk: +// +// // To find the unique metadata file. +// const string metadata_file = MetaFilename("/fs/train/ckpt-step"); +// Env::Default()->GetMatchingFiles(metadata_file, &path); +// +// Regexp can also be used: e.g. R".data-\d{5}-of-\d{5}" for data files. + +#ifndef TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ +#define TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ + +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace tensorflow { + +string MetaFilename(StringPiece prefix); +string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards); + +} // namespace tensorflow + +#endif // TENSORFLOW_UTIL_TENSOR_BUNDLE_NAMING_H_ diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc index 27677b57476..61a69a3840f 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.cc +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.cc @@ -239,19 +239,6 @@ bool IsFullSlice(const TensorSlice& slice_spec, } // namespace -string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards) { - DCHECK_GT(num_shards, 0); - DCHECK_LT(shard_id, num_shards); - return strings::Printf("%.*s.data-%05d-of-%05d", - static_cast(prefix.size()), prefix.data(), - shard_id, num_shards); -} - -string MetaFilename(StringPiece prefix) { - return strings::Printf("%.*s.index", static_cast(prefix.size()), - prefix.data()); -} - BundleWriter::BundleWriter(Env* env, StringPiece prefix) : env_(env), prefix_(prefix.ToString()), out_(nullptr), size_(0) { status_ = diff --git a/tensorflow/core/util/tensor_bundle/tensor_bundle.h b/tensorflow/core/util/tensor_bundle/tensor_bundle.h index b5ca97b5117..46f6749ed89 100644 --- a/tensorflow/core/util/tensor_bundle/tensor_bundle.h +++ b/tensorflow/core/util/tensor_bundle/tensor_bundle.h @@ -78,6 +78,7 @@ limitations under the License. #include "tensorflow/core/platform/file_system.h" #include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/tensor_bundle/naming.h" #include "tensorflow/core/util/tensor_slice_set.h" namespace tensorflow { @@ -309,11 +310,6 @@ class FileOutputBuffer { uint32 crc32c_ = 0; }; -// Pattern: ".data--of-". -string DataFilename(StringPiece prefix, int32 shard_id, int32 num_shards); -// Pattern: ".index." -string MetaFilename(StringPiece prefix); - } // namespace tensorflow #endif // TENSORFLOW_UTIL_TENSOR_BUNDLE_TENSOR_BUNDLE_H_ diff --git a/tensorflow/core/util/use_cudnn.cc b/tensorflow/core/util/use_cudnn.cc index 7e720fdc600..47f57c909db 100644 --- a/tensorflow/core/util/use_cudnn.cc +++ b/tensorflow/core/util/use_cudnn.cc @@ -15,35 +15,39 @@ limitations under the License. 
#include "tensorflow/core/util/use_cudnn.h" -#include - #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/core/util/env_var.h" namespace tensorflow { -static bool ReadBoolFromEnvVar(const char* env_var_name, bool default_val) { - const char* tf_env_var_val = getenv(env_var_name); - if (tf_env_var_val != nullptr) { - StringPiece tf_env_var_val_str(tf_env_var_val); - if (tf_env_var_val_str == "0") { - return false; - } - return true; +bool CanUseCudnn() { + bool value; + Status status = ReadBoolFromEnvVar("TF_USE_CUDNN", true, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); } - return default_val; + return value; } -bool CanUseCudnn() { return ReadBoolFromEnvVar("TF_USE_CUDNN", true); } - bool CudnnUseAutotune() { - return ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true); + bool value; + Status status = ReadBoolFromEnvVar("TF_CUDNN_USE_AUTOTUNE", true, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); + } + return value; } namespace internal { bool AvgPoolUseCudnn() { - return ReadBoolFromEnvVar("TF_AVGPOOL_USE_CUDNN", false); + bool value; + Status status = ReadBoolFromEnvVar("TF_AVGPOOL_USE_CUDNN", false, &value); + if (!status.ok()) { + LOG(ERROR) << status.error_message(); + } + return value; } } // namespace internal diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java b/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java deleted file mode 100644 index 4a96df854ac..00000000000 --- a/tensorflow/examples/android/src/org/tensorflow/demo/TensorFlowClassifier.java +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -package org.tensorflow.demo; - -import android.content.res.AssetManager; -import android.graphics.Bitmap; -import android.os.Trace; -import android.util.Log; - -import java.util.ArrayList; -import java.util.List; -import java.util.StringTokenizer; - -/** - * JNI wrapper class for the Tensorflow native code. - */ -public class TensorFlowClassifier implements Classifier { - private static final String TAG = "TensorflowClassifier"; - - // jni native methods. - public native int initializeTensorFlow( - AssetManager assetManager, - String model, - String labels, - int numClasses, - int inputSize, - int imageMean, - float imageStd, - String inputName, - String outputName); - - private native String classifyImageBmp(Bitmap bitmap); - - private native String classifyImageRgb(int[] output, int width, int height); - - static { - System.loadLibrary("tensorflow_demo"); - } - - @Override - public List recognizeImage(final Bitmap bitmap) { - // Log this method so that it can be analyzed with systrace. 
- Trace.beginSection("Recognize"); - final ArrayList recognitions = new ArrayList(); - for (final String result : classifyImageBmp(bitmap).split("\n")) { - Log.i(TAG, "Parsing [" + result + "]"); - - // Clean up the string as needed - final StringTokenizer st = new StringTokenizer(result); - if (!st.hasMoreTokens()) { - continue; - } - - final String id = st.nextToken(); - final String confidenceString = st.nextToken(); - final float confidence = Float.parseFloat(confidenceString); - - final String title = - result.substring(id.length() + confidenceString.length() + 2, result.length()); - - if (!title.isEmpty()) { - recognitions.add(new Recognition(id, title, confidence, null)); - } - } - Trace.endSection(); - return recognitions; - } - - @Override - public void close() {} -} diff --git a/tensorflow/examples/tutorials/mnist/BUILD b/tensorflow/examples/tutorials/mnist/BUILD index 60fd433a206..a8ec5d4c838 100644 --- a/tensorflow/examples/tutorials/mnist/BUILD +++ b/tensorflow/examples/tutorials/mnist/BUILD @@ -88,6 +88,7 @@ py_test( ], main = "fully_connected_feed.py", srcs_version = "PY2AND3", + tags = ["noasan"], # http://b/31795146 deps = [ ":input_data", ":mnist", @@ -108,7 +109,10 @@ py_test( ], main = "mnist_with_summaries.py", srcs_version = "PY2AND3", - tags = ["notsan"], # http://b/29184009 + tags = [ + "noasan", # http://b/31795146 + "notsan", # http://b/29184009 + ], deps = [ ":input_data", "//tensorflow:tensorflow_py", diff --git a/tensorflow/examples/tutorials/mnist/mnist_softmax.py b/tensorflow/examples/tutorials/mnist/mnist_softmax.py index 1791f97a06d..785ef5767df 100644 --- a/tensorflow/examples/tutorials/mnist/mnist_softmax.py +++ b/tensorflow/examples/tutorials/mnist/mnist_softmax.py @@ -46,7 +46,7 @@ def main(_): # The raw formulation of cross-entropy, # - # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.softmax(y)), + # tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)), # reduction_indices=[1])) # # can be numerically unstable. diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md new file mode 100644 index 00000000000..226c06c0696 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.bijector.md @@ -0,0 +1,1322 @@ + + +# Random variable transformations (contrib) +[TOC] + +Bijector Ops. + +An API for reversible (bijective) transformations of random variables. + +## Background + +Differentiable, bijective transformations of continuous random variables alter +the calculations made in the cumulative/probability distribution functions and +sample function. This module provides a standard interface for making these +manipulations. + +For more details and examples, see the `Bijector` docstring. + +To apply a `Bijector`, use `distributions.TransformedDistribution`. + +## Bijectors + +- - - + +### `class tf.contrib.distributions.bijector.Bijector` {#Bijector} + +Interface for transforming a `Distribution` via `TransformedDistribution`. + +A `Bijector` implements a bijective, differentiable function by transforming +an input `Tensor`. The output `Tensor` shape is constrained by the input +`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +operations: + +1. Forward Evaluation + + Useful for turning one random outcome into another random outcome from a + different distribution. + +2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + +3. 
(log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. + +By convention, transformations of random variables are named in terms of the +forward transformation. The forward transformation creates samples, the +inverse is useful for computing probabilities. + +Example Use: + + - Basic properties: + + ```python + x = ... # A tensor. + # Evaluate forward transformation. + fwd_x = my_bijector.forward(x) + x == my_bijector.inverse(fwd_x) + x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + ``` + + - Computing a log-likelihood: + + ```python + def transformed_log_pdf(bijector, log_pdf, x): + return (bijector.inverse_log_det_jacobian(x) + + log_pdf(bijector.inverse(x))) + ``` + + - Transforming a random outcome: + + ```python + def transformed_sample(bijector, x): + return bijector.forward(x) + ``` + +Example transformations: + + - "Exponential" + + ``` + Y = g(X) = exp(X) + X ~ Normal(0, 1) # Univariate. + ``` + + Implies: + + ``` + g^{-1}(Y) = log(Y) + |Jacobian(g^{-1})(y)| = 1 / y + Y ~ LogNormal(0, 1), i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = (1 / y) Normal(log(y); 0, 1) + ``` + + - "ScaleAndShift" + + ``` + Y = g(X) = sqrtSigma * X + mu + X ~ MultivariateNormal(0, I_d) + ``` + + Implies: + + ``` + g^{-1}(Y) = inv(sqrtSigma) * (Y - mu) + |Jacobian(g^{-1})(y)| = det(inv(sqrtSigma)) + Y ~ MultivariateNormal(mu, sqrtSigma) , i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = det(sqrtSigma)^(-d) * + MultivariateNormal(inv(sqrtSigma) * (y - mu); 0, I_d) + ``` + +Example of why a `Bijector` needs to understand sample, batch, event +partitioning: + +- Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + and event (S, B, E) shape semantics. Suppose + the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. + + For `Exp`, the shape of the `Tensor` returned by `forward` and `inverse` is + unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by + `inverse_log_det_jacobian` is `[4, 2]` because the Jacobian is a reduction + over the event dimensions. + +Subclass Requirements: + +- Subclasses are expected to implement `_forward` and one or both of: + - `_inverse`, `_inverse_log_det_jacobian`, + - `_inverse_and_inverse_log_det_jacobian`. + +- If computation can be shared among `_inverse` and + `_inverse_log_det_jacobian` it is preferable to implement + `_inverse_and_inverse_log_det_jacobian`. This usually reduces + graph-construction overhead because a `Distribution`'s implementation of + `log_prob` will need to evaluate both the inverse Jacobian as well as the + inverse function. + +- If an additional use case needs just `inverse` or just + `inverse_log_det_jacobian` then he or she may also wish to implement these + functions to avoid computing the `inverse_log_det_jacobian` or the + `inverse`, respectively. +- - - + +#### `tf.contrib.distributions.bijector.Bijector.__init__(batch_ndims=None, event_ndims=None, parameters=None, is_constant_jacobian=False, validate_args=False, dtype=None, name=None)` {#Bijector.__init__} + +Constructs Bijector. + +A `Bijector` transforms random variables into new random variables. + +Examples: + +```python +# Create the Y = g(X) = X transform which operates on 4-Tensors of vectors. 
+identity = Identity(batch_ndims=4, event_ndims=1) + +# Create the Y = g(X) = exp(X) transform which operates on matrices. +exp = Exp(batch_ndims=0, event_ndims=2) +``` + +See `Bijector` subclass docstring for more details and specific examples. + +##### Args: + + +* `batch_ndims`: number of dimensions associated with batch coordinates. +* `event_ndims`: number of dimensions associated with event coordinates. +* `parameters`: Dictionary of parameters used by this `Bijector` +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is not a + function of the input. +* `validate_args`: `Boolean`, default `False`. Whether to validate input with + asserts. If `validate_args` is `False`, and the inputs are invalid, + correct behavior is not guaranteed. +* `dtype`: `tf.dtype` supported by this `Bijector`. `None` means dtype is not + enforced. +* `name`: The name to give Ops created by the initializer. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.dtype` {#Bijector.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.forward(x, name='forward')` {#Bijector.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse(x, name='inverse')` {#Bijector.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Bijector.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Bijector.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. 
+ +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.is_constant_jacobian` {#Bijector.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.name` {#Bijector.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.parameters` {#Bijector.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.shaper` {#Bijector.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.validate_args` {#Bijector.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Identity` {#Identity} + +Bijector which computes Y = g(X) = X. + +Example Use: + +```python +# Create the Y=g(X)=X transform which is intended for Tensors with 1 batch +# ndim and 1 event ndim (i.e., vector of vectors). +identity = Identity(batch_ndims=1, event_ndims=1) +x = [[1., 2], + [3, 4]] +x == identity.forward(x) == identity.inverse(x) +``` +- - - + +#### `tf.contrib.distributions.bijector.Identity.__init__(validate_args=False, name='Identity')` {#Identity.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.dtype` {#Identity.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.forward(x, name='forward')` {#Identity.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse(x, name='inverse')` {#Identity.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Identity.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Identity.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.is_constant_jacobian` {#Identity.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.name` {#Identity.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.parameters` {#Identity.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.shaper` {#Identity.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.validate_args` {#Identity.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Inline` {#Inline} + +Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + +Example Use: + +```python +exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") +``` + +The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. +- - - + +#### `tf.contrib.distributions.bijector.Inline.__init__(forward_fn, inverse_fn, inverse_log_det_jacobian_fn, is_constant_jacobian=False, name='Inline')` {#Inline.__init__} + +Creates a `Bijector` from callables. + +##### Args: + + +* `forward_fn`: Python callable implementing the forward transformation. +* `inverse_fn`: Python callable implementing the inverse transformation. +* `inverse_log_det_jacobian_fn`: Python callable implementing the + inverse_log_det_jacobian transformation. +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant + for all input arguments. +* `name`: `String`, name given to ops managed by this object. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.dtype` {#Inline.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.forward(x, name='forward')` {#Inline.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. 
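+
+For illustration, a possible round trip through the `exp` `Inline` bijector
+constructed in the class example above (the tensor values here are arbitrary
+and only meant as a sketch):
+
+```python
+x = tf.constant([[1., 2.], [3., 4.]])
+y = exp.forward(x)       # element-wise tf.exp(x)
+x_back = exp.inverse(y)  # element-wise tf.log(y); recovers x
+```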
+ + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse(x, name='inverse')` {#Inline.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Inline.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Inline.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.is_constant_jacobian` {#Inline.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.name` {#Inline.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.parameters` {#Inline.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.shaper` {#Inline.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.validate_args` {#Inline.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.Exp` {#Exp} + +Bijector which computes Y = g(X) = exp(X). + +Example Use: + +```python +# Create the Y=g(X)=exp(X) transform which works only on Tensors with 1 +# batch ndim and 2 event ndims (i.e., vector of matrices). +exp = Exp(batch_ndims=1, event_ndims=2) +x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] +exp(x) == exp.forward(x) +log(x) == exp.inverse(x) +``` + +Note: the exp(.) is applied element-wise but the Jacobian is a reduction +over the event space. 
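+
+As a sketch of that reduction (values are arbitrary), with `event_ndims=1` the
+Jacobian term collapses the last (event) dimension, matching the `Inline`
+equivalence shown earlier:
+
+```python
+bij = Exp(event_ndims=1)
+y = tf.constant([[1., 2.], [3., 4.]])
+# inverse_log_det_jacobian(y) = -sum(log(y)) over the event dimension,
+# so the result has one value per batch member (shape [2] here).
+ildj = bij.inverse_log_det_jacobian(y)
+```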
+- - - + +#### `tf.contrib.distributions.bijector.Exp.__init__(event_ndims=0, validate_args=False, name='Exp')` {#Exp.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.dtype` {#Exp.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.forward(x, name='forward')` {#Exp.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse(x, name='inverse')` {#Exp.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Exp.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Exp.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.is_constant_jacobian` {#Exp.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.name` {#Exp.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.parameters` {#Exp.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.shaper` {#Exp.shaper} + +Returns shape object used to manage shape constraints. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Exp.validate_args` {#Exp.validate_args} + +Returns True if Tensor arguments will be validated. + + + +- - - + +### `class tf.contrib.distributions.bijector.ScaleAndShift` {#ScaleAndShift} + +Bijector which computes Y = g(X; loc, scale) = scale * X + loc. + +Example Use: + +```python +# No batch, scalar. +mu = 0 # shape=[] +sigma = 1 # shape=[] +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 0 + +# One batch, scalar. +mu = ... # shape=[b], b>0 +sigma = ... # shape=[b], b>0 +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 1 +# b.shaper.event_ndims == 0 + +# No batch, multivariate. +mu = ... # shape=[d], d>0 +sigma = ... # shape=[d, d], d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 1 + +# (B1*B2*...*Bb)-batch, multivariate. +mu = ... # shape=[B1,...,Bb, d], b>0, d>0 +sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == b +# b.shaper.event_ndims == 1 + +# Mu is broadcast: +mu = 1 +sigma = [I, I] # I is a 3x3 identity matrix. +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +x = numpy.ones(S + sigma.shape) +b.forward(x) # == x + 1 +``` +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.__init__(loc, scale, event_ndims=0, validate_args=False, name='ScaleAndShift')` {#ScaleAndShift.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.dtype` {#ScaleAndShift.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.forward(x, name='forward')` {#ScaleAndShift.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse(x, name='inverse')` {#ScaleAndShift.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#ScaleAndShift.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
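+
+A small sketch of the scalar case from the class docstring (values are
+arbitrary); `forward` applies `scale * X + loc` and `inverse` undoes it:
+
+```python
+b = ScaleAndShift(loc=1., scale=2.)  # batch_ndims == 0, event_ndims == 0
+x = tf.constant([0., 1., 2.])
+y = b.forward(x)       # 2 * x + 1
+x_back = b.inverse(y)  # (y - 1) / 2
+```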
+ + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#ScaleAndShift.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.is_constant_jacobian` {#ScaleAndShift.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.loc` {#ScaleAndShift.loc} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.name` {#ScaleAndShift.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.parameters` {#ScaleAndShift.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.scale` {#ScaleAndShift.scale} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.shaper` {#ScaleAndShift.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.validate_args` {#ScaleAndShift.validate_args} + +Returns True if Tensor arguments will be validated. + + + + +## Other Functions and Classes +- - - + +### `class tf.contrib.distributions.bijector.Softplus` {#Softplus} + +Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + +The softplus `Bijector` has the following two useful properties: + +* The domain is the positive real numbers +* `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Softplus.__init__(event_ndims=0, validate_args=False, name='Softplus')` {#Softplus.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.dtype` {#Softplus.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.forward(x, name='forward')` {#Softplus.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse(x, name='inverse')` {#Softplus.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Softplus.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Softplus.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.is_constant_jacobian` {#Softplus.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.name` {#Softplus.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.parameters` {#Softplus.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.shaper` {#Softplus.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.validate_args` {#Softplus.validate_args} + +Returns True if Tensor arguments will be validated. + + + diff --git a/tensorflow/g3doc/api_docs/python/contrib.distributions.md b/tensorflow/g3doc/api_docs/python/contrib.distributions.md index 0e8a2a798c0..7c059a7de41 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.distributions.md +++ b/tensorflow/g3doc/api_docs/python/contrib.distributions.md @@ -727,10 +727,12 @@ Initialize a batch of Binomial distributions. 
* `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm]` `m >= 0`, and the same dtype as `n`. Each entry represents logits for the probability - of success for independent Binomial distributions. + of success for independent Binomial distributions. Only one of + `logits` or `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm]` `m >= 0`, `p in [0, 1]`. Each entry represents the - probability of success for independent Binomial distributions. + probability of success for independent Binomial distributions. Only one + of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n`, `p`, and `x` in `prob` and `log_prob`. If `False` and inputs are invalid, correct behavior is not guaranteed. @@ -1033,7 +1035,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits} -Log-odds. +Log-odds of success. - - - @@ -1321,10 +1323,11 @@ Construct Bernoulli distributions. * `logits`: An N-D `Tensor` representing the log-odds of a positive event. Each entry in the `Tensor` parametrizes an independent Bernoulli distribution where the probability of an event - is sigmoid(logits). + is sigmoid(logits). Only one of `logits` or `p` should be passed in. * `p`: An N-D `Tensor` representing the probability of a positive event. Each entry in the `Tensor` parameterizes an independent - Bernoulli distribution. + Bernoulli distribution. Only one of `logits` or `p` should be passed + in. * `dtype`: dtype for samples. * `validate_args`: `Boolean`, default `False`. Whether to validate that `0 <= p <= 1`. If `validate_args` is `False`, and the inputs are @@ -1609,7 +1612,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits} - +Log-odds of success. - - - @@ -1641,7 +1644,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Bernoulli.p` {#Bernoulli.p} - +Probability of success. - - - @@ -2142,7 +2145,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.logits` {#BernoulliWithSigmoidP.logits} - +Log-odds of success. - - - @@ -2174,7 +2177,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.p` {#BernoulliWithSigmoidP.p} - +Probability of success. - - - @@ -3596,9 +3599,45 @@ Categorical distribution. The categorical distribution is parameterized by the log-probabilities of a set of classes. + +#### Examples + +Creates a 3-class distribution, with the 2nd class the most likely to be +drawn from. + +```python +p = [0.1, 0.5, 0.4] +dist = Categorical(p=p) +``` + +Creates a 3-class distribution, with the 2nd class the most likely to be +drawn from, using logits. + +```python +logits = [-50, 400, 40] +dist = Categorical(logits=logits) +``` + +Creates a 3-class distribution, with the 3rd class the most likely to be drawn. +The distribution functions can be evaluated on counts. + +```python +# counts is a scalar. +p = [0.1, 0.4, 0.5] +dist = Categorical(p=p) +dist.pmf(0) # Shape [] + +# p will be broadcast to [[0.1, 0.4, 0.5], [0.1, 0.4, 0.5]] to match counts. +counts = [1, 0] +dist.pmf(counts) # Shape [2] + +# p will be broadcast to shape [3, 5, 7, 3] to match counts.
+counts = [[...]] # Shape [5, 7, 3] +dist.pmf(counts) # Shape [5, 7, 3] +``` - - - -#### `tf.contrib.distributions.Categorical.__init__(logits, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} +#### `tf.contrib.distributions.Categorical.__init__(logits=None, p=None, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} Initialize Categorical distributions using class log-probabilities. @@ -3608,7 +3647,13 @@ Initialize Categorical distributions using class log-probabilities. * `logits`: An N-D `Tensor`, `N >= 1`, representing the log probabilities of a set of Categorical distributions. The first `N - 1` dimensions index into a batch of independent distributions and the last dimension - indexes into the classes. + represents a vector of logits for each class. Only one of `logits` or + `p` should be passed in. +* `p`: An N-D `Tensor`, `N >= 1`, representing the probabilities + of a set of Categorical distributions. The first `N - 1` dimensions + index into a batch of independent distributions and the last dimension + represents a vector of probabilities for each class. Only one of + `logits` or `p` should be passed in. * `dtype`: The type of the event samples (default: int32). * `validate_args`: Unused in this distribution. * `allow_nan_stats`: `Boolean`, default `True`. If `False`, raise an @@ -3886,7 +3931,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits} - +Vector of coordinatewise logits. - - - @@ -3917,6 +3962,15 @@ Name prepended to all ops created by this `Distribution`. Scalar `int32` tensor: the number of classes. +- - - + +#### `tf.contrib.distributions.Categorical.p` {#Categorical.p} + +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. + + - - - #### `tf.contrib.distributions.Categorical.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Categorical.param_shapes} @@ -17730,12 +17784,13 @@ Initialize a batch of Multinomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm, k], m >= 0`, and the same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` - different `k` class Multinomial distributions. + different `k` class Multinomial distributions. Only one of `logits` or + `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm, k]` `m >= 0` and same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` different `k` class Multinomial distributions. `p`'s components in the last portion of its shape should - sum up to 1. + sum up to 1. Only one of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n` and `p`, and `x` in `prob` and `log_prob`. If `False`, correct behavior is not guaranteed. @@ -18041,7 +18096,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits} -Log-odds. +Vector of coordinatewise logits. - - - @@ -18076,7 +18131,9 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Multinomial.p` {#Multinomial.p} -Event probabilities. +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. 
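Several of the constructors above now accept either `logits` or `p`, but not both. As a quick illustrative sketch (the values and variable names are hypothetical, and it assumes the `tf.contrib.distributions` API as documented in this diff), the two parameterizations below describe the same Bernoulli distribution:

```python
import tensorflow as tf

ds = tf.contrib.distributions

# p = sigmoid(logits), i.e. logits = log(p) - log(1 - p).
b_from_p = ds.Bernoulli(p=0.3)
b_from_logits = ds.Bernoulli(logits=tf.log(0.3) - tf.log(0.7))

# Supplying both parameterizations at once is expected to raise an error:
# ds.Bernoulli(p=0.3, logits=0.)  # ValueError
```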
- - - @@ -19582,66 +19639,114 @@ Variance. A Transformed Distribution. -A Transformed Distribution models `p(y)` given a base distribution `p(x)`, -an invertible transform, `y = f(x)`, and the determinant of the Jacobian of -`f(x)`. +A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and +a deterministic, invertible, differentiable transform, `Y = g(X)`. The +transform is typically an instance of the `Bijector` class and the base +distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. -#### Mathematical details +Write `P(Y=y)` for the cumulative distribution function of random variable (rv) `Y` and +`p` for its derivative with respect to `Y`. Assume that `Y=g(X)` where `g` is +continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). -* `p(x)` - probability distribution for random variable X -* `p(y)` - probability distribution for random variable Y -* `f` - transform -* `g` - inverse transform, `g(f(x)) = x` -* `J(x)` - Jacobian of f(x) +A `TransformedDistribution` alters the inputs/outputs of a `Distribution` +associated with rv `X` in the following ways: -A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. - * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(x) + + distribution.log_prob(bijector.inverse(x))) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_cdf(bijector.inverse(x)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python -logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1.
- tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" -) +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") +``` + +A `LogNormal` made from callables: + +```python +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="LogNormalTransformedDistribution")) +``` + +Another example constructing a Normal from a StandardNormal: + +```python +ds = tf.contrib.distributions +normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` - - - -#### `tf.contrib.distributions.TransformedDistribution.__init__(base_dist_cls, transform, inverse, log_det_jacobian, name='TransformedDistribution', **base_dist_args)` {#TransformedDistribution.__init__} +#### `tf.contrib.distributions.TransformedDistribution.__init__(base_distribution, bijector, name='TransformedDistribution')` {#TransformedDistribution.__init__} Construct a Transformed Distribution. ##### Args: -* `base_dist_cls`: the base distribution class to transform. Must be a - subclass of `Distribution`. -* `transform`: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. -* `inverse`: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. -* `log_det_jacobian`: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. +* `base_distribution`: The base distribution to transform. Typically an + instance of `Distribution`. +* `bijector`: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. * `name`: The name for the distribution. -* `**base_dist_args`: kwargs to pass on to dist_cls on construction. - -##### Raises: - - -* `TypeError`: if `base_dist_cls` is not a subclass of - `Distribution`. - - - @@ -19692,6 +19797,13 @@ independent distributions of this kind the instance represents. * `batch_shape`: `Tensor`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.bijector` {#TransformedDistribution.bijector} + +Function transforming x => y. + + - - - #### `tf.contrib.distributions.TransformedDistribution.cdf(value, name='cdf')` {#TransformedDistribution.cdf} @@ -19776,13 +19888,6 @@ Same meaning as `event_shape`. May be only partially defined. * `event_shape`: `TensorShape`, possibly unknown. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.inverse` {#TransformedDistribution.inverse} - -Inverse function of transform, y => x. - - - - - #### `tf.contrib.distributions.TransformedDistribution.is_continuous` {#TransformedDistribution.is_continuous} @@ -19826,13 +19931,6 @@ a more accurate answer than simply taking the logarithm of the `cdf` when values of type `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.log_det_jacobian` {#TransformedDistribution.log_det_jacobian} - -Function computing the log determinant of the Jacobian of transform.
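To make the sample/log_prob delegation described above concrete, here is a small usage sketch built from the `Exp` example; it is illustrative only and assumes `sample_n` and `log_prob` behave as documented for `Distribution` elsewhere in this diff:

```python
import tensorflow as tf

ds = tf.contrib.distributions
log_normal = ds.TransformedDistribution(
    base_distribution=ds.Normal(mu=0., sigma=1.),
    bijector=ds.bijector.Exp(),
    name="LogNormal")

y = log_normal.sample_n(4)      # bijector.forward applied to base samples
log_p = log_normal.log_prob(y)  # base log_prob(inverse(y)) + inverse_log_det_jacobian(y)
```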
- - - - - #### `tf.contrib.distributions.TransformedDistribution.log_pdf(value, name='log_pdf')` {#TransformedDistribution.log_pdf} @@ -19890,8 +19988,8 @@ Log probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `(log o p o g)(y) - (log o det o J o g)(y)`, -where `g` is the inverse of `transform`. +Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, +where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -20065,8 +20163,8 @@ Probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of -`transform`. +Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the +inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -20116,7 +20214,7 @@ Generate `n` samples. Additional documentation from `TransformedDistribution`: Samples from the base distribution and then passes through -the transform. +the bijector's forward transform. ##### Args: @@ -20170,13 +20268,6 @@ survival_function(x) = P[X > x] `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform} - -Function transforming x => y. - - - - - #### `tf.contrib.distributions.TransformedDistribution.validate_args` {#TransformedDistribution.validate_args} diff --git a/tensorflow/g3doc/api_docs/python/contrib.learn.md b/tensorflow/g3doc/api_docs/python/contrib.learn.md index 1d647aea583..f05cdbab6cb 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.learn.md +++ b/tensorflow/g3doc/api_docs/python/contrib.learn.md @@ -295,8 +295,15 @@ Constructs an Estimator instance. * `model_fn`: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` diff --git a/tensorflow/g3doc/api_docs/python/contrib.losses.md b/tensorflow/g3doc/api_docs/python/contrib.losses.md index 2398f5e8f1b..60a50a4b5e2 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.losses.md +++ b/tensorflow/g3doc/api_docs/python/contrib.losses.md @@ -228,82 +228,84 @@ measurable element of `predictions` is scaled by the corresponding value of - - - -### `tf.contrib.losses.mean_pairwise_squared_error(*args, **kwargs)` {#mean_pairwise_squared_error} +### `tf.contrib.losses.mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_pairwise_squared_error} -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) +Adds a pairwise-errors-squared loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
+Unlike `mean_squared_error`, which is a measure of the differences between +corresponding elements of `predictions` and `targets`, +`mean_pairwise_squared_error` is a measure of the differences between pairs of +corresponding elements of `predictions` and `targets`. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. +For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are +three pairs of differences are summed to compute the loss: + loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 +Note that since the inputs are of size [batch_size, d0, ... dN], the +corresponding pairs are computed within each batch sample but not across +samples within a batch. For example, if `predictions` represents a batch of +16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs +is drawn from each image, but not across images. - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. +##### Args: - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs, a tensor of size [batch_size, d0, .. dN] + where N+1 is the total number of dimensions in `predictions`. +* `targets`: The ground truth output tensor, whose shape must match the shape of + the `predictions` tensor. +* `weight`: Coefficients for the loss a scalar, a tensor of shape [batch_size] + or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. 
+ +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. - - - -### `tf.contrib.losses.mean_squared_error(*args, **kwargs)` {#mean_squared_error} +### `tf.contrib.losses.mean_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_squared_error} -Adds a Sum-of-Squares loss to the training procedure. (deprecated) +Adds a Sum-of-Squares loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. If the shape of +`weight` matches the shape of `predictions`, then the loss of each +measurable element of `predictions` is scaled by the corresponding value of +`weight`. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. +##### Args: - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs. +* `targets`: The ground truth output tensor, same dimensions as 'predictions'. +* `weight`: Coefficients for the loss a scalar, a tensor of shape + [batch_size] or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. + +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. - - - @@ -411,83 +413,3 @@ corresponding sample. if `weight` is None. -- - - - -### `tf.contrib.losses.sum_of_pairwise_squares(*args, **kwargs)` {#sum_of_pairwise_squares} - -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. - - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. - - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - - Note that since the inputs are of size [batch_size, d0, ... 
dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. - - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - - -- - - - -### `tf.contrib.losses.sum_of_squares(*args, **kwargs)` {#sum_of_squares} - -Adds a Sum-of-Squares loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. - - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - - diff --git a/tensorflow/g3doc/api_docs/python/contrib.metrics.md b/tensorflow/g3doc/api_docs/python/contrib.metrics.md index 468d4f96cd4..d3fc05c3a20 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.metrics.md +++ b/tensorflow/g3doc/api_docs/python/contrib.metrics.md @@ -355,7 +355,11 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) +Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) + +THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. +Instructions for updating: +Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. Instructions for updating: @@ -964,7 +968,7 @@ If `weights` is `None`, weights default to 1. 
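For readers following the deprecation note above, a rough migration sketch (purely illustrative; the tensor names are hypothetical and the exact `streaming_sparse_recall_at_k` keyword arguments are assumed from the docs elsewhere in this diff):

```python
import tensorflow as tf

# Hypothetical inputs: per-class scores and dense integer labels.
predictions = tf.random_uniform([8, 10])
labels = tf.cast(tf.random_uniform([8], maxval=10, dtype=tf.int32), tf.int64)

# Reshape dense labels from [batch_size] to [batch_size, 1], as instructed.
labels_2d = tf.reshape(labels, [-1, 1])
recall_at_5, update_op = tf.contrib.metrics.streaming_sparse_recall_at_k(
    predictions=predictions, labels=labels_2d, k=5)
```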
Use weights of 0 to mask values. [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. * `k`: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. @@ -1031,7 +1035,7 @@ Instructions for updating: [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be @@ -1104,7 +1108,7 @@ Instructions for updating: labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. diff --git a/tensorflow/g3doc/api_docs/python/contrib.training.md b/tensorflow/g3doc/api_docs/python/contrib.training.md index 8b08e2b8166..9f311c5f632 100644 --- a/tensorflow/g3doc/api_docs/python/contrib.training.md +++ b/tensorflow/g3doc/api_docs/python/contrib.training.md @@ -724,10 +724,12 @@ It should be run in a separate thread via e.g. a `QueueRunner`. ## Online data resampling To resample data with replacement on a per-example basis, use -['resample_at_rate'](#resample_at_rate), providing the desired rate -for each example. If you wish to specify relative rates, rather than -absolute ones, use ['weighted_resample'](#weighted_resample) (which -also returns the actual resampling rate used for each output example). +['rejection_sample'](#rejection_sample) or +['resample_at_rate'](#resample_at_rate). For `rejection_sample`, provide +a boolean Tensor describing whether to accept or reject. For `resample_at_rate`, +providing the desired rate for each example. If you wish to specify relative +rates, rather than absolute ones, use ['weighted_resample'](#weighted_resample) +(which also returns the actual resampling rate used for each output example). Use ['stratified_sample'](#stratified_sample) or ['stratified_sample_unknown_dist'](#stratified_sample_unknown_dist) to @@ -737,6 +739,66 @@ have a binary classification dataset that is 99.9% class 1, a common approach is to resample from the data so that the data is more balanced. +- - - + +### `tf.contrib.training.rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, enqueue_many=False, prebatch_capacity=16, prebatch_threads=1, runtime_checks=False, name=None)` {#rejection_sample} + +Stochastically creates batches by rejection sampling. + +Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce +a scalar tensor between 0 and 1. This tensor corresponds to the probability of +being accepted. 
When `batch_size` tensor groups have been accepted, the batch +queue will return a mini-batch. + +##### Args: + + +* `tensors`: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. +* `accept_prob_fn`: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. +* `batch_size`: Size of batch to be returned. +* `queue_threads`: The number of threads for the queue that will hold the final + batch. +* `enqueue_many`: Bool. If true, interpret input tensors as having a batch + dimension. +* `prebatch_capacity`: Capacity for the large queue that is used to convert + batched tensors to single examples. +* `prebatch_threads`: Number of threads for the large queue that is used to + convert batched tensors to single examples. +* `runtime_checks`: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. +* `name`: Optional prefix for ops created by this function. + +##### Raises: + + +* `ValueError`: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. +* `ValueError`: enqueue_many is True, and batch dimension on data and labels + don't match. +* `ValueError`: if a zero initial probability class has a nonzero target + probability. + +##### Returns: + + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + +##### Example: + + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... + + - - - ### `tf.contrib.training.resample_at_rate(inputs, rates, scope=None, seed=None, back_prop=False)` {#resample_at_rate} diff --git a/tensorflow/g3doc/api_docs/python/control_flow_ops.md b/tensorflow/g3doc/api_docs/python/control_flow_ops.md index dd7e4158566..70c27127e6c 100644 --- a/tensorflow/g3doc/api_docs/python/control_flow_ops.md +++ b/tensorflow/g3doc/api_docs/python/control_flow_ops.md @@ -205,9 +205,9 @@ creates the tensors to be returned if the boolean evaluates to True. in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. -If `exclusive==True`, all predicates are evaluated, and a logging operation -with an error is returned if more than one of the predicates evaluates to -True. If `exclusive==False`, execution stops are the first predicate which +If `exclusive==True`, all predicates are evaluated, and an exception is +thrown if more than one of the predicates evaluates to `True`. +If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -253,7 +253,7 @@ Example 2: * `pred_fn_pairs`: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. * `default`: A callable that returns a list of tensors. -* `exclusive`: True iff more than one predicate is allowed to evaluate to True. +* `exclusive`: True iff at most one predicate is allowed to evaluate to `True`. * `name`: A name for this operation (optional). 
##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/framework.md b/tensorflow/g3doc/api_docs/python/framework.md index eacd295ef38..24ef7787597 100644 --- a/tensorflow/g3doc/api_docs/python/framework.md +++ b/tensorflow/g3doc/api_docs/python/framework.md @@ -1410,6 +1410,9 @@ if tf.constant(5) < tf.constant(7): # Will raise. # ... ``` +This disallows ambiguities between testing the Python value vs testing the +dynamic condition of the `Tensor`. + ##### Raises: `TypeError`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md index 9314837b8ee..02bae13a15e 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.case.md @@ -9,9 +9,9 @@ creates the tensors to be returned if the boolean evaluates to True. in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. -If `exclusive==True`, all predicates are evaluated, and a logging operation -with an error is returned if more than one of the predicates evaluates to -True. If `exclusive==False`, execution stops are the first predicate which +If `exclusive==True`, all predicates are evaluated, and an exception is +thrown if more than one of the predicates evaluates to `True`. +If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -57,7 +57,7 @@ Example 2: * `pred_fn_pairs`: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. * `default`: A callable that returns a list of tensors. -* `exclusive`: True iff more than one predicate is allowed to evaluate to True. +* `exclusive`: True iff at most one predicate is allowed to evaluate to `True`. * `name`: A name for this operation (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md index e4ce4b16dd5..e9b11e4b4ee 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard0/tf.contrib.distributions.Bernoulli.md @@ -14,10 +14,11 @@ Construct Bernoulli distributions. * `logits`: An N-D `Tensor` representing the log-odds of a positive event. Each entry in the `Tensor` parametrizes an independent Bernoulli distribution where the probability of an event - is sigmoid(logits). + is sigmoid(logits). Only one of `logits` or `p` should be passed in. * `p`: An N-D `Tensor` representing the probability of a positive event. Each entry in the `Tensor` parameterizes an independent - Bernoulli distribution. + Bernoulli distribution. Only one of `logits` or `p` should be passed + in. * `dtype`: dtype for samples. * `validate_args`: `Boolean`, default `False`. Whether to validate that `0 <= p <= 1`. If `validate_args` is `False`, and the inputs are @@ -302,7 +303,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Bernoulli.logits` {#Bernoulli.logits} - +Log-odds of success. 
- - - @@ -334,7 +335,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Bernoulli.p` {#Bernoulli.p} - +Probability of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md index 621e994691a..7360430d36b 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.Tensor.md @@ -289,6 +289,9 @@ if tf.constant(5) < tf.constant(7): # Will raise. # ... ``` +This disallows ambiguities between testing the Python value vs testing the +dynamic condition of the `Tensor`. + ##### Raises: `TypeError`. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md index 4b141b4fca9..dc761426fc9 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.constant_initializer.md @@ -72,5 +72,5 @@ tensor shape, the initializer will raise a `ValueError`. * `ValueError`: Too many elements provided. Needed at most 6, but received 8 - ``` +``` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md index e9000f01015..2ac3000749c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.distributions.TransformedDistribution.md @@ -1,65 +1,113 @@ A Transformed Distribution. -A Transformed Distribution models `p(y)` given a base distribution `p(x)`, -an invertible transform, `y = f(x)`, and the determinant of the Jacobian of -`f(x)`. +A Transformed Distribution models `p(y)` given a base distribution `p(x)`, and +a deterministic, invertible, differentiable transform, `Y = g(X)`. The +transform is typically an instance of the `Bijector` class and the base +distribution is typically an instance of the `Distribution` class. Shapes, type, and reparameterization are taken from the base distribution. -#### Mathematical details +Write `P(Y=y)` for cumulative density function of random variable (rv) `Y` and +`p` for its derivative wrt to `Y`. Assume that `Y=g(X)` where `g` is +continuous and `X=g^{-1}(Y)`. Write `J` for the Jacobian (of some function). -* `p(x)` - probability distribution for random variable X -* `p(y)` - probability distribution for random variable Y -* `f` - transform -* `g` - inverse transform, `g(f(x)) = x` -* `J(x)` - Jacobian of f(x) +A `TransformedDistribution` alters the input/outputs of a `Distribution` +associated with rv `X` in the following ways: -A Transformed Distribution exposes `sample` and `pdf`: + * `sample`: - * `sample`: `y = f(x)`, after drawing a sample of X. 
- * `pdf`: `p(y) = p(x) / det|J(x)| = p(g(y)) / det|J(g(y))|` + Mathematically: + + ``` + Y = g(X) + ``` + + Programmatically: + + ```python + return bijector.forward(distribution.sample(...)) + ``` + + * `log_prob`: + + Mathematically: + + ``` + (log o p o g^{-1})(y) + (log o det o J o g^{-1})(y) + ``` + + Programmatically: + + ```python + return (bijector.inverse_log_det_jacobian(x) + + distribution.log_prob(bijector.inverse(x)) + ``` + + * `log_cdf`: + + Mathematically: + + ``` + (log o P o g^{-1})(y) + ``` + + Programmatically: + + ```python + return distribution.log_prob(bijector.inverse(x)) + ``` + + * and similarly for: `cdf`, `prob`, `log_survival_function`, + `survival_function`. A simple example constructing a Log-Normal distribution from a Normal distribution: ```python -logit_normal = TransformedDistribution( - base_dist_cls=tf.contrib.distributions.Normal, - mu=mu, - sigma=sigma, - transform=lambda x: tf.sigmoid(x), - inverse=lambda y: tf.log(y) - tf.log(1. - y), - log_det_jacobian=(lambda x: - tf.reduce_sum(tf.log(tf.sigmoid(x)) + tf.log(1. - tf.sigmoid(x)), - reduction_indices=[-1]))) - name="LogitNormalTransformedDistribution" -) +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Exp(), + name="LogNormalTransformedDistribution") +``` + +A `LogNormal` made from callables: + +```python +ds = tf.contrib.distributions +log_normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=mu, sigma=sigma), + bijector=ds.bijector.Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(x), reduction_indices=-1)), + name="LogNormalTransformedDistribution") +``` + +Another example constructing a Normal from a StandardNormal: + +```python +ds = tf.contrib.distributions +normal = ds.TransformedDistribution( + base_distribution=ds.Normal(mu=0, sigma=1), + bijector=ds.bijector.ScaleAndShift(loc=mu, scale=sigma, event_ndims=0), + name="NormalTransformedDistribution") ``` - - - -#### `tf.contrib.distributions.TransformedDistribution.__init__(base_dist_cls, transform, inverse, log_det_jacobian, name='TransformedDistribution', **base_dist_args)` {#TransformedDistribution.__init__} +#### `tf.contrib.distributions.TransformedDistribution.__init__(base_distribution, bijector, name='TransformedDistribution')` {#TransformedDistribution.__init__} Construct a Transformed Distribution. ##### Args: -* `base_dist_cls`: the base distribution class to transform. Must be a - subclass of `Distribution`. -* `transform`: a callable that takes a `Tensor` sample from `base_dist` and - returns a `Tensor` of the same shape and type. `x => y`. -* `inverse`: a callable that computes the inverse of transform. `y => x`. If - None, users can only call `log_pdf` on values returned by `sample`. -* `log_det_jacobian`: a callable that takes a `Tensor` sample from `base_dist` - and returns the log of the determinant of the Jacobian of `transform`. +* `base_distribution`: The base distribution class to transform. Typically an + instance of `Distribution`. +* `bijector`: The object responsible for calculating the transformation. + Typically an instance of `Bijector`. * `name`: The name for the distribution. -* `**base_dist_args`: kwargs to pass on to dist_cls on construction. - -##### Raises: - - -* `TypeError`: if `base_dist_cls` is not a subclass of - `Distribution`. - - - @@ -110,6 +158,13 @@ independent distributions of this kind the instance represents. 
* `batch_shape`: `Tensor`. +- - - + +#### `tf.contrib.distributions.TransformedDistribution.bijector` {#TransformedDistribution.bijector} + +Function transforming x => y. + + - - - #### `tf.contrib.distributions.TransformedDistribution.cdf(value, name='cdf')` {#TransformedDistribution.cdf} @@ -194,13 +249,6 @@ Same meaning as `event_shape`. May be only partially defined. * `event_shape`: `TensorShape`, possibly unknown. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.inverse` {#TransformedDistribution.inverse} - -Inverse function of transform, y => x. - - - - - #### `tf.contrib.distributions.TransformedDistribution.is_continuous` {#TransformedDistribution.is_continuous} @@ -244,13 +292,6 @@ a more accurate answer than simply taking the logarithm of the `cdf` when values of type `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.log_det_jacobian` {#TransformedDistribution.log_det_jacobian} - -Function computing the log determinant of the Jacobian of transform. - - - - - #### `tf.contrib.distributions.TransformedDistribution.log_pdf(value, name='log_pdf')` {#TransformedDistribution.log_pdf} @@ -308,8 +349,8 @@ Log probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `(log o p o g)(y) - (log o det o J o g)(y)`, -where `g` is the inverse of `transform`. +Implements `(log o p o g^{-1})(y) + (log o det o J o g^{-1})(y)`, +where `g^{-1}` is the inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -483,8 +524,8 @@ Probability density/mass function (depending on `is_continuous`). Additional documentation from `TransformedDistribution`: -Implements `p(g(y)) / det|J(g(y))|`, where `g` is the inverse of -`transform`. +Implements `p(g^{-1}(y)) det|J(g^{-1}(y))|`, where `g^{-1}` is the +inverse of `transform`. Also raises a `ValueError` if `inverse` was not provided to the distribution and `y` was not returned from `sample`. @@ -534,7 +575,7 @@ Generate `n` samples. Additional documentation from `TransformedDistribution`: Samples from the base distribution and then passes through -the transform. +the bijector's forward transform. ##### Args: @@ -588,13 +629,6 @@ survival_function(x) = P[X > x] `self.dtype`. -- - - - -#### `tf.contrib.distributions.TransformedDistribution.transform` {#TransformedDistribution.transform} - -Function transforming x => y. - - - - - #### `tf.contrib.distributions.TransformedDistribution.validate_args` {#TransformedDistribution.validate_args} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md deleted file mode 100644 index 13326b052a6..00000000000 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.losses.sum_of_pairwise_squares.md +++ /dev/null @@ -1,44 +0,0 @@ -### `tf.contrib.losses.sum_of_pairwise_squares(*args, **kwargs)` {#sum_of_pairwise_squares} - -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
- - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. - - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. - - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md index ed0094534ce..1b19ba9c7b7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard1/tf.contrib.metrics.streaming_sparse_recall_at_k.md @@ -39,7 +39,7 @@ Instructions for updating: labels: `int64` `Tensor` or `SparseTensor` with shape [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` - has shape [batch_size, num_labels]. [D1, ... DN] must match `labels`. + has shape [batch_size, num_labels]. [D1, ... DN] must match `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md index acecb8e52cb..7d3f2a3a252 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.Categorical.md @@ -2,9 +2,45 @@ Categorical distribution. The categorical distribution is parameterized by the log-probabilities of a set of classes. + +#### Examples + +Creates a 3-class distiribution, with the 2nd class, the most likely to be +drawn from. 
+ +```python +p = [0.1, 0.5, 0.4] +dist = Categorical(p=p) +``` + +Creates a 3-class distiribution, with the 2nd class the most likely to be +drawn from, using logits. + +```python +logits = [-50, 400, 40] +dist = Categorical(logits=logits) +``` + +Creates a 3-class distribution, with the 3rd class is most likely to be drawn. +The distribution functions can be evaluated on counts. + +```python +# counts is a scalar. +p = [0.1, 0.4, 0.5] +dist = Categorical(p=p) +dist.pmf(0) # Shape [] + +# p will be broadcast to [[0.1, 0.4, 0.5], [0.1, 0.4, 0.5]] to match counts. +counts = [1, 0] +dist.pmf(counts) # Shape [2] + +# p will be broadcast to shape [3, 5, 7, 3] to match counts. +counts = [[...]] # Shape [5, 7, 3] +dist.pmf(counts) # Shape [5, 7, 3] +``` - - - -#### `tf.contrib.distributions.Categorical.__init__(logits, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} +#### `tf.contrib.distributions.Categorical.__init__(logits=None, p=None, dtype=tf.int32, validate_args=False, allow_nan_stats=True, name='Categorical')` {#Categorical.__init__} Initialize Categorical distributions using class log-probabilities. @@ -14,7 +50,13 @@ Initialize Categorical distributions using class log-probabilities. * `logits`: An N-D `Tensor`, `N >= 1`, representing the log probabilities of a set of Categorical distributions. The first `N - 1` dimensions index into a batch of independent distributions and the last dimension - indexes into the classes. + represents a vector of logits for each class. Only one of `logits` or + `p` should be passed in. +* `p`: An N-D `Tensor`, `N >= 1`, representing the probabilities + of a set of Categorical distributions. The first `N - 1` dimensions + index into a batch of independent distributions and the last dimension + represents a vector of probabilities for each class. Only one of + `logits` or `p` should be passed in. * `dtype`: The type of the event samples (default: int32). * `validate_args`: Unused in this distribution. * `allow_nan_stats`: `Boolean`, default `True`. If `False`, raise an @@ -292,7 +334,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Categorical.logits` {#Categorical.logits} - +Vector of coordinatewise logits. - - - @@ -323,6 +365,15 @@ Name prepended to all ops created by this `Distribution`. Scalar `int32` tensor: the number of classes. +- - - + +#### `tf.contrib.distributions.Categorical.p` {#Categorical.p} + +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. + + - - - #### `tf.contrib.distributions.Categorical.param_shapes(cls, sample_shape, name='DistributionParamShapes')` {#Categorical.param_shapes} diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md new file mode 100644 index 00000000000..be9565eb653 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.distributions.bijector.Bijector.md @@ -0,0 +1,315 @@ +Interface for transforming a `Distribution` via `TransformedDistribution`. + +A `Bijector` implements a bijective, differentiable function by transforming +an input `Tensor`. The output `Tensor` shape is constrained by the input +`sample`, `batch`, and `event` shape. A `Bijector` is characterized by three +operations: + +1. 
Forward Evaluation + + Useful for turning one random outcome into another random outcome from a + different distribution. + +2. Inverse Evaluation + + Useful for "reversing" a transformation to compute one probability in + terms of another. + +3. (log o det o Jacobian o inverse)(x) + + "The log of the determinant of the matrix of all first-order partial + derivatives of the inverse function." + Useful for inverting a transformation to compute one probability in terms + of another. Geometrically, the det(Jacobian) is the volume of the + transformation and is used to scale the probability. + +By convention, transformations of random variables are named in terms of the +forward transformation. The forward transformation creates samples, the +inverse is useful for computing probabilities. + +Example Use: + + - Basic properties: + + ```python + x = ... # A tensor. + # Evaluate forward transformation. + fwd_x = my_bijector.forward(x) + x == my_bijector.inverse(fwd_x) + x != my_bijector.forward(fwd_x) # Not equal because g(x) != g(g(x)). + ``` + + - Computing a log-likelihood: + + ```python + def transformed_log_pdf(bijector, log_pdf, x): + return (bijector.inverse_log_det_jacobian(x) + + log_pdf(bijector.inverse(x))) + ``` + + - Transforming a random outcome: + + ```python + def transformed_sample(bijector, x): + return bijector.forward(x) + ``` + +Example transformations: + + - "Exponential" + + ``` + Y = g(X) = exp(X) + X ~ Normal(0, 1) # Univariate. + ``` + + Implies: + + ``` + g^{-1}(Y) = log(Y) + |Jacobian(g^{-1})(y)| = 1 / y + Y ~ LogNormal(0, 1), i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = (1 / y) Normal(log(y); 0, 1) + ``` + + - "ScaleAndShift" + + ``` + Y = g(X) = sqrtSigma * X + mu + X ~ MultivariateNormal(0, I_d) + ``` + + Implies: + + ``` + g^{-1}(Y) = inv(sqrtSigma) * (Y - mu) + |Jacobian(g^{-1})(y)| = det(inv(sqrtSigma)) + Y ~ MultivariateNormal(mu, sqrtSigma) , i.e., + prob(Y=y) = |Jacobian(g^{-1})(y)| * prob(X=g^{-1}(y)) + = det(sqrtSigma)^(-d) * + MultivariateNormal(inv(sqrtSigma) * (y - mu); 0, I_d) + ``` + +Example of why a `Bijector` needs to understand sample, batch, event +partitioning: + +- Consider the `Exp` `Bijector` applied to a `Tensor` which has sample, batch, + and event (S, B, E) shape semantics. Suppose + the `Tensor`'s partitioned-shape is `(S=[4], B=[2], E=[3, 3])`. + + For `Exp`, the shape of the `Tensor` returned by `forward` and `inverse` is + unchanged, i.e., `[4, 2, 3, 3]`. However the shape returned by + `inverse_log_det_jacobian` is `[4, 2]` because the Jacobian is a reduction + over the event dimensions. + +Subclass Requirements: + +- Subclasses are expected to implement `_forward` and one or both of: + - `_inverse`, `_inverse_log_det_jacobian`, + - `_inverse_and_inverse_log_det_jacobian`. + +- If computation can be shared among `_inverse` and + `_inverse_log_det_jacobian` it is preferable to implement + `_inverse_and_inverse_log_det_jacobian`. This usually reduces + graph-construction overhead because a `Distribution`'s implementation of + `log_prob` will need to evaluate both the inverse Jacobian as well as the + inverse function. + +- If an additional use case needs just `inverse` or just + `inverse_log_det_jacobian` then he or she may also wish to implement these + functions to avoid computing the `inverse_log_det_jacobian` or the + `inverse`, respectively. 
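The subclass requirements above are easiest to see with a small example. The following is only an illustrative sketch (the `Square` bijector is hypothetical), assuming the constructor arguments and the `_forward` / `_inverse_and_inverse_log_det_jacobian` hooks work as this new doc page describes:

```python
import tensorflow as tf
from tensorflow.contrib.distributions import bijector

class Square(bijector.Bijector):
  """Hypothetical bijector Y = g(X) = X**2, restricted to positive scalars."""

  def __init__(self, validate_args=False, name="Square"):
    super(Square, self).__init__(
        batch_ndims=0, event_ndims=0,
        validate_args=validate_args, name=name)

  def _forward(self, x):
    return tf.square(x)

  def _inverse_and_inverse_log_det_jacobian(self, y):
    x = tf.sqrt(y)
    # dX/dY = 1 / (2 * sqrt(y)), so log|det J(g^{-1})(y)| = -log(2) - 0.5 * log(y).
    ildj = -tf.log(2.) - 0.5 * tf.log(y)
    return x, ildj
```

An instance of such a subclass could then be passed as the `bijector` argument of `TransformedDistribution`, as in the examples earlier in this diff.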
+- - - + +#### `tf.contrib.distributions.bijector.Bijector.__init__(batch_ndims=None, event_ndims=None, parameters=None, is_constant_jacobian=False, validate_args=False, dtype=None, name=None)` {#Bijector.__init__} + +Constructs Bijector. + +A `Bijector` transforms random variables into new random variables. + +Examples: + +```python +# Create the Y = g(X) = X transform which operates on 4-Tensors of vectors. +identity = Identity(batch_ndims=4, event_ndims=1) + +# Create the Y = g(X) = exp(X) transform which operates on matrices. +exp = Exp(batch_ndims=0, event_ndims=2) +``` + +See `Bijector` subclass docstring for more details and specific examples. + +##### Args: + + +* `batch_ndims`: number of dimensions associated with batch coordinates. +* `event_ndims`: number of dimensions associated with event coordinates. +* `parameters`: Dictionary of parameters used by this `Bijector` +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is not a + function of the input. +* `validate_args`: `Boolean`, default `False`. Whether to validate input with + asserts. If `validate_args` is `False`, and the inputs are invalid, + correct behavior is not guaranteed. +* `dtype`: `tf.dtype` supported by this `Bijector`. `None` means dtype is not + enforced. +* `name`: The name to give Ops created by the initializer. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.dtype` {#Bijector.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.forward(x, name='forward')` {#Bijector.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse(x, name='inverse')` {#Bijector.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Bijector.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Bijector.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). 
+ +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.is_constant_jacobian` {#Bijector.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.name` {#Bijector.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.parameters` {#Bijector.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.shaper` {#Bijector.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Bijector.validate_args` {#Bijector.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md index 51bc30bc01e..3c02fd755f2 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.contrib.metrics.streaming_sparse_precision_at_k.md @@ -42,7 +42,7 @@ Instructions for updating: [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. k: Integer, k for @k metric. class_id: Integer class ID for which we want binary metrics. This should be diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md index 61f16181b53..3fb19c26013 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard2/tf.summary.tensor_summary.md @@ -1,4 +1,4 @@ -### `tf.summary.tensor_summary(display_name, tensor, description='', labels=None, collections=None, name=None)` {#tensor_summary} +### `tf.summary.tensor_summary(name, tensor, summary_description=None, collections=None)` {#tensor_summary} Outputs a `Summary` protocol buffer with a serialized tensor.proto. @@ -9,19 +9,12 @@ has one summary value containing the input tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. 
Will also serve as the series name in + TensorBoard. * `tensor`: A tensor of any type and shape to serialize. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) +* `summary_description`: Optional summary_pb2.SummaryDescription() * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md index 27414dd7308..10897cfe667 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Binomial.md @@ -65,10 +65,12 @@ Initialize a batch of Binomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm]` `m >= 0`, and the same dtype as `n`. Each entry represents logits for the probability - of success for independent Binomial distributions. + of success for independent Binomial distributions. Only one of + `logits` or `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm]` `m >= 0`, `p in [0, 1]`. Each entry represents the - probability of success for independent Binomial distributions. + probability of success for independent Binomial distributions. Only one + of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n`, `p`, and `x` in `prob` and `log_prob`. If `False` and inputs are invalid, correct behavior is not guaranteed. @@ -371,7 +373,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Binomial.logits` {#Binomial.logits} -Log-odds. +Log-odds of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md index bfc40da6ceb..15e6b46e834 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.Multinomial.md @@ -73,12 +73,13 @@ Initialize a batch of Multinomial distributions. * `logits`: Floating point tensor representing the log-odds of a positive event with shape broadcastable to `[N1,..., Nm, k], m >= 0`, and the same dtype as `n`. Defines this as a batch of `N1 x ... x Nm` - different `k` class Multinomial distributions. + different `k` class Multinomial distributions. Only one of `logits` or + `p` should be passed in. * `p`: Positive floating point tensor with shape broadcastable to `[N1,..., Nm, k]` `m >= 0` and same dtype as `n`. Defines this as a batch of `N1 x ... 
x Nm` different `k` class Multinomial distributions. `p`'s components in the last portion of its shape should - sum up to 1. + sum up to 1. Only one of `logits` or `p` should be passed in. * `validate_args`: `Boolean`, default `False`. Whether to assert valid values for parameters `n` and `p`, and `x` in `prob` and `log_prob`. If `False`, correct behavior is not guaranteed. @@ -384,7 +385,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.Multinomial.logits` {#Multinomial.logits} -Log-odds. +Vector of coordinatewise logits. - - - @@ -419,7 +420,9 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.Multinomial.p` {#Multinomial.p} -Event probabilities. +Vector of probabilities summing to one. + +Each element is the probability of drawing that coordinate. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md new file mode 100644 index 00000000000..7f1246b9646 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.distributions.bijector.ScaleAndShift.md @@ -0,0 +1,218 @@ +Bijector which computes Y = g(X; loc, scale) = scale * X + loc. + +Example Use: + +```python +# No batch, scalar. +mu = 0 # shape=[] +sigma = 1 # shape=[] +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 0 + +# One batch, scalar. +mu = ... # shape=[b], b>0 +sigma = ... # shape=[b], b>0 +b = ScaleAndShift(loc=mu, scale=sigma) +# b.shaper.batch_ndims == 1 +# b.shaper.event_ndims == 0 + +# No batch, multivariate. +mu = ... # shape=[d], d>0 +sigma = ... # shape=[d, d], d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == 0 +# b.shaper.event_ndims == 1 + +# (B1*B2*...*Bb)-batch, multivariate. +mu = ... # shape=[B1,...,Bb, d], b>0, d>0 +sigma = ... # shape=[B1,...,Bb, d, d], b>0, d>0 +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +# b.shaper.batch_ndims == b +# b.shaper.event_ndims == 1 + +# Mu is broadcast: +mu = 1 +sigma = [I, I] # I is a 3x3 identity matrix. +b = ScaleAndShift(loc=mu, scale=sigma, event_ndims=1) +x = numpy.ones(S + sigma.shape) +b.forward(x) # == x + 1 +``` +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.__init__(loc, scale, event_ndims=0, validate_args=False, name='ScaleAndShift')` {#ScaleAndShift.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.dtype` {#ScaleAndShift.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.forward(x, name='forward')` {#ScaleAndShift.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse(x, name='inverse')` {#ScaleAndShift.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. 
+ +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#ScaleAndShift.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#ScaleAndShift.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.is_constant_jacobian` {#ScaleAndShift.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.loc` {#ScaleAndShift.loc} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.name` {#ScaleAndShift.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.parameters` {#ScaleAndShift.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.scale` {#ScaleAndShift.scale} + + + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.shaper` {#ScaleAndShift.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.ScaleAndShift.validate_args` {#ScaleAndShift.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md index 6d698594f57..99899f1421c 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.learn.Estimator.md @@ -9,8 +9,15 @@ Constructs an Estimator instance. 
* `model_fn`: Model function, takes features and targets tensors or dicts of - tensors and returns predictions and loss tensors. - Supports next three signatures for the function: + tensors and returns tuple of: + + * predictions: `Tensor`, `SparseTensor` or dictionary of same. + Can also be any type that is convertible to a `Tensor` or + `SparseTensor`, or dictionary of same. + * loss: Scalar loss `Tensor`. + * train_op: Training update `Tensor` or `Operation`. + + Supports next three signatures for the function: * `(features, targets) -> (predictions, loss, train_op)` * `(features, targets, mode) -> (predictions, loss, train_op)` diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md new file mode 100644 index 00000000000..fe3c9866e8d --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard3/tf.contrib.training.rejection_sample.md @@ -0,0 +1,57 @@ +### `tf.contrib.training.rejection_sample(tensors, accept_prob_fn, batch_size, queue_threads=1, enqueue_many=False, prebatch_capacity=16, prebatch_threads=1, runtime_checks=False, name=None)` {#rejection_sample} + +Stochastically creates batches by rejection sampling. + +Each list of non-batched tensors is evaluated by `accept_prob_fn`, to produce +a scalar tensor between 0 and 1. This tensor corresponds to the probability of +being accepted. When `batch_size` tensor groups have been accepted, the batch +queue will return a mini-batch. + +##### Args: + + +* `tensors`: List of tensors for data. All tensors are either one item or a + batch, according to enqueue_many. +* `accept_prob_fn`: A python lambda that takes a non-batch tensor from each + item in `tensors`, and produces a scalar tensor. +* `batch_size`: Size of batch to be returned. +* `queue_threads`: The number of threads for the queue that will hold the final + batch. +* `enqueue_many`: Bool. If true, interpret input tensors as having a batch + dimension. +* `prebatch_capacity`: Capacity for the large queue that is used to convert + batched tensors to single examples. +* `prebatch_threads`: Number of threads for the large queue that is used to + convert batched tensors to single examples. +* `runtime_checks`: Bool. If true, insert runtime checks on the output of + `accept_prob_fn`. Using `True` might have a performance impact. +* `name`: Optional prefix for ops created by this function. + +##### Raises: + + +* `ValueError`: enqueue_many is True and labels doesn't have a batch + dimension, or if enqueue_many is False and labels isn't a scalar. +* `ValueError`: enqueue_many is True, and batch dimension on data and labels + don't match. +* `ValueError`: if a zero initial probability class has a nonzero target + probability. + +##### Returns: + + A list of tensors of the same length as `tensors`, with batch dimension + `batch_size`. + +##### Example: + + # Get tensor for a single data and label example. + data, label = data_provider.Get(['data', 'label']) + + # Get stratified batch according to data tensor. + accept_prob_fn = lambda x: (tf.tanh(x[0]) + 1) / 2 + data_batch = tf.contrib.training.rejection_sample( + [data, label], accept_prob_fn, 16) + + # Run batch through network. + ... 
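+
+  A more complete, self-contained sketch (the random input tensors and the
+  acceptance rule below are hypothetical, chosen only for illustration) that
+  also shows the queue runners needed to actually pull batches:
+
+    import tensorflow as tf
+
+    # One un-batched example: a scalar feature and an integer class label.
+    data = tf.random_normal([])
+    label = tf.random_uniform([], maxval=2, dtype=tf.int32)
+
+    # Accept label 1 with probability 0.9 and label 0 with probability 0.1,
+    # so class 1 dominates the output batches.
+    accept_prob_fn = lambda tensors: tf.to_float(tensors[1]) * 0.8 + 0.1
+
+    data_batch, label_batch = tf.contrib.training.rejection_sample(
+        [data, label], accept_prob_fn, batch_size=16, runtime_checks=True)
+
+    with tf.Session() as sess:
+      coord = tf.train.Coordinator()
+      threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+      print(sess.run(label_batch))  # Mostly ones.
+      coord.request_stop()
+      coord.join(threads)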
+ diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md index 02dd663694b..97a2f4d2b86 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.BernoulliWithSigmoidP.md @@ -274,7 +274,7 @@ survival function, which are more accurate than `1 - cdf(x)` when `x >> 1`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.logits` {#BernoulliWithSigmoidP.logits} - +Log-odds of success. - - - @@ -306,7 +306,7 @@ Name prepended to all ops created by this `Distribution`. #### `tf.contrib.distributions.BernoulliWithSigmoidP.p` {#BernoulliWithSigmoidP.p} - +Probability of success. - - - diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md new file mode 100644 index 00000000000..16313d2e851 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.distributions.bijector.Softplus.md @@ -0,0 +1,188 @@ +Bijector which computes `Y = g(X) = Log[1 + exp(X)]`. + +The softplus `Bijector` has the following two useful properties: + +* The domain is the positive real numbers +* `softplus(x) approx x`, for large `x`, so it does not overflow as easily as + the `Exp` `Bijector`. + + Example Use: + + ```python + # Create the Y=g(X)=softplus(X) transform which works only on Tensors with 1 + # batch ndim and 2 event ndims (i.e., vector of matrices). + softplus = Softplus(batch_ndims=1, event_ndims=2) + x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] + log(1 + exp(x)) == softplus.forward(x) + log(exp(x) - 1) == softplus.inverse(x) + ``` + + Note: log(.) and exp(.) are applied element-wise but the Jacobian is a + reduction over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Softplus.__init__(event_ndims=0, validate_args=False, name='Softplus')` {#Softplus.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.dtype` {#Softplus.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.forward(x, name='forward')` {#Softplus.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse(x, name='inverse')` {#Softplus.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Softplus.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Softplus.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.is_constant_jacobian` {#Softplus.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.name` {#Softplus.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.parameters` {#Softplus.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.shaper` {#Softplus.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Softplus.validate_args` {#Softplus.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md index 1d9a3a4ad84..87ebea3329a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.losses.mean_squared_error.md @@ -1,30 +1,31 @@ -### `tf.contrib.losses.mean_squared_error(*args, **kwargs)` {#mean_squared_error} +### `tf.contrib.losses.mean_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_squared_error} -Adds a Sum-of-Squares loss to the training procedure. (deprecated) +Adds a Sum-of-Squares loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. +`weight` acts as a coefficient for the loss. 
If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. If the shape of +`weight` matches the shape of `predictions`, then the loss of each +measurable element of `predictions` is scaled by the corresponding value of +`weight`. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. +##### Args: - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs. +* `targets`: The ground truth output tensor, same dimensions as 'predictions'. +* `weight`: Coefficients for the loss a scalar, a tensor of shape + [batch_size] or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. + +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md index 68ba0ee73bf..24e2d3d8b5a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard4/tf.contrib.metrics.streaming_recall_at_k.md @@ -1,6 +1,10 @@ ### `tf.contrib.metrics.streaming_recall_at_k(*args, **kwargs)` {#streaming_recall_at_k} -Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) +Computes the recall@k of the predictions with respect to dense labels. (deprecated arguments) (deprecated) + +THIS FUNCTION IS DEPRECATED. It will be removed after 2016-11-08. +Instructions for updating: +Please use `streaming_sparse_recall_at_k`, and reshape labels from [batch_size] to [batch_size, 1]. SOME ARGUMENTS ARE DEPRECATED. They will be removed after 2016-10-19. Instructions for updating: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md new file mode 100644 index 00000000000..b714ac42381 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard5/tf.contrib.distributions.bijector.Exp.md @@ -0,0 +1,182 @@ +Bijector which computes Y = g(X) = exp(X). 
+ +Example Use: + +```python +# Create the Y=g(X)=exp(X) transform which works only on Tensors with 1 +# batch ndim and 2 event ndims (i.e., vector of matrices). +exp = Exp(batch_ndims=1, event_ndims=2) +x = [[[1., 2], + [3, 4]], + [[5, 6], + [7, 8]]] +exp(x) == exp.forward(x) +log(x) == exp.inverse(x) +``` + +Note: the exp(.) is applied element-wise but the Jacobian is a reduction +over the event space. +- - - + +#### `tf.contrib.distributions.bijector.Exp.__init__(event_ndims=0, validate_args=False, name='Exp')` {#Exp.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.dtype` {#Exp.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.forward(x, name='forward')` {#Exp.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse(x, name='inverse')` {#Exp.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Exp.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Exp.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.is_constant_jacobian` {#Exp.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Exp.name` {#Exp.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.parameters` {#Exp.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.shaper` {#Exp.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Exp.validate_args` {#Exp.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md new file mode 100644 index 00000000000..8f7f3c4f2f3 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.distributions.bijector.Identity.md @@ -0,0 +1,176 @@ +Bijector which computes Y = g(X) = X. + +Example Use: + +```python +# Create the Y=g(X)=X transform which is intended for Tensors with 1 batch +# ndim and 1 event ndim (i.e., vector of vectors). +identity = Identity(batch_ndims=1, event_ndims=1) +x = [[1., 2], + [3, 4]] +x == identity.forward(x) == identity.inverse(x) +``` +- - - + +#### `tf.contrib.distributions.bijector.Identity.__init__(validate_args=False, name='Identity')` {#Identity.__init__} + + + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.dtype` {#Identity.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.forward(x, name='forward')` {#Identity.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse(x, name='inverse')` {#Identity.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Identity.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. 
+ + +- - - + +#### `tf.contrib.distributions.bijector.Identity.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Identity.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.is_constant_jacobian` {#Identity.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.name` {#Identity.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.parameters` {#Identity.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.shaper` {#Identity.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Identity.validate_args` {#Identity.validate_args} + +Returns True if Tensor arguments will be validated. + + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md deleted file mode 100644 index 05f13bf638c..00000000000 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.contrib.losses.sum_of_squares.md +++ /dev/null @@ -1,30 +0,0 @@ -### `tf.contrib.losses.sum_of_squares(*args, **kwargs)` {#sum_of_squares} - -Adds a Sum-of-Squares loss to the training procedure. (deprecated) - -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_squared_error. - - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. If the shape of - `weight` matches the shape of `predictions`, then the loss of each - measurable element of `predictions` is scaled by the corresponding value of - `weight`. - - Args: - predictions: The predicted outputs. - targets: The ground truth output tensor, same dimensions as 'predictions'. - weight: Coefficients for the loss a scalar, a tensor of shape - [batch_size] or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - - Returns: - A scalar `Tensor` representing the loss value. - - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. 
- diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md index 020c9c060df..979cc1840c6 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard6/tf.summary.scalar.md @@ -1,4 +1,4 @@ -### `tf.summary.scalar(display_name, tensor, description='', labels=None, collections=None, name=None)` {#scalar} +### `tf.summary.scalar(name, tensor, summary_description=None, collections=None)` {#scalar} Outputs a `Summary` protocol buffer containing a single scalar value. @@ -7,14 +7,12 @@ The generated Summary has a Tensor.proto containing the input Tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor containing a single floating point or integer value. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to attach metadata. +* `summary_description`: Optional summary_description_pb2.SummaryDescription * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md new file mode 100644 index 00000000000..439988379b5 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard7/tf.contrib.distributions.bijector.Inline.md @@ -0,0 +1,189 @@ +Bijector constructed from callables implementing forward, inverse, and inverse_log_det_jacobian. + +Example Use: + +```python +exp = Inline( + forward_fn=tf.exp, + inverse_fn=tf.log, + inverse_log_det_jacobian_fn=( + lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)), + name="Exp") +``` + +The above example is equivalent to the `Bijector` `Exp(event_ndims=1)`. +- - - + +#### `tf.contrib.distributions.bijector.Inline.__init__(forward_fn, inverse_fn, inverse_log_det_jacobian_fn, is_constant_jacobian=False, name='Inline')` {#Inline.__init__} + +Creates a `Bijector` from callables. + +##### Args: + + +* `forward_fn`: Python callable implementing the forward transformation. +* `inverse_fn`: Python callable implementing the inverse transformation. +* `inverse_log_det_jacobian_fn`: Python callable implementing the + inverse_log_det_jacobian transformation. +* `is_constant_jacobian`: `Boolean` indicating that the Jacobian is constant + for all input arguments. +* `name`: `String`, name given to ops managed by this object. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.dtype` {#Inline.dtype} + +dtype of `Tensor`s transformable by this distribution. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.forward(x, name='forward')` {#Inline.forward} + +Returns the forward `Bijector` evaluation, i.e., X = g(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "forward" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. 
+* `NotImplementedError`: if `_forward` is not implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse(x, name='inverse')` {#Inline.inverse} + +Returns the inverse `Bijector` evaluation, i.e., X = g^{-1}(Y). + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_and_inverse_log_det_jacobian(x, name='inverse_and_inverse_log_det_jacobian')` {#Inline.inverse_and_inverse_log_det_jacobian} + +Returns both the inverse evaluation and inverse_log_det_jacobian. + +Enables possibly more efficient calculation when both inverse and +corresponding Jacobian are needed. + +See `inverse()`, `inverse_log_det_jacobian()` for more details. + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_and_inverse_log_det_jacobian` + nor {`_inverse`, `_inverse_log_det_jacobian`} are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.inverse_log_det_jacobian(x, name='inverse_log_det_jacobian')` {#Inline.inverse_log_det_jacobian} + +Returns the (log o det o Jacobian o inverse)(x). + +Mathematically, returns: log(det(dY/dX g^{-1}))(Y). + +Note that forward_log_det_jacobian is the negative of this function. (See +is_constant_jacobian for related proof.) + +##### Args: + + +* `x`: `Tensor`. The input to the "inverse" Jacobian evaluation. +* `name`: The name to give this op. + +##### Returns: + + `Tensor`. + +##### Raises: + + +* `TypeError`: if `self.dtype` is specified and `x.dtype` is not + `self.dtype`. +* `NotImplementedError`: if neither `_inverse_log_det_jacobian` nor + `_inverse_and_inverse_log_det_jacobian` are implemented. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.is_constant_jacobian` {#Inline.is_constant_jacobian} + +Returns true iff the Jacobian is not a function of x. + +Note: Jacobian is either constant for both forward and inverse or neither. + +##### Returns: + + `Boolean`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.name` {#Inline.name} + +Returns the string name of this `Bijector`. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.parameters` {#Inline.parameters} + +Returns this `Bijector`'s parameters as a name/value dictionary. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.shaper` {#Inline.shaper} + +Returns shape object used to manage shape constraints. + + +- - - + +#### `tf.contrib.distributions.bijector.Inline.validate_args` {#Inline.validate_args} + +Returns True if Tensor arguments will be validated. 
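+
+As a hypothetical usage sketch of the `Inline`-built `Exp` from the example at
+the top of this page (the `tf.contrib.distributions.bijector` module path is
+assumed), the generic `Bijector` methods can be exercised directly:
+
+```python
+import tensorflow as tf
+
+bijector = tf.contrib.distributions.bijector
+exp = bijector.Inline(
+    forward_fn=tf.exp,
+    inverse_fn=tf.log,
+    inverse_log_det_jacobian_fn=(
+        lambda y: -tf.reduce_sum(tf.log(y), reduction_indices=-1)),
+    name="Exp")
+
+y = tf.constant([[1., 2.], [3., 4.]])
+x = exp.inverse(y)                      # log(y), shape [2, 2].
+ildj = exp.inverse_log_det_jacobian(y)  # -sum(log(y), axis=-1), shape [2].
+fwd = exp.forward(x)                    # Recovers y.
+
+with tf.Session() as sess:
+  print(sess.run([x, ildj, fwd]))
+```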
+ + diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md index 396806e8963..bf0893bc5f3 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.contrib.metrics.streaming_sparse_average_precision_at_k.md @@ -33,7 +33,7 @@ If `weights` is `None`, weights default to 1. Use weights of 0 to mask values. [D1, ... DN, num_labels], where N >= 1 and num_labels is the number of target classes for the associated prediction. Commonly, N=1 and `labels` has shape [batch_size, num_labels]. [D1, ... DN] must match - `predictions_idx`. Values should be in range [0, num_classes], where + `predictions`. Values should be in range [0, num_classes], where num_classes is the last dimension of `predictions`. * `k`: Integer, k for @k metric. This will calculate an average precision for range `[1,k]`, as documented above. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md index 2c879bdb0da..028c38057b7 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard8/tf.train.SummarySaverHook.md @@ -1,14 +1,16 @@ Saves summaries every N steps. - - - -#### `tf.train.SummarySaverHook.__init__(save_steps=100, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} +#### `tf.train.SummarySaverHook.__init__(save_steps=100, save_secs=None, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} Initializes a `SummarySaver` monitor. ##### Args: -* `save_steps`: `int`, save summaries every N steps. See `EveryN`. +* `save_steps`: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. +* `save_secs`: `int`, save summaries every N seconds. * `output_dir`: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. * `summary_writer`: `SummaryWriter`. If `None` and an `output_dir` was passed, diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md index d2785ed69e8..3b7668eb67a 100644 --- a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.contrib.losses.mean_pairwise_squared_error.md @@ -1,44 +1,45 @@ -### `tf.contrib.losses.mean_pairwise_squared_error(*args, **kwargs)` {#mean_pairwise_squared_error} +### `tf.contrib.losses.mean_pairwise_squared_error(predictions, targets, weight=1.0, scope=None)` {#mean_pairwise_squared_error} -Adds a pairwise-errors-squared loss to the training procedure. (deprecated) +Adds a pairwise-errors-squared loss to the training procedure. -THIS FUNCTION IS DEPRECATED. It will be removed after 2016-10-01. -Instructions for updating: -Use mean_pairwise_squared_error. 
+Unlike `mean_squared_error`, which is a measure of the differences between +corresponding elements of `predictions` and `targets`, +`mean_pairwise_squared_error` is a measure of the differences between pairs of +corresponding elements of `predictions` and `targets`. - Unlike the sum_of_squares loss, which is a measure of the differences between - corresponding elements of `predictions` and `targets`, sum_of_pairwise_squares - is a measure of the differences between pairs of corresponding elements of - `predictions` and `targets`. +For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are +three pairs of differences are summed to compute the loss: + loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 - For example, if `targets`=[a, b, c] and `predictions`=[x, y, z], there are - three pairs of differences are summed to compute the loss: - loss = [ ((a-b) - (x-y)).^2 + ((a-c) - (x-z)).^2 + ((b-c) - (y-z)).^2 ] / 3 +Note that since the inputs are of size [batch_size, d0, ... dN], the +corresponding pairs are computed within each batch sample but not across +samples within a batch. For example, if `predictions` represents a batch of +16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs +is drawn from each image, but not across images. - Note that since the inputs are of size [batch_size, d0, ... dN], the - corresponding pairs are computed within each batch sample but not across - samples within a batch. For example, if `predictions` represents a batch of - 16 grayscale images of dimension [batch_size, 100, 200], then the set of pairs - is drawn from each image, but not across images. +`weight` acts as a coefficient for the loss. If a scalar is provided, then the +loss is simply scaled by the given value. If `weight` is a tensor of size +[batch_size], then the total loss for each sample of the batch is rescaled +by the corresponding element in the `weight` vector. - `weight` acts as a coefficient for the loss. If a scalar is provided, then the - loss is simply scaled by the given value. If `weight` is a tensor of size - [batch_size], then the total loss for each sample of the batch is rescaled - by the corresponding element in the `weight` vector. +##### Args: - Args: - predictions: The predicted outputs, a tensor of size [batch_size, d0, .. dN] - where N+1 is the total number of dimensions in `predictions`. - targets: The ground truth output tensor, whose shape must match the shape of - the `predictions` tensor. - weight: Coefficients for the loss a scalar, a tensor of shape [batch_size] - or a tensor whose shape matches `predictions`. - scope: The scope for the operations performed in computing the loss. - Returns: - A scalar `Tensor` representing the loss value. +* `predictions`: The predicted outputs, a tensor of size [batch_size, d0, .. dN] + where N+1 is the total number of dimensions in `predictions`. +* `targets`: The ground truth output tensor, whose shape must match the shape of + the `predictions` tensor. +* `weight`: Coefficients for the loss a scalar, a tensor of shape [batch_size] + or a tensor whose shape matches `predictions`. +* `scope`: The scope for the operations performed in computing the loss. - Raises: - ValueError: If the shape of `predictions` doesn't match that of `targets` or - if the shape of `weight` is invalid. +##### Returns: + + A scalar `Tensor` representing the loss value. 
+ +##### Raises: + + +* `ValueError`: If the shape of `predictions` doesn't match that of `targets` or + if the shape of `weight` is invalid. diff --git a/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md new file mode 100644 index 00000000000..b71edf9b969 --- /dev/null +++ b/tensorflow/g3doc/api_docs/python/functions_and_classes/shard9/tf.eye.md @@ -0,0 +1,36 @@ +### `tf.eye(num_rows, num_columns=None, batch_shape=None, dtype=tf.float32, name=None)` {#eye} + +Construct an identity matrix, or a batch of matrices. + +```python +# Construct one identity matrix. +tf.eye(2) +==> [[1., 0.], + [0., 1.]] + +# Construct a batch of 3 identity matricies, each 2 x 2. +# batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. +batch_identity = tf.eye(2, batch_shape=[3]) + +# Construct one 2 x 3 "identity" matrix +tf.eye(2, num_columns=3) +==> [[ 1., 0., 0.], + [ 0., 1., 0.]] +``` + +##### Args: + + +* `num_rows`: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. +* `num_columns`: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. +* `batch_shape`: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. +* `dtype`: The type of an element in the resulting `Tensor` +* `name`: A name for this `Op`. Defaults to "eye". + +##### Returns: + + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + diff --git a/tensorflow/g3doc/api_docs/python/index.md b/tensorflow/g3doc/api_docs/python/index.md index a6b099dff95..86655fbc451 100644 --- a/tensorflow/g3doc/api_docs/python/index.md +++ b/tensorflow/g3doc/api_docs/python/index.md @@ -198,6 +198,7 @@ * [`erf`](../../api_docs/python/math_ops.md#erf) * [`erfc`](../../api_docs/python/math_ops.md#erfc) * [`exp`](../../api_docs/python/math_ops.md#exp) + * [`eye`](../../api_docs/python/math_ops.md#eye) * [`fft`](../../api_docs/python/math_ops.md#fft) * [`fft2d`](../../api_docs/python/math_ops.md#fft2d) * [`fft3d`](../../api_docs/python/math_ops.md#fft3d) @@ -741,6 +742,14 @@ * [`WishartCholesky`](../../api_docs/python/contrib.distributions.md#WishartCholesky) * [`WishartFull`](../../api_docs/python/contrib.distributions.md#WishartFull) +* **[Random variable transformations (contrib)](../../api_docs/python/contrib.distributions.bijector.md)**: + * [`Bijector`](../../api_docs/python/contrib.distributions.bijector.md#Bijector) + * [`Exp`](../../api_docs/python/contrib.distributions.bijector.md#Exp) + * [`Identity`](../../api_docs/python/contrib.distributions.bijector.md#Identity) + * [`Inline`](../../api_docs/python/contrib.distributions.bijector.md#Inline) + * [`ScaleAndShift`](../../api_docs/python/contrib.distributions.bijector.md#ScaleAndShift) + * [`Softplus`](../../api_docs/python/contrib.distributions.bijector.md#Softplus) + * **[FFmpeg (contrib)](../../api_docs/python/contrib.ffmpeg.md)**: * [`decode_audio`](../../api_docs/python/contrib.ffmpeg.md#decode_audio) * [`encode_audio`](../../api_docs/python/contrib.ffmpeg.md#encode_audio) @@ -946,8 +955,6 @@ * [`sigmoid_cross_entropy`](../../api_docs/python/contrib.losses.md#sigmoid_cross_entropy) * [`softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#softmax_cross_entropy) * [`sparse_softmax_cross_entropy`](../../api_docs/python/contrib.losses.md#sparse_softmax_cross_entropy) - * 
[`sum_of_pairwise_squares`](../../api_docs/python/contrib.losses.md#sum_of_pairwise_squares) - * [`sum_of_squares`](../../api_docs/python/contrib.losses.md#sum_of_squares) * **[RNN (contrib)](../../api_docs/python/contrib.rnn.md)**: * [`AttentionCellWrapper`](../../api_docs/python/contrib.rnn.md#AttentionCellWrapper) @@ -1002,6 +1009,7 @@ * [`bucket`](../../api_docs/python/contrib.training.md#bucket) * [`bucket_by_sequence_length`](../../api_docs/python/contrib.training.md#bucket_by_sequence_length) * [`NextQueuedSequenceBatch`](../../api_docs/python/contrib.training.md#NextQueuedSequenceBatch) + * [`rejection_sample`](../../api_docs/python/contrib.training.md#rejection_sample) * [`resample_at_rate`](../../api_docs/python/contrib.training.md#resample_at_rate) * [`SequenceQueueingStateSaver`](../../api_docs/python/contrib.training.md#SequenceQueueingStateSaver) * [`stratified_sample`](../../api_docs/python/contrib.training.md#stratified_sample) diff --git a/tensorflow/g3doc/api_docs/python/math_ops.md b/tensorflow/g3doc/api_docs/python/math_ops.md index ad1126474de..69d8bac2b63 100644 --- a/tensorflow/g3doc/api_docs/python/math_ops.md +++ b/tensorflow/g3doc/api_docs/python/math_ops.md @@ -1131,6 +1131,45 @@ tf.transpose(x, perm=[0, 2, 1]) ==> [[[1 4] +- - - + +### `tf.eye(num_rows, num_columns=None, batch_shape=None, dtype=tf.float32, name=None)` {#eye} + +Construct an identity matrix, or a batch of matrices. + +```python +# Construct one identity matrix. +tf.eye(2) +==> [[1., 0.], + [0., 1.]] + +# Construct a batch of 3 identity matricies, each 2 x 2. +# batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. +batch_identity = tf.eye(2, batch_shape=[3]) + +# Construct one 2 x 3 "identity" matrix +tf.eye(2, num_columns=3) +==> [[ 1., 0., 0.], + [ 0., 1., 0.]] +``` + +##### Args: + + +* `num_rows`: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. +* `num_columns`: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. +* `batch_shape`: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. +* `dtype`: The type of an element in the resulting `Tensor` +* `name`: A name for this `Op`. Defaults to "eye". + +##### Returns: + + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + + - - - ### `tf.matrix_diag(diagonal, name=None)` {#matrix_diag} diff --git a/tensorflow/g3doc/api_docs/python/state_ops.md b/tensorflow/g3doc/api_docs/python/state_ops.md index 3a2f45ad7fa..5c1d0ebb3a6 100644 --- a/tensorflow/g3doc/api_docs/python/state_ops.md +++ b/tensorflow/g3doc/api_docs/python/state_ops.md @@ -2433,7 +2433,7 @@ tensor shape, the initializer will raise a `ValueError`. * `ValueError`: Too many elements provided. Needed at most 6, but received 8 - ``` +``` - - - diff --git a/tensorflow/g3doc/api_docs/python/summary.md b/tensorflow/g3doc/api_docs/python/summary.md index 14dac8117fe..4fbc65c0cf1 100644 --- a/tensorflow/g3doc/api_docs/python/summary.md +++ b/tensorflow/g3doc/api_docs/python/summary.md @@ -8,7 +8,7 @@ This module contains ops for generating summaries. ## Summary Ops - - - -### `tf.summary.tensor_summary(display_name, tensor, description='', labels=None, collections=None, name=None)` {#tensor_summary} +### `tf.summary.tensor_summary(name, tensor, summary_description=None, collections=None)` {#tensor_summary} Outputs a `Summary` protocol buffer with a serialized tensor.proto. 
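A minimal sketch of the updated signature shown above (the tensor and names are illustrative; `tf.summary.scalar`, documented further below, follows the same pattern):

```python
import tensorflow as tf

t = tf.constant([[1.0, 2.0], [3.0, 4.0]])

# The first positional argument is now the node name, which also serves as the
# series name in TensorBoard; display_name, description, and labels are gone.
tensor_summ = tf.summary.tensor_summary("my_tensor", t)
scalar_summ = tf.summary.scalar("my_tensor_mean", tf.reduce_mean(t))
```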
@@ -19,19 +19,12 @@ has one summary value containing the input tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor of any type and shape to serialize. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) +* `summary_description`: Optional summary_pb2.SummaryDescription() * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: @@ -41,7 +34,7 @@ has one summary value containing the input tensor. - - - -### `tf.summary.scalar(display_name, tensor, description='', labels=None, collections=None, name=None)` {#scalar} +### `tf.summary.scalar(name, tensor, summary_description=None, collections=None)` {#scalar} Outputs a `Summary` protocol buffer containing a single scalar value. @@ -50,14 +43,12 @@ The generated Summary has a Tensor.proto containing the input Tensor. ##### Args: -* `display_name`: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. +* `name`: A name for the generated node. Will also serve as the series name in + TensorBoard. * `tensor`: A tensor containing a single floating point or integer value. -* `description`: An optional long description of the data being output. -* `labels`: a list of strings used to attach metadata. +* `summary_description`: Optional summary_description_pb2.SummaryDescription * `collections`: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. -* `name`: An optional name for the generated node (optional). ##### Returns: diff --git a/tensorflow/g3doc/api_docs/python/train.md b/tensorflow/g3doc/api_docs/python/train.md index c83a9b4749d..08b1241667f 100644 --- a/tensorflow/g3doc/api_docs/python/train.md +++ b/tensorflow/g3doc/api_docs/python/train.md @@ -4401,14 +4401,16 @@ such as saving a last checkpoint. Saves summaries every N steps. - - - -#### `tf.train.SummarySaverHook.__init__(save_steps=100, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} +#### `tf.train.SummarySaverHook.__init__(save_steps=100, save_secs=None, output_dir=None, summary_writer=None, scaffold=None, summary_op=None)` {#SummarySaverHook.__init__} Initializes a `SummarySaver` monitor. ##### Args: -* `save_steps`: `int`, save summaries every N steps. See `EveryN`. +* `save_steps`: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. +* `save_secs`: `int`, save summaries every N seconds. * `output_dir`: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. * `summary_writer`: `SummaryWriter`. 
If `None` and an `output_dir` was passed, diff --git a/tensorflow/g3doc/tutorials/estimators/index.md b/tensorflow/g3doc/tutorials/estimators/index.md index bbc94637567..75909639aba 100644 --- a/tensorflow/g3doc/tutorials/estimators/index.md +++ b/tensorflow/g3doc/tutorials/estimators/index.md @@ -22,7 +22,7 @@ different activation functions for each neural network layer. Or maybe you're implementing a ranking or recommendation system, and neither a classifier nor a regressor is appropriate for generating predictions. -This tutorial covers how to create your own Estimator using the building blocks +This tutorial covers how to create your own `Estimator` using the building blocks provided in `tf.contrib.learn`, which will predict the ages of [abalones](https://en.wikipedia.org/wiki/Abalone) based on their physical measurements. You'll learn how to do the following: @@ -237,9 +237,8 @@ nn = tf.contrib.learn.Estimator( that will be passed into the `model_fn`. NOTE: Just like `tf.contrib.learn`'s predefined regressors and classifiers, the -`Estimator` initializer also accepts the following general configuration -arguments, all of which are optional: `model_dir`, `config`, and -`weight_column_name`. +`Estimator` initializer also accepts the general configuration +arguments `model_dir` and `config`. For the abalone age predictor, the model will accept one hyperparameter: learning rate. Define `LEARNING_RATE` as a constant at the beginning of your diff --git a/tensorflow/g3doc/tutorials/index.md b/tensorflow/g3doc/tutorials/index.md index c191dc88511..edc1f6b5a44 100644 --- a/tensorflow/g3doc/tutorials/index.md +++ b/tensorflow/g3doc/tutorials/index.md @@ -78,6 +78,14 @@ for predicting median house values. [View Tutorial](../tutorials/input_fn/index.md) +### Creating Estimators in tf.contrib.learn + +This tutorial covers how to create your own `Estimator` using the building blocks +provided in tf.contrib.learn. You'll build a model to predict the ages of abalones +based on their physical measurements. + +[View Tutorial](../tutorials/estimators/index.md) + ## TensorFlow Serving ### TensorFlow Serving diff --git a/tensorflow/g3doc/tutorials/leftnav_files b/tensorflow/g3doc/tutorials/leftnav_files index 6d9f6638db5..a75e62f5e36 100644 --- a/tensorflow/g3doc/tutorials/leftnav_files +++ b/tensorflow/g3doc/tutorials/leftnav_files @@ -9,6 +9,7 @@ wide/index.md wide_and_deep/index.md monitors/index.md input_fn/index.md +estimators/index.md ### TensorFlow Serving tfserve/index.md ### Image Processing diff --git a/tensorflow/g3doc/tutorials/tflearn/index.md b/tensorflow/g3doc/tutorials/tflearn/index.md index a7cebaaba82..b6e26ee351b 100644 --- a/tensorflow/g3doc/tutorials/tflearn/index.md +++ b/tensorflow/g3doc/tutorials/tflearn/index.md @@ -2,21 +2,23 @@ TensorFlow’s high-level machine learning API (tf.contrib.learn) makes it easy to configure, train, and evaluate a variety of machine learning models. In this -tutorial, you’ll use tf.contrib.learn to construct a -[neural network](https://en.wikipedia.org/wiki/Artificial_neural_network) -classifier and train it on the [Iris data set](https://en.wikipedia.org/wiki/Iris_flower_data_set) -to predict flower species based on sepal/petal geometry. 
You'll write code to -perform the following five steps: +tutorial, you’ll use tf.contrib.learn to construct a [neural +network](https://en.wikipedia.org/wiki/Artificial_neural_network) classifier and +train it on the [Iris data +set](https://en.wikipedia.org/wiki/Iris_flower_data_set) to predict flower +species based on sepal/petal geometry. You'll write code to perform the +following five steps: 1. Load CSVs containing Iris training/test data into a TensorFlow `Dataset` -2. Construct a [neural network classifier](../../api_docs/python/contrib.learn.md#DNNClassifier) +2. Construct a [neural network + classifier](../../api_docs/python/contrib.learn.md#DNNClassifier) 3. Fit the model using the training data 4. Evaluate the accuracy of the model 5. Classify new samples -NOTE: Remember to -[install TensorFlow on your machine](../../get_started/os_setup.md#download-and-setup) -before getting started with this tutorial. +NOTE: Remember to [install TensorFlow on your +machine](../../get_started/os_setup.md#download-and-setup) before getting +started with this tutorial. ## Complete Neural Network Source Code @@ -35,10 +37,14 @@ IRIS_TRAINING = "iris_training.csv" IRIS_TEST = "iris_test.csv" # Load datasets. -training_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TRAINING, - target_dtype=np.int) -test_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TEST, - target_dtype=np.int) +training_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TRAINING, + target_dtype=np.int, + features_dtype=np.float32) +test_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TEST, + target_dtype=np.int, + features_dtype=np.float32) # Specify that all features have real-value data feature_columns = [tf.contrib.layers.real_valued_column("", dimension=4)] @@ -62,7 +68,7 @@ print('Accuracy: {0:f}'.format(accuracy_score)) # Classify two new flower samples. new_samples = np.array( [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) -y = classifier.predict(new_samples) +y = list(classifier.predict(new_samples, as_iterable=True)) print('Predictions: {}'.format(str(y))) ``` @@ -87,8 +93,8 @@ and [*Iris virginica*](https://www.flickr.com/photos/33397993@N05/3352169862) Each row contains the following data for each flower sample: [sepal](https://en.wikipedia.org/wiki/Sepal) length, sepal width, [petal](https://en.wikipedia.org/wiki/Petal) length, petal width, and flower -species. Flower species are represented as integers, with 0 denoting -*Iris setosa*, 1 denoting *Iris versicolor*, and 2 denoting *Iris virginica*. +species. Flower species are represented as integers, with 0 denoting *Iris +setosa*, 1 denoting *Iris versicolor*, and 2 denoting *Iris virginica*. Sepal Length | Sepal Width | Petal Length | Petal Width | Species :----------- | :---------- | :----------- | :---------- | :------- @@ -107,8 +113,10 @@ Sepal Length | Sepal Width | Petal Length | Petal Width | Species For this tutorial, the Iris data has been randomized and split into two separate CSVs: -* A training set of 120 samples ([iris_training.csv](http://download.tensorflow.org/data/iris_training.csv)) -* A test set of 30 samples ([iris_test.csv](http://download.tensorflow.org/data/iris_test.csv)). +* A training set of 120 samples + ([iris_training.csv](http://download.tensorflow.org/data/iris_training.csv)) +* A test set of 30 samples + ([iris_test.csv](http://download.tensorflow.org/data/iris_test.csv)). Place these files in the same directory as your Python code. 
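If you would rather not download the two CSVs by hand, an optional helper along these lines (not part of the tutorial code; it only uses the URLs listed above and the standard library) can fetch them into the working directory first:

```python
import os

try:
  from urllib.request import urlretrieve  # Python 3
except ImportError:
  from urllib import urlretrieve  # Python 2

DATA_FILES = [
    ("iris_training.csv", "http://download.tensorflow.org/data/iris_training.csv"),
    ("iris_test.csv", "http://download.tensorflow.org/data/iris_test.csv"),
]

for filename, url in DATA_FILES:
  if not os.path.exists(filename):
    urlretrieve(url, filename)  # save next to your Python code
```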
@@ -124,13 +132,17 @@ import numpy as np ``` Next, load the training and test sets into `Dataset`s using the -[`load_csv()`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/datasets/base.py) -method in `learn.datasets.base`. The `load_csv()` method takes two required -arguments: +[`load_csv_with_header()`](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn/datasets/base.py) +method in `learn.datasets.base`. The `load_csv_with_header()` method takes three +required arguments: * `filename`, which takes the filepath to the CSV file -* `target_dtype`, which takes the [`numpy` datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) - of the dataset's target value. +* `target_dtype`, which takes the [`numpy` + datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) of the + dataset's target value. +* `features_dtype`, which takes the [`numpy` + datatype](http://docs.scipy.org/doc/numpy/user/basics.types.html) of the + dataset's feature values. Here, the target (the value you're training the model to predict) is flower species, which is an integer from 0–2, so the appropriate `numpy` datatype @@ -142,25 +154,28 @@ IRIS_TRAINING = "iris_training.csv" IRIS_TEST = "iris_test.csv" # Load datasets. -training_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TRAINING, - target_dtype=np.int) -test_set = tf.contrib.learn.datasets.base.load_csv(filename=IRIS_TEST, - target_dtype=np.int) +training_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TRAINING, + target_dtype=np.int, + features_dtype=np.float32) +test_set = tf.contrib.learn.datasets.base.load_csv_with_header( + filename=IRIS_TEST, + target_dtype=np.int, + features_dtype=np.float32) ``` -`Dataset`s in tf.contrib.learn are -[named tuples](https://docs.python.org/2/library/collections.html#collections.namedtuple); +`Dataset`s in tf.contrib.learn are [named +tuples](https://docs.python.org/2/library/collections.html#collections.namedtuple); you can access feature data and target values via the `data` and `target` fields. Here, `training_set.data` and `training_set.target` contain the feature data and target values for the training set, respectively, and `test_set.data` and `test_set.target` contain feature data and target values for the test set. -Later on, in -["Fit the DNNClassifier to the Iris Training Data,"](#fit-dnnclassifier) -you'll use `training_set.data` and `training_set.target` to train your model, -and in ["Evaluate Model Accuracy,"](#evaluate-accuracy) you'll use -`test_set.data` and `test_set.target`. But first, you'll construct your model in -the next section. +Later on, in ["Fit the DNNClassifier to the Iris Training +Data,"](#fit-dnnclassifier) you'll use `training_set.data` and +`training_set.target` to train your model, and in ["Evaluate Model +Accuracy,"](#evaluate-accuracy) you'll use `test_set.data` and +`test_set.target`. But first, you'll construct your model in the next section. ## Construct a Deep Neural Network Classifier @@ -169,8 +184,8 @@ tf.contrib.learn offers a variety of predefined models, called use "out of the box" to run training and evaluation operations on your data. Here, you'll configure a Deep Neural Network Classifier model to fit the Iris data. 
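Before moving on, an optional sanity check on the `Dataset` named tuples described above can confirm the data loaded as expected (this assumes the loading snippet above has already run, and the shapes reflect the 120/30-sample split described earlier):

```python
# training_set.data holds the feature matrix; training_set.target holds the labels.
print(training_set.data.shape)    # expected: (120, 4) -- 120 samples, 4 features
print(training_set.target.shape)  # expected: (120,) -- one integer label per sample
print(set(training_set.target))   # expected: {0, 1, 2} -- the three Iris species
```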
Using tf.contrib.learn, you can instantiate your -[`DNNClassifier`](../../api_docs/python/contrib.learn.md#DNNClassifier) -with just a couple lines of code: +[`DNNClassifier`](../../api_docs/python/contrib.learn.md#DNNClassifier) with +just a couple lines of code: ```python # Specify that all features have real-value data @@ -193,14 +208,14 @@ accordingly to `4` to hold all the data. Then, the code creates a `DNNClassifier` model using the following arguments: * `feature_columns=feature_columns`. The set of feature columns defined above. -* `hidden_units=[10, 20, 10]`. Three - [hidden layers](http://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw), +* `hidden_units=[10, 20, 10]`. Three [hidden + layers](http://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw), containing 10, 20, and 10 neurons, respectively. * `n_classes=3`. Three target classes, representing the three Iris species. * `model_dir=/tmp/iris_model`. The directory in which TensorFlow will save checkpoint data during model training. For more on logging and monitoring - with TensorFlow, see - [Logging and Monitoring Basics with tf.contrib.learn](../monitors/index.md). + with TensorFlow, see [Logging and Monitoring Basics with + tf.contrib.learn](../monitors/index.md). ## Fit the DNNClassifier to the Iris Training Data {#fit-dnnclassifier} @@ -271,7 +286,7 @@ You can predict their species with the following code: # Classify two new flower samples. new_samples = np.array( [[6.4, 3.2, 4.5, 1.5], [5.8, 3.1, 5.0, 1.7]], dtype=float) -y = classifier.predict(new_samples) +y = list(classifier.predict(new_samples, as_iterable=True)) print('Predictions: {}'.format(str(y))) ``` @@ -293,7 +308,8 @@ second sample is *Iris virginica*. [Large-scale Linear Models with TensorFlow](../linear/overview.md). * To build your own Estimator using tf.contrib.learn APIs, check out [Building - Machine Learning Estimator in TensorFlow](http://terrytangyuan.github.io/2016/07/08/understand-and-build-tensorflow-estimator/). + Machine Learning Estimator in + TensorFlow](http://terrytangyuan.github.io/2016/07/08/understand-and-build-tensorflow-estimator/). * To experiment with neural network modeling and visualization in the browser, check out [Deep Playground](http://playground.tensorflow.org/). diff --git a/tensorflow/go/op/op.go b/tensorflow/go/op/op.go new file mode 100644 index 00000000000..dd79c2076ac --- /dev/null +++ b/tensorflow/go/op/op.go @@ -0,0 +1,51 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package op defines functions for adding TensorFlow operations to a Graph. +// +// Functions for adding an operation to a graph take a Scope object as the +// first argument. The Scope object encapsulates a graph and a set of +// properties (such as a name prefix) for all operations being added +// to the graph. 
+// +// WARNING: The API in this package has not been finalized and can +// change without notice. +package op + +import ( + tf "github.com/tensorflow/tensorflow/tensorflow/go" +) + +// Const adds an operation to graph that produces value as output. +func Const(scope *Scope, value interface{}) (tf.Output, error) { + if t, ok := value.(*tf.Tensor); ok { + return makeConst(scope, t) + } + t, err := tf.NewTensor(value) + if err != nil { + return tf.Output{}, err + } + return makeConst(scope, t) +} + +func makeConst(scope *Scope, t *tf.Tensor) (tf.Output, error) { + op, err := scope.Graph().AddOperation(tf.OpSpec{ + Name: scope.opName("Const"), + Type: "Const", + Attrs: map[string]interface{}{ + "dtype": t.DataType(), + "value": t, + }}) + return op.Output(0), err +} diff --git a/tensorflow/go/op/scope.go b/tensorflow/go/op/scope.go new file mode 100644 index 00000000000..25ebbae70f6 --- /dev/null +++ b/tensorflow/go/op/scope.go @@ -0,0 +1,77 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package op + +import ( + "fmt" + + tf "github.com/tensorflow/tensorflow/tensorflow/go" +) + +// Scope encapsulates common properties of operations being added to a Graph. +// +// Scopes allow common properties (such as a name prefix) to be specified +// once for multiple operations being added to a graph. The With* methods +// create derivative scopes that encapsulate the same set of properties +// as the parent Scope, except for the one being changed by the specific +// With* method. +// +// Scopes are NOT safe for concurrent use by multiple goroutines. +type Scope struct { + graph *tf.Graph + namemap map[string]int + namespace string +} + +// NewScope creates a Scope initialized with an empty Graph. +func NewScope() *Scope { + return &Scope{graph: tf.NewGraph(), namemap: make(map[string]int)} +} + +// Graph returns the Graph which this Scope and its children are +func (s *Scope) Graph() *tf.Graph { + return s.graph +} + +// SubScope returns a new Scope which will cause all operations added to the +// graph to be namespaced with 'namespace'. If namespace collides with an +// existing namespace within the scope, then a suffix will be added. +func (s *Scope) SubScope(namespace string) *Scope { + namespace = s.uniqueName(namespace) + if s.namespace != "" { + namespace = s.namespace + "/" + namespace + } + return &Scope{ + graph: s.graph, + namemap: make(map[string]int), + namespace: namespace, + } +} + +func (s *Scope) uniqueName(name string) string { + count := s.namemap[name] + s.namemap[name]++ + if count == 0 { + return name + } + return fmt.Sprint(name, "_", count) +} + +func (s *Scope) opName(typ string) string { + if s.namespace == "" { + return typ + } + return s.namespace + "/" + typ +} diff --git a/tensorflow/go/op/scope_test.go b/tensorflow/go/op/scope_test.go new file mode 100644 index 00000000000..3d1d3364195 --- /dev/null +++ b/tensorflow/go/op/scope_test.go @@ -0,0 +1,62 @@ +// Copyright 2016 The TensorFlow Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package op + +import ( + "fmt" + "testing" +) + +func TestScopeSubScope(t *testing.T) { + constant := func(s *Scope) string { + c, err := Const(s, int64(1)) + if err != nil { + t.Fatal(err) + } + return c.Op.Name() + } + var ( + root = NewScope() + sub1 = root.SubScope("x") + sub2 = root.SubScope("x") + sub1a = sub1.SubScope("y") + sub2a = sub2.SubScope("y") + ) + testdata := []struct { + got, want string + }{ + {constant(root), "Const"}, + {constant(sub1), "x/Const"}, + {constant(sub1a), "x/y/Const"}, + {constant(sub2), "x_1/Const"}, + {constant(sub2a), "x_1/y/Const"}, + } + for idx, test := range testdata { + if test.got != test.want { + t.Errorf("#%d: Got %q, want %q", idx, test.got, test.want) + } + } + +} + +func ExampleScope_SubScope() { + var ( + s = NewScope() + c1, _ = Const(s.SubScope("x"), int64(1)) + c2, _ = Const(s.SubScope("x"), int64(1)) + ) + fmt.Println(c1.Op.Name(), c2.Op.Name()) + // Output: x/Const x_1/Const +} diff --git a/tensorflow/go/tensor.go b/tensorflow/go/tensor.go index 76a4615a7bf..b1c34b6cd5c 100644 --- a/tensorflow/go/tensor.go +++ b/tensorflow/go/tensor.go @@ -27,9 +27,32 @@ import ( ) // DataType holds the type for a scalar value. E.g., one slot in a tensor. -// The values here are identical to corresponding values in types.proto. type DataType C.TF_DataType +// Types of scalar values in the TensorFlow type system. +const ( + Float DataType = C.TF_FLOAT + Double DataType = C.TF_DOUBLE + Int32 DataType = C.TF_INT32 + Uint8 DataType = C.TF_UINT8 + Int16 DataType = C.TF_INT16 + Int8 DataType = C.TF_INT8 + String DataType = C.TF_STRING + Complex64 DataType = C.TF_COMPLEX64 + Complex DataType = C.TF_COMPLEX + Int64 DataType = C.TF_INT64 + Bool DataType = C.TF_BOOL + Qint8 DataType = C.TF_QINT8 + Quint8 DataType = C.TF_QUINT8 + Qint32 DataType = C.TF_QINT32 + Bfloat16 DataType = C.TF_BFLOAT16 + Qint16 DataType = C.TF_QINT16 + Quint16 DataType = C.TF_QUINT16 + Uint16 DataType = C.TF_UINT16 + Complex128 DataType = C.TF_COMPLEX128 + Half DataType = C.TF_HALF +) + // Tensor holds a multi-dimensional array of elements of a single data type. 
type Tensor struct { // We create TF_Tensor on demand rather than keep a handle to C.TF_Tensor diff --git a/tensorflow/go/tensor_test.go b/tensorflow/go/tensor_test.go index 630d6137292..fd80658615e 100644 --- a/tensorflow/go/tensor_test.go +++ b/tensorflow/go/tensor_test.go @@ -60,6 +60,8 @@ func TestNewTensor(t *testing.T) { []uint32{5}, uint64(5), []uint64{5}, + // Mismatched dimensions + [][]float32{{1,2,3},{4}}, } for _, test := range tests { diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 9cf4edee061..59fcc45a811 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2055,6 +2055,9 @@ py_test( size = "small", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", + tags = [ + "notsan", # http://b/32109634 + ], deps = [ "//tensorflow:tensorflow_py", ], diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 9751907eede..64434b40b99 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -46,6 +46,12 @@ py_library( srcs_version = "PY2AND3", ) +py_library( + name = "command_parser", + srcs = ["cli/command_parser.py"], + srcs_version = "PY2AND3", +) + py_library( name = "tensor_format", srcs = ["cli/tensor_format.py"], @@ -58,6 +64,7 @@ py_library( srcs = ["cli/analyzer_cli.py"], srcs_version = "PY2AND3", deps = [ + ":command_parser", ":debug_data", ":debugger_cli_common", ":tensor_format", @@ -68,7 +75,10 @@ py_library( name = "curses_ui", srcs = ["cli/curses_ui.py"], srcs_version = "PY2AND3", - deps = [":debugger_cli_common"], + deps = [ + ":command_parser", + ":debugger_cli_common", + ], ) py_library( @@ -200,6 +210,19 @@ py_test( ], ) +py_test( + name = "command_parser_test", + size = "small", + srcs = [ + "cli/command_parser_test.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":command_parser", + "//tensorflow/python:framework_test_lib", + ], +) + py_test( name = "tensor_format_test", size = "small", diff --git a/tensorflow/python/debug/cli/analyzer_cli.py b/tensorflow/python/debug/cli/analyzer_cli.py index ab24023b79a..872624707a8 100644 --- a/tensorflow/python/debug/cli/analyzer_cli.py +++ b/tensorflow/python/debug/cli/analyzer_cli.py @@ -30,6 +30,7 @@ import re from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.debug import debug_data +from tensorflow.python.debug.cli import command_parser from tensorflow.python.debug.cli import debugger_cli_common from tensorflow.python.debug.cli import tensor_format @@ -44,12 +45,10 @@ HANG_SUFFIX = "|- " DEPTH_TEMPLATE = "(%d) " OP_TYPE_TEMPLATE = "[%s] " -# String consntats for control inputs/outputs, etc. +# String constants for control inputs/outputs, etc. CTRL_LABEL = "(Ctrl) " ELLIPSIS = "..." -DEFAULT_NDARRAY_DISPLAY_THRESHOLD = 2000 - class DebugAnalyzer(object): """Analyzer for debug data from dump directories.""" @@ -69,6 +68,10 @@ class DebugAnalyzer(object): # Argument parsers for command handlers. self._arg_parsers = {} + # Default threshold number of elements above which ellipses will be used + # when printing the value of the tensor. + self.default_ndarray_display_threshold = 2000 + # Parser for list_tensors. 
ap = argparse.ArgumentParser( description="List dumped intermediate tensors.", @@ -187,11 +190,22 @@ class DebugAnalyzer(object): ap.add_argument( "tensor_name", type=str, - help="Name of the tensor, e.g., hidden1/Wx_plus_b/MatMul:0") + help="Name of the tensor, followed by any slicing indices, " + "e.g., hidden1/Wx_plus_b/MatMul:0, " + "hidden1/Wx_plus_b/MatMul:0[1, :]") + ap.add_argument( + "-n", + "--number", + dest="number", + type=int, + default=-1, + help="0-based dump number for the specified tensor. " + "Required for tensor with multiple dumps.") + ap.add_argument( "-a", "--all", - dest="all", + dest="print_all", action="store_true", help="Print the tensor in its entirety, i.e., do not use ellipses.") self._arg_parsers["print_tensor"] = ap @@ -455,15 +469,22 @@ class DebugAnalyzer(object): Output text lines as a RichTextLines object. """ + parsed = self._arg_parsers["print_tensor"].parse_args(args) + if screen_info and "cols" in screen_info: np_printoptions = {"linewidth": screen_info["cols"]} else: np_printoptions = {} - parsed = self._arg_parsers["print_tensor"].parse_args(args) + # Determine if there parsed.tensor_name contains any indexing (slicing). + if parsed.tensor_name.count("[") == 1 and parsed.tensor_name.endswith("]"): + tensor_name = parsed.tensor_name[:parsed.tensor_name.index("[")] + tensor_slicing = parsed.tensor_name[parsed.tensor_name.index("["):] + else: + tensor_name = parsed.tensor_name + tensor_slicing = "" - node_name, output_slot = debug_data.parse_node_or_tensor_name( - parsed.tensor_name) + node_name, output_slot = debug_data.parse_node_or_tensor_name(tensor_name) if output_slot is None: return self._error("\"%s\" is not a valid tensor name" % parsed.tensor_name) @@ -484,25 +505,102 @@ class DebugAnalyzer(object): matching_data.append(datum) if not matching_data: + # No dump for this tensor. return self._error( "Tensor \"%s\" did not generate any dumps." % parsed.tensor_name) - - # TODO(cais): In the case of multiple dumps from the same tensor, require - # explicit specification of the DebugOp and the temporal order. - if len(matching_data) > 1: - return self._error( - "print_tensor logic for multiple dumped records has not been " - "implemented.") - - tensor = matching_data[0].get_tensor() - if parsed.all: - np_printoptions["threshold"] = tensor.size + elif len(matching_data) == 1: + # There is only one dump for this tensor. + if parsed.number <= 0: + return self._format_tensor( + matching_data[0].get_tensor(), + matching_data[0].watch_key, + np_printoptions, + print_all=parsed.print_all, + tensor_slicing=tensor_slicing) + else: + return self._error( + "Invalid number (%d) for tensor %s, which generated one dump." % + (parsed.number, parsed.tensor_name)) else: - np_printoptions["threshold"] = DEFAULT_NDARRAY_DISPLAY_THRESHOLD + # There are more than one dumps for this tensor. 
+ if parsed.number < 0: + lines = [ + "Tensor \"%s\" generated %d dumps:" % (parsed.tensor_name, + len(matching_data)) + ] + + for i, datum in enumerate(matching_data): + rel_time = (datum.timestamp - self._debug_dump.t0) / 1000.0 + lines.append("#%d [%.3f ms] %s" % (i, rel_time, datum.watch_key)) + + lines.append("") + lines.append( + "Use the -n (--number) flag to specify which dump to print.") + lines.append("For example:") + lines.append(" print_tensor %s -n 0" % parsed.tensor_name) + + return debugger_cli_common.RichTextLines(lines) + elif parsed.number >= len(matching_data): + return self._error( + "Specified number (%d) exceeds the number of available dumps " + "(%d) for tensor %s" % + (parsed.number, len(matching_data), parsed.tensor_name)) + else: + return self._format_tensor( + matching_data[parsed.number].get_tensor(), + matching_data[parsed.number].watch_key + " (dump #%d)" % + parsed.number, + np_printoptions, + print_all=parsed.print_all, + tensor_slicing=tensor_slicing) + + def _format_tensor(self, + tensor, + watch_key, + np_printoptions, + print_all=False, + tensor_slicing=None): + """Generate formatted str to represent a tensor or its slices. + + Args: + tensor: (numpy ndarray) The tensor value. + watch_key: (str) Tensor debug watch key. + np_printoptions: (dict) Numpy tensor formatting options. + print_all: (bool) Whether the tensor is to be displayed in its entirety, + instead of printing ellipses, even if its number of elements exceeds + the default numpy display threshold. + (Note: Even if this is set to true, the screen output can still be cut + off by the UI frontend if it consist of more lines than the frontend + can handle.) + tensor_slicing: (str or None) Slicing of the tensor, e.g., "[:, 1]". If + None, no slicing will be performed on the tensor. + + Returns: + (str) Formatted str representing the (potentially sliced) tensor. + + Raises: + ValueError: If tehsor_slicing is not a valid numpy ndarray slicing str. + """ + + if tensor_slicing: + # Validate the indexing. 
+ if not command_parser.validate_slicing_string(tensor_slicing): + raise ValueError("Invalid tensor-slicing string.") + + value = eval("tensor" + tensor_slicing) # pylint: disable=eval-used + sliced_name = watch_key + tensor_slicing + else: + value = tensor + sliced_name = watch_key + + if print_all: + np_printoptions["threshold"] = value.size + else: + np_printoptions["threshold"] = self.default_ndarray_display_threshold return tensor_format.format_tensor( - tensor, - matching_data[0].watch_key, + value, + sliced_name, include_metadata=True, np_printoptions=np_printoptions) diff --git a/tensorflow/python/debug/cli/analyzer_cli_test.py b/tensorflow/python/debug/cli/analyzer_cli_test.py index ee62dcc270d..f5552017666 100644 --- a/tensorflow/python/debug/cli/analyzer_cli_test.py +++ b/tensorflow/python/debug/cli/analyzer_cli_test.py @@ -505,6 +505,59 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): self.assertIn(4, out.annotations) self.assertIn(5, out.annotations) + def testPrintTensorWithSlicing(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0[1, :]"], + screen_info={"cols": 80}) + + self.assertEqual([ + "Tensor \"simple_mul_add/matmul:0:DebugIdentity[1, :]\":", + " dtype: float64", " shape: (1,)", "", "array([-2.])" + ], out.lines) + + self.assertIn("tensor_metadata", out.annotations) + self.assertIn(4, out.annotations) + + def testPrintTensorInvalidSlicingString(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0[1, foo()]"], + screen_info={"cols": 80}) + + self.assertEqual("Error occurred during handling of command: print_tensor " + "simple_mul_add/matmul:0[1, foo()]:", out.lines[0]) + self.assertEqual("ValueError: Invalid tensor-slicing string.", + out.lines[-2]) + + def testPrintTensorValidExplicitNumber(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0", "-n", "0"], + screen_info={"cols": 80}) + + self.assertEqual([ + "Tensor \"simple_mul_add/matmul:0:DebugIdentity\":", + " dtype: float64", + " shape: (2, 1)", + "", + "array([[ 7.],", + " [-2.]])", + ], out.lines) + + self.assertIn("tensor_metadata", out.annotations) + self.assertIn(4, out.annotations) + self.assertIn(5, out.annotations) + + def testPrintTensorInvalidExplicitNumber(self): + out = self._registry.dispatch_command( + "print_tensor", ["simple_mul_add/matmul:0", "-n", "1"], + screen_info={"cols": 80}) + + self.assertEqual([ + "ERROR: Invalid number (1) for tensor simple_mul_add/matmul:0, " + "which generated one dump." + ], out.lines) + + self.assertNotIn("tensor_metadata", out.annotations) + def testPrintTensorMissingOutputSlot(self): out = self._registry.dispatch_command( "print_tensor", ["simple_mul_add/matmul"]) @@ -568,6 +621,78 @@ class AnalyzerCLISimpleMulAddTest(test_util.TensorFlowTestCase): analyzer.get_tensor_filter("bar") +class AnalyzerCLIPrintLargeTensorTest(test_util.TensorFlowTestCase): + + @classmethod + def setUpClass(cls): + cls._dump_root = tempfile.mkdtemp() + + with session.Session() as sess: + # 2400 elements should exceed the default threshold (2000). + x = constant_op.constant(np.zeros([300, 8]), name="large_tensors/x") + + run_options = config_pb2.RunOptions(output_partition_graphs=True) + debug_utils.watch_graph( + run_options, + sess.graph, + debug_ops=["DebugIdentity"], + debug_urls="file://%s" % cls._dump_root) + + # Invoke Session.run(). 
+ run_metadata = config_pb2.RunMetadata() + sess.run(x, options=run_options, run_metadata=run_metadata) + + cls._debug_dump = debug_data.DebugDumpDir( + cls._dump_root, partition_graphs=run_metadata.partition_graphs) + + # Construct the analyzer. + cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump) + + # Construct the handler registry. + cls._registry = debugger_cli_common.CommandHandlerRegistry() + + # Register command handler. + cls._registry.register_command_handler( + "print_tensor", + cls._analyzer.print_tensor, + cls._analyzer.get_help("print_tensor"), + prefix_aliases=["pt"]) + + @classmethod + def tearDownClass(cls): + # Tear down temporary dump directory. + shutil.rmtree(cls._dump_root) + + def testPrintLargeTensorWithoutAllOption(self): + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0"], screen_info={"cols": 80}) + + print(out.lines) # DEBUG + + # Assert that ellipses are present in the tensor value printout. + self.assertIn("...,", out.lines[4]) + + # 2100 still exceeds 2000. + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0[:, 0:7]"], + screen_info={"cols": 80}) + + self.assertIn("...,", out.lines[4]) + + def testPrintLargeTensorWithAllOption(self): + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0", "-a"], + screen_info={"cols": 80}) + + # Assert that ellipses are not present in the tensor value printout. + self.assertNotIn("...,", out.lines[4]) + + out = self._registry.dispatch_command( + "print_tensor", ["large_tensors/x:0[:, 0:7]", "--all"], + screen_info={"cols": 80}) + self.assertNotIn("...,", out.lines[4]) + + class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase): @classmethod @@ -811,5 +936,94 @@ class AnalyzerCLIControlDepTest(test_util.TensorFlowTestCase): " [Op]: Input node has op type Op."], out.lines) +class AnalyzerCLIWhileLoopTest(test_util.TensorFlowTestCase): + + @classmethod + def setUpClass(cls): + cls._dump_root = tempfile.mkdtemp() + + with session.Session() as sess: + loop_var = constant_op.constant(0, name="while_loop_test/loop_var") + cond = lambda loop_var: math_ops.less(loop_var, 10) + body = lambda loop_var: math_ops.add(loop_var, 1) + while_loop = control_flow_ops.while_loop( + cond, body, [loop_var], parallel_iterations=1) + + run_options = config_pb2.RunOptions(output_partition_graphs=True) + debug_url = "file://%s" % cls._dump_root + + watch_opts = run_options.debug_tensor_watch_opts + + # Add debug tensor watch for "while/Identity". + watch = watch_opts.add() + watch.node_name = "while/Identity" + watch.output_slot = 0 + watch.debug_ops.append("DebugIdentity") + watch.debug_urls.append(debug_url) + + # Invoke Session.run(). + run_metadata = config_pb2.RunMetadata() + sess.run(while_loop, options=run_options, run_metadata=run_metadata) + + cls._debug_dump = debug_data.DebugDumpDir( + cls._dump_root, partition_graphs=run_metadata.partition_graphs) + + cls._analyzer = analyzer_cli.DebugAnalyzer(cls._debug_dump) + cls._registry = debugger_cli_common.CommandHandlerRegistry() + cls._registry.register_command_handler( + "list_tensors", + cls._analyzer.list_tensors, + cls._analyzer.get_help("list_tensors"), + prefix_aliases=["lt"]) + cls._registry.register_command_handler( + "print_tensor", + cls._analyzer.print_tensor, + cls._analyzer.get_help("print_tensor"), + prefix_aliases=["pt"]) + + @classmethod + def tearDownClass(cls): + # Tear down temporary dump directory. 
+ shutil.rmtree(cls._dump_root) + + def testMultipleDumpsPrintTensorNoNumber(self): + output = self._registry.dispatch_command("pt", ["while/Identity:0"]) + + self.assertEqual("Tensor \"while/Identity:0\" generated 10 dumps:", + output.lines[0]) + + for i in xrange(10): + self.assertTrue(output.lines[i + 1].startswith("#%d" % i)) + self.assertTrue(output.lines[i + 1].endswith( + " ms] while/Identity:0:DebugIdentity")) + + self.assertEqual( + "Use the -n (--number) flag to specify which dump to print.", + output.lines[-3]) + self.assertEqual("For example:", output.lines[-2]) + self.assertEqual(" print_tensor while/Identity:0 -n 0", output.lines[-1]) + + def testMultipleDumpsPrintTensorWithNumber(self): + for i in xrange(5): + output = self._registry.dispatch_command( + "pt", ["while/Identity:0", "-n", "%d" % i]) + + self.assertEqual("Tensor \"while/Identity:0:DebugIdentity (dump #%d)\":" % + i, output.lines[0]) + self.assertEqual(" dtype: int32", output.lines[1]) + self.assertEqual(" shape: ()", output.lines[2]) + self.assertEqual("", output.lines[3]) + self.assertEqual("array(%d, dtype=int32)" % i, output.lines[4]) + + def testMultipleDumpsPrintTensorInvalidNumber(self): + output = self._registry.dispatch_command("pt", + ["while/Identity:0", "-n", "10"]) + + self.assertEqual([ + "ERROR: Specified number (10) exceeds the number of available dumps " + "(10) for tensor while/Identity:0" + ], output.lines) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/debug/cli/command_parser.py b/tensorflow/python/debug/cli/command_parser.py new file mode 100644 index 00000000000..4a70468e278 --- /dev/null +++ b/tensorflow/python/debug/cli/command_parser.py @@ -0,0 +1,110 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Command parsing module for TensorFlow Debugger (tfdbg).""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re + +_BRACKETS_PATTERN = re.compile(r"\[[^\]]*\]") +_QUOTES_PATTERN = re.compile(r"\"[^\"]*\"") +_WHITESPACE_PATTERN = re.compile(r"\s+") + + +def parse_command(command): + """Parse command string into a list of arguments. + + - Disregards whitespace inside double quotes and brackets. + - Strips paired leading and trailing double quotes in arguments. + - Splits the command at whitespace. + + Nested double quotes and brackets are not handled. + + Args: + command: (str) Input command. + + Returns: + (list of str) List of arguments. 
+ """ + + command = command.strip() + if not command: + return [] + + brackets_intervals = [f.span() for f in _BRACKETS_PATTERN.finditer(command)] + quotes_intervals = [f.span() for f in _QUOTES_PATTERN.finditer(command)] + whitespaces_intervals = [ + f.span() for f in _WHITESPACE_PATTERN.finditer(command) + ] + + if not whitespaces_intervals: + return [command] + + arguments = [] + idx0 = 0 + for start, end in whitespaces_intervals + [(len(command), None)]: + # Skip whitespace stretches enclosed in brackets or double quotes. + + if not any(interval[0] < start < interval[1] + for interval in brackets_intervals + quotes_intervals): + argument = command[idx0:start] + + # Strip leading and trailing double quote if they are paired. + if argument.startswith("\"") and argument.endswith("\""): + argument = argument[1:-1] + arguments.append(argument) + idx0 = end + + return arguments + + +def parse_tensor_name_with_slicing(in_str): + """Parse tensor name, potentially suffixed by slicing string. + + Args: + in_str: (str) Input name of the tensor, potentially followed by a slicing + string. E.g.: Without slicing string: "hidden/weights/Variable:0", with + slicing string: "hidden/weights/Varaible:0[1, :]" + + Returns: + (str) name of the tensor + (str) sliciing string, if any. If no slicing string is present, return "". + """ + + if in_str.count("[") == 1 and in_str.endswith("]"): + tensor_name = in_str[:in_str.index("[")] + tensor_slicing = in_str[in_str.index("["):] + else: + tensor_name = in_str + tensor_slicing = "" + + return tensor_name, tensor_slicing + + +def validate_slicing_string(slicing_string): + """Validate a slicing string. + + Check if the input string contains only brackets, digits, commas and + colons that are valid characters in numpy-style array slicing. + + Args: + slicing_string: (str) Input slicing string to be validated. + + Returns: + (bool) True if and only if the slicing string is valid. + """ + + return bool(re.search(r"^\[(\d|,|\s|:)+\]$", slicing_string)) diff --git a/tensorflow/python/debug/cli/command_parser_test.py b/tensorflow/python/debug/cli/command_parser_test.py new file mode 100644 index 00000000000..b819f25e69b --- /dev/null +++ b/tensorflow/python/debug/cli/command_parser_test.py @@ -0,0 +1,133 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for TensorFlow Debugger command parser.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.debug.cli import command_parser +from tensorflow.python.framework import test_util +from tensorflow.python.platform import googletest + + +class ParseCommandTest(test_util.TensorFlowTestCase): + + def testParseNoBracketsOrQuotes(self): + command = "" + self.assertEqual([], command_parser.parse_command(command)) + + command = "a" + self.assertEqual(["a"], command_parser.parse_command(command)) + + command = "foo bar baz qux" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + command = "foo bar\tbaz\t qux" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + def testParseLeadingTrailingWhitespaces(self): + command = " foo bar baz qux " + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + command = "\nfoo bar baz qux\n" + self.assertEqual(["foo", "bar", "baz", "qux"], + command_parser.parse_command(command)) + + def testParseCommandsWithBrackets(self): + command = "pt foo[1, 2, :]" + self.assertEqual(["pt", "foo[1, 2, :]"], + command_parser.parse_command(command)) + command = "pt foo[1, 2, :] -a" + self.assertEqual(["pt", "foo[1, 2, :]", "-a"], + command_parser.parse_command(command)) + + command = "inject_value foo [1, 2,:] 0" + self.assertEqual(["inject_value", "foo", "[1, 2,:]", "0"], + command_parser.parse_command(command)) + + def testParseCommandWithTwoArgsContainingBrackets(self): + command = "pt foo[1, :] bar[:, 2]" + self.assertEqual(["pt", "foo[1, :]", "bar[:, 2]"], + command_parser.parse_command(command)) + + command = "pt foo[] bar[:, 2]" + self.assertEqual(["pt", "foo[]", "bar[:, 2]"], + command_parser.parse_command(command)) + + def testParseCommandWithUnmatchedBracket(self): + command = "pt foo[1, 2, :" + self.assertNotEqual(["pt", "foo[1, 2, :]"], + command_parser.parse_command(command)) + + def testParseCommandsWithQuotes(self): + command = "inject_value foo \"np.zeros([100, 500])\"" + self.assertEqual(["inject_value", "foo", "np.zeros([100, 500])"], + command_parser.parse_command(command)) + # The pair of double quotes should have been stripped. 
+ + command = "\"command prefix with spaces\" arg1" + self.assertEqual(["command prefix with spaces", "arg1"], + command_parser.parse_command(command)) + + def testParseCommandWithTwoArgsContainingQuotes(self): + command = "foo \"bar\" \"qux\"" + self.assertEqual(["foo", "bar", "qux"], + command_parser.parse_command(command)) + + command = "foo \"\" \"qux\"" + self.assertEqual(["foo", "", "qux"], + command_parser.parse_command(command)) + + +class ParseTensorNameTest(test_util.TensorFlowTestCase): + + def testParseTensorNameWithoutSlicing(self): + (tensor_name, + tensor_slicing) = command_parser.parse_tensor_name_with_slicing( + "hidden/weights/Variable:0") + + self.assertEqual("hidden/weights/Variable:0", tensor_name) + self.assertEqual("", tensor_slicing) + + def testParseTensorNameWithSlicing(self): + (tensor_name, + tensor_slicing) = command_parser.parse_tensor_name_with_slicing( + "hidden/weights/Variable:0[:, 1]") + + self.assertEqual("hidden/weights/Variable:0", tensor_name) + self.assertEqual("[:, 1]", tensor_slicing) + + +class ValidateSlicingStringTest(test_util.TensorFlowTestCase): + + def testValidateValidSlicingStrings(self): + self.assertTrue(command_parser.validate_slicing_string("[1]")) + self.assertTrue(command_parser.validate_slicing_string("[2,3]")) + self.assertTrue(command_parser.validate_slicing_string("[4, 5, 6]")) + self.assertTrue(command_parser.validate_slicing_string("[7,:, :]")) + + def testValidateInvalidSlicingStrings(self): + self.assertFalse(command_parser.validate_slicing_string("")) + self.assertFalse(command_parser.validate_slicing_string("[1,")) + self.assertFalse(command_parser.validate_slicing_string("2,3]")) + self.assertFalse(command_parser.validate_slicing_string("[4, foo()]")) + self.assertFalse(command_parser.validate_slicing_string("[5, bar]")) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/debug/cli/curses_ui.py b/tensorflow/python/debug/cli/curses_ui.py index 5aafe541c8d..bcdd675f9b1 100644 --- a/tensorflow/python/debug/cli/curses_ui.py +++ b/tensorflow/python/debug/cli/curses_ui.py @@ -25,6 +25,7 @@ import sys from six.moves import xrange # pylint: disable=redefined-builtin +from tensorflow.python.debug.cli import command_parser from tensorflow.python.debug.cli import debugger_cli_common @@ -444,18 +445,11 @@ class CursesUI(object): args: (list of str) The command arguments (i.e., not including the prefix). """ - - # TODO(cais): Support parsing of arguments surrounded by pairs of quotes - # and with spaces in them. - command = command.strip() if not command: return "", [] - # Split and remove extra spaces. - command_items = command.split(" ") - command_items = [item for item in command_items if item] - + command_items = command_parser.parse_command(command) return command_items[0], command_items[1:] def _screen_gather_textbox_str(self): diff --git a/tensorflow/python/debug/examples/README.md b/tensorflow/python/debug/examples/README.md index 5a69749e788..26219600a8f 100644 --- a/tensorflow/python/debug/examples/README.md +++ b/tensorflow/python/debug/examples/README.md @@ -143,6 +143,7 @@ Try the following commands at the `tfdbg>` prompt: | Command example | Explanation | | ------------- |:--------------------- | | `pt hidden/Relu:0` | Print the value of the tensor `hidden/Relu:0`. | +| `pt hidden/Relu:0[:, 1]` | Print a subarray of the tensor `hidden/Relu:0`, using numpy-style array slicing. | | `ni -a hidden/Relu` | Displays information about the node `hidden/Relu`, including node attributes. 
| | `li -r hidden/Relu:0` | List the inputs to the node `hidden/Relu`, recursively, i.e., the input tree. | | `lo -r hidden/Relu:0` | List the recipients of the output of the node `hidden/Relu`, recursively, i.e., the output recipient tree. | diff --git a/tensorflow/python/framework/cpp_shape_inference.cc b/tensorflow/python/framework/cpp_shape_inference.cc index acf7aa8a1da..0d8703fe8fe 100644 --- a/tensorflow/python/framework/cpp_shape_inference.cc +++ b/tensorflow/python/framework/cpp_shape_inference.cc @@ -74,7 +74,6 @@ Status RunCppShapeInferenceImpl( // Run shape inference. tensorflow::shape_inference::InferenceContext c(&node, op_reg_data->op_def, - {} /* input_shape_strings */, input_shapes, input_tensors); TF_RETURN_IF_ERROR(c.construction_status()); diff --git a/tensorflow/python/framework/gen_docs_combined.py b/tensorflow/python/framework/gen_docs_combined.py index e512362a26d..f7d0351e71c 100644 --- a/tensorflow/python/framework/gen_docs_combined.py +++ b/tensorflow/python/framework/gen_docs_combined.py @@ -63,6 +63,7 @@ def module_names(): "tf.contrib.copy_graph", "tf.contrib.crf", "tf.contrib.distributions", + "tf.contrib.distributions.bijector", "tf.contrib.ffmpeg", "tf.contrib.framework", "tf.contrib.graph_editor", @@ -212,6 +213,9 @@ def all_libraries(module_to_name, members, documented): library("contrib.crf", "CRF (contrib)", tf.contrib.crf), library("contrib.distributions", "Statistical distributions (contrib)", tf.contrib.distributions), + library("contrib.distributions.bijector", + "Random variable transformations (contrib)", + tf.contrib.distributions.bijector), library("contrib.ffmpeg", "FFmpeg (contrib)", ffmpeg), library("contrib.framework", "Framework (contrib)", tf.contrib.framework), library("contrib.graph_editor", "Graph Editor (contrib)", diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 318e6568681..50f914d1f88 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -513,13 +513,17 @@ class Tensor(object): # ... ``` + This disallows ambiguities between testing the Python value vs testing the + dynamic condition of the `Tensor`. + Raises: `TypeError`. """ raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. " "Use `if t is not None:` instead of `if t:` to test if a " - "tensor is defined, and use the logical TensorFlow ops " - "to test the value of a tensor.") + "tensor is defined, and use TensorFlow ops such as " + "tf.cond to execute subgraphs conditioned on the value of " + "a tensor.") def __nonzero__(self): """Dummy method to prevent a tensor from being used as a Python `bool`. @@ -531,8 +535,9 @@ class Tensor(object): """ raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. " "Use `if t is not None:` instead of `if t:` to test if a " - "tensor is defined, and use the logical TensorFlow ops " - "to test the value of a tensor.") + "tensor is defined, and use TensorFlow ops such as " + "tf.cond to execute subgraphs conditioned on the value of " + "a tensor.") def eval(self, feed_dict=None, session=None): """Evaluates this tensor in a `Session`. @@ -2157,6 +2162,16 @@ class Graph(object): """ self._finalized = True + def _unsafe_unfinalize(self): + """Opposite of `finalize`. Internal interface. + + NOTE: Unfinalizing a graph could have negative impact on performance, + especially in a multi-threaded environment. Unfinalizing a graph + when it is in use by a Session may lead to undefined behavior. 
Ensure + that all sessions using a graph are closed before calling this method. + """ + self._finalized = False + def _get_control_flow_context(self): """Returns the current control flow context. diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 5bac51d34ac..34c6b326b4e 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -364,6 +364,10 @@ class CreateOpTest(test_util.TensorFlowTestCase): with self.assertRaises(RuntimeError): g.create_op("const", [], [dtypes.float32], None, name="myop1") + # Test unfinalize. + g._unsafe_unfinalize() + g.create_op("const", [], [dtypes.float32], None, name="myop1") + class ApplyOpTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/kernel_tests/linalg_ops_test.py b/tensorflow/python/kernel_tests/linalg_ops_test.py index 5b56b547d6f..4ef4af4d550 100644 --- a/tensorflow/python/kernel_tests/linalg_ops_test.py +++ b/tensorflow/python/kernel_tests/linalg_ops_test.py @@ -53,5 +53,186 @@ class CholeskySolveGpuTest(CholeskySolveTest): _use_gpu = True +class EyeTest(tf.test.TestCase): + + def test_non_batch_2x2(self): + num_rows = 2 + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, dtype=dtype) + self.assertAllEqual((num_rows, num_rows), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_2x3(self): + num_rows = 2 + num_columns = 3 + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_1x3_batch_4x4(self): + num_rows = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, batch_shape=batch_shape, dtype=dtype) + self.assertAllEqual(batch_shape + [num_rows, num_rows], eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_4x4_dynamic(self): + num_rows = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + num_rows_ph = tf.placeholder(tf.int32) + batch_shape_ph = tf.placeholder(tf.int32) + eye = tf.eye( + num_rows_ph, + batch_shape=batch_shape_ph, + dtype=dtype) + eye_v = eye.eval( + feed_dict={ + num_rows_ph: num_rows, + batch_shape_ph: batch_shape}) + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_5x4(self): + num_rows = 5 + num_columns = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_5x4_dynamic(self): + num_rows = 5 + num_columns = 4 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + num_rows_ph = tf.placeholder(tf.int32) + num_columns_ph = tf.placeholder(tf.int32) + batch_shape_ph = 
tf.placeholder(tf.int32) + eye = tf.eye( + num_rows_ph, + num_columns=num_columns_ph, + batch_shape=batch_shape_ph, + dtype=dtype) + eye_v = eye.eval( + feed_dict={ + num_rows_ph: num_rows, + num_columns_ph: num_columns, + batch_shape_ph: batch_shape}) + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_non_batch_0x0(self): + num_rows = 0 + dtype = np.int64 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, dtype=dtype) + self.assertAllEqual((num_rows, num_rows), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_2x0(self): + num_rows = 2 + num_columns = 0 + dtype = np.int64 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_non_batch_0x2(self): + num_rows = 0 + num_columns = 2 + dtype = np.int64 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, num_columns=num_columns, dtype=dtype) + self.assertAllEqual((num_rows, num_columns), eye.get_shape()) + self.assertAllEqual(np_eye, eye.eval()) + + def test_1x3_batch_0x0(self): + num_rows = 0 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows).astype(dtype) + with self.test_session(): + eye = tf.eye(num_rows, batch_shape=batch_shape, dtype=dtype) + self.assertAllEqual((1, 3, 0, 0), eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_2x0(self): + num_rows = 2 + num_columns = 0 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + def test_1x3_batch_0x2(self): + num_rows = 0 + num_columns = 2 + batch_shape = [1, 3] + dtype = np.float32 + np_eye = np.eye(num_rows, num_columns).astype(dtype) + with self.test_session(): + eye = tf.eye( + num_rows, + num_columns=num_columns, + batch_shape=batch_shape, + dtype=dtype) + self.assertAllEqual( + batch_shape + [num_rows, num_columns], + eye.get_shape()) + eye_v = eye.eval() + for i in range(batch_shape[0]): + for j in range(batch_shape[1]): + self.assertAllEqual(np_eye, eye_v[i, j, :, :]) + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 7826800d0fb..465a02bfe69 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -354,11 +354,10 @@ class TileTest(tf.test.TestCase): bytes: (tf.string, bytes) } for dtype_np, (dtype_tf, cast) in types_to_test.items(): - with self.test_session(): + with self.test_session(use_gpu=True): inp = np.random.rand(4, 1).astype(dtype_np) - a = tf.constant([cast(x) for x in inp.ravel(order="C")], - shape=[4, 1], - dtype=dtype_tf) + a = tf.constant([cast(x) for x in inp.ravel(order="C")], shape=[4, 1], + dtype=dtype_tf) tiled = tf.tile(a, [1, 4]) result = 
tiled.eval() self.assertEqual(result.shape, (4, 4)) diff --git a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py index e87e8b02b23..29659d39c51 100644 --- a/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py +++ b/tensorflow/python/kernel_tests/sparse_conditional_accumulator_test.py @@ -539,6 +539,34 @@ class IndexedSlicesConditionalAccumulatorTest(tf.test.TestCase): val = sess.run(q.take_indexed_slices_grad(1)) self.assertAllEqual(val.dense_shape, [-1, 2, 2, 3]) + def testApplyGradtInt32IndicesAndShape(self): + with self.test_session() as sess: + q = tf.SparseConditionalAccumulator( + tf.float32, name="Q", shape=tf.TensorShape([3, 3])) + accum_op = q.apply_grad( + grad_indices=tf.constant( + [0, 2], dtype=tf.int32), + grad_values=tf.constant( + [[0, 0, 1], [3, 0, 4]], dtype=tf.float32), + grad_shape=tf.constant( + [3, 3], dtype=tf.int32)) + accum_op.run() + accum_op = q.apply_indexed_slices_grad( + tf.IndexedSlices( + indices=tf.constant( + [0, 2], dtype=tf.int32), + values=tf.constant( + [[0, 0, 1], [3, 0, 4]], dtype=tf.float32), + dense_shape=tf.constant( + [3, 3], dtype=tf.int32))) + accum_op.run() + self.assertEqual(q.num_accumulated().eval(), 2) + + val = sess.run(q.take_indexed_slices_grad(1)) + self.assertAllEqual(val.indices, [0, 2]) + self.assertAllEqual(val.values, [[0, 0, 1], [3, 0, 4]]) + self.assertAllEqual(val.dense_shape, [3, 3]) + if __name__ == "__main__": tf.test.main() diff --git a/tensorflow/python/kernel_tests/summary_tensor_op_test.py b/tensorflow/python/kernel_tests/summary_tensor_op_test.py index e65fd66f2e9..7ea8e6680be 100644 --- a/tensorflow/python/kernel_tests/summary_tensor_op_test.py +++ b/tensorflow/python/kernel_tests/summary_tensor_op_test.py @@ -38,12 +38,12 @@ class SummaryOpsTest(tf.test.TestCase): def testNodeNames(self): with self.test_session() as sess: c = tf.constant(1) - s1 = tf.summary.tensor_summary("", c, name="s1") + s1 = tf.summary.tensor_summary("s1", c) with tf.name_scope("foo"): - s2 = tf.summary.tensor_summary("", c, name="s2") + s2 = tf.summary.tensor_summary("s2", c) with tf.name_scope("zod"): - s3 = tf.summary.tensor_summary("", c, name="s3") - s4 = tf.summary.tensor_summary("", c) + s3 = tf.summary.tensor_summary("s3", c) + s4 = tf.summary.tensor_summary("TensorSummary", c) summ1, summ2, summ3, summ4 = sess.run([s1, s2, s3, s4]) v1 = self._SummarySingleValue(summ1) @@ -61,7 +61,7 @@ class SummaryOpsTest(tf.test.TestCase): def testScalarSummary(self): with self.test_session() as sess: const = tf.constant(10.0) - summ = tf.summary.tensor_summary("foo", const) + summ = tf.summary.scalar("foo", const) result = sess.run(summ) value = self._SummarySingleValue(result) diff --git a/tensorflow/python/lib/io/file_io.i b/tensorflow/python/lib/io/file_io.i index 55a9d503f3c..9688a9e02ad 100644 --- a/tensorflow/python/lib/io/file_io.i +++ b/tensorflow/python/lib/io/file_io.i @@ -176,10 +176,11 @@ tensorflow::io::BufferedInputStream* CreateBufferedInputStream( return nullptr; } std::unique_ptr input_stream( - new tensorflow::io::RandomAccessInputStream(file.release())); + new tensorflow::io::RandomAccessInputStream( + file.release(), true /* owns_file */)); std::unique_ptr buffered_input_stream( - new tensorflow::io::BufferedInputStream(input_stream.release(), - buffer_size)); + new tensorflow::io::BufferedInputStream( + input_stream.release(), buffer_size, true /* owns_input_stream */)); return buffered_input_stream.release(); } @@ 
-207,13 +208,6 @@ void AppendToFile(const string& file_content, tensorflow::WritableFile* file, } } -void FlushWritableFile(tensorflow::WritableFile* file, TF_Status* out_status) { - tensorflow::Status status = file->Flush(); - if (!status.ok()) { - Set_TF_Status_from_Status(out_status, status); - } -} - string ReadFromStream(tensorflow::io::BufferedInputStream* stream, size_t bytes, TF_Status* out_status) { @@ -226,14 +220,6 @@ string ReadFromStream(tensorflow::io::BufferedInputStream* stream, return result; } -void SeekInStream(tensorflow::io::BufferedInputStream* stream, int64 position, - TF_Status* out_status) { - tensorflow::Status status = stream->Seek(position); - if (!status.ok()) { - Set_TF_Status_from_Status(out_status, status); - } -} - %} // Ensure that the returned object is destroyed when its wrapper is @@ -266,24 +252,28 @@ tensorflow::WritableFile* CreateWritableFile(const string& filename, TF_Status* out_status); void AppendToFile(const string& file_content, tensorflow::WritableFile* file, TF_Status* out_status); -void FlushWritableFile(tensorflow::WritableFile* file, TF_Status* out_status); string ReadFromStream(tensorflow::io::BufferedInputStream* stream, size_t bytes, TF_Status* out_status); -void SeekInStream(tensorflow::io::BufferedInputStream* stream, int64 position, - TF_Status* out_status); + +%ignore tensorflow::Status::operator=; +%include "tensorflow/core/lib/core/status.h" %ignoreall %unignore tensorflow::io::BufferedInputStream; %unignore tensorflow::io::BufferedInputStream::~BufferedInputStream; %unignore tensorflow::io::BufferedInputStream::ReadLineAsString; +%unignore tensorflow::io::BufferedInputStream::Seek; %unignore tensorflow::io::BufferedInputStream::Tell; %unignore tensorflow::WritableFile; +%unignore tensorflow::WritableFile::Close; +%unignore tensorflow::WritableFile::Flush; %unignore tensorflow::WritableFile::~WritableFile; %include "tensorflow/core/platform/file_system.h" %include "tensorflow/core/lib/io/inputstream_interface.h" %include "tensorflow/core/lib/io/buffered_inputstream.h" %unignoreall +%include "tensorflow/c/tf_status_helper.h" %include "tensorflow/core/lib/io/path.h" -%include "tensorflow/core/platform/file_statistics.h" +%include "tensorflow/core/platform/file_statistics.h" \ No newline at end of file diff --git a/tensorflow/python/lib/io/file_io.py b/tensorflow/python/lib/io/file_io.py index a250903da8f..b8295dcb65a 100644 --- a/tensorflow/python/lib/io/file_io.py +++ b/tensorflow/python/lib/io/file_io.py @@ -114,7 +114,8 @@ class FileIO(object): """Seeks to the position in the file.""" self._preread_check() with errors.raise_exception_on_not_ok_status() as status: - return pywrap_tensorflow.SeekInStream(self._read_buf, position, status) + ret_status = self._read_buf.Seek(position) + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) def readline(self): r"""Reads the next line from the file. Leaves the '\n' at the end.""" @@ -168,12 +169,16 @@ class FileIO(object): """ if self._writable_file: with errors.raise_exception_on_not_ok_status() as status: - pywrap_tensorflow.FlushWritableFile(self._writable_file, status) + ret_status = self._writable_file.Flush() + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) def close(self): """Closes FileIO. 
Should be called for the WritableFile to be flushed.""" self._read_buf = None - self.flush() + if self._writable_file: + with errors.raise_exception_on_not_ok_status() as status: + ret_status = self._writable_file.Close() + pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status) self._writable_file = None diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py index efaba32738b..cd4ccf35781 100644 --- a/tensorflow/python/lib/io/file_io_test.py +++ b/tensorflow/python/lib/io/file_io_test.py @@ -59,6 +59,15 @@ class FileIoTest(tf.test.TestCase): file_contents = f.read() self.assertEqual(b"begin\na1\na2\n", file_contents) + def testMultipleFiles(self): + file_prefix = os.path.join(self._base_dir, "temp_file") + for i in range(5000): + f = file_io.FileIO(file_prefix + str(i), mode="w+") + f.write("testing") + f.flush() + self.assertEquals(b"testing", f.read()) + f.close() + def testMultipleWrites(self): file_path = os.path.join(self._base_dir, "temp_file") with file_io.FileIO(file_path, mode="w") as f: @@ -358,6 +367,9 @@ class FileIoTest(tf.test.TestCase): self.assertEqual(0, f.tell()) self.assertEqual("testing1\n", f.readline()) + with self.assertRaises(errors.InvalidArgumentError): + f.seek(-1) + def testReadingIterator(self): file_path = os.path.join(self._base_dir, "temp_file") data = ["testing1\n", "testing2\n", "testing3\n", "\n", "testing5"] diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 5d2b43e8ebe..fd1c31f5ab1 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2746,9 +2746,9 @@ def case(pred_fn_pairs, default, exclusive=False, name="case"): in `pred_fn_pairs` as well as `default` should return the same number and types of tensors. - If `exclusive==True`, all predicates are evaluated, and a logging operation - with an error is returned if more than one of the predicates evaluates to - True. If `exclusive==False`, execution stops are the first predicate which + If `exclusive==True`, all predicates are evaluated, and an exception is + thrown if more than one of the predicates evaluates to `True`. + If `exclusive==False`, execution stops are the first predicate which evaluates to True, and the tensors generated by the corresponding function are returned immediately. If none of the predicates evaluate to True, this operation returns the tensors generated by `default`. @@ -2792,7 +2792,7 @@ def case(pred_fn_pairs, default, exclusive=False, name="case"): pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a callable which returns a list of tensors. default: A callable that returns a list of tensors. - exclusive: True iff more than one predicate is allowed to evaluate to True. + exclusive: True iff at most one predicate is allowed to evaluate to `True`. name: A name for this operation (optional). 
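A minimal sketch of the `exclusive` semantics described in the `tf.case` docstring above, assuming the contemporaneous graph-mode API (`tf.case`, `tf.Session`); illustrative only, not part of the patch:

```python
import tensorflow as tf

x = tf.constant(2)

# Neither predicate is True for x == 2, so the default branch is returned.
# With exclusive=True every predicate is evaluated, and an error is raised at
# run time if more than one of them turns out to be True.
result = tf.case(
    {tf.less(x, 1): lambda: tf.constant(-1),
     tf.greater(x, 10): lambda: tf.constant(1)},
    default=lambda: tf.constant(0),
    exclusive=True)

with tf.Session() as sess:
    print(sess.run(result))  # prints 0
```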
Returns: diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 3d3149f640e..06d7308b384 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -1395,9 +1395,10 @@ class SparseConditionalAccumulator(ConditionalAccumulatorBase): return gen_data_flow_ops.sparse_accumulator_apply_gradient( self._accumulator_ref, local_step=local_step, - gradient_indices=grad_indices, + gradient_indices=math_ops.to_int64(grad_indices), gradient_values=grad_values, - gradient_shape=[] if grad_shape is None else grad_shape, + gradient_shape=math_ops.to_int64([] if grad_shape is None else + grad_shape), has_known_shape=(grad_shape is not None), name=name) diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index e1f0ba51f8f..b55a8003622 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -139,7 +139,7 @@ def constant_initializer(value=0, dtype=dtypes.float32): >>> x = tf.get_variable('x', shape=[2, 3], initializer=init) ValueError: Too many elements provided. Needed at most 6, but received 8 - ``` + ``` """ def _initializer(shape, dtype=dtype, partition_info=None): return constant_op.constant(value, dtype=dtype, shape=shape) diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 52740063b9e..da411044384 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -19,7 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import common_shapes +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops # go/tf-wildcard-import @@ -84,6 +86,65 @@ def cholesky_solve(chol, rhs, name=None): return x +def eye( + num_rows, + num_columns=None, + batch_shape=None, + dtype=dtypes.float32, + name=None): + """Construct an identity matrix, or a batch of matrices. + + ```python + # Construct one identity matrix. + tf.eye(2) + ==> [[1., 0.], + [0., 1.]] + + # Construct a batch of 3 identity matricies, each 2 x 2. + # batch_identity[i, :, :] is a 2 x 2 identity matrix, i = 0, 1, 2. + batch_identity = tf.eye(2, batch_shape=[3]) + + # Construct one 2 x 3 "identity" matrix + tf.eye(2, num_columns=3) + ==> [[ 1., 0., 0.], + [ 0., 1., 0.]] + ``` + + Args: + num_rows: Non-negative `int32` scalar `Tensor` giving the number of rows + in each batch matrix. + num_columns: Optional non-negative `int32` scalar `Tensor` giving the number + of columns in each batch matrix. Defaults to `num_rows`. + batch_shape: `int32` `Tensor`. If provided, returned `Tensor` will have + leading batch dimensions of this shape. + dtype: The type of an element in the resulting `Tensor` + name: A name for this `Op`. Defaults to "eye". 
+ + Returns: + A `Tensor` of shape `batch_shape + [num_rows, num_columns]` + """ + with ops.name_scope( + name, default_name="eye", values=[num_rows, num_columns, batch_shape]): + + batch_shape = [] if batch_shape is None else batch_shape + batch_shape = ops.convert_to_tensor( + batch_shape, name="shape", dtype=dtypes.int32) + + if num_columns is None: + diag_size = num_rows + else: + diag_size = math_ops.minimum(num_rows, num_columns) + diag_shape = array_ops.concat(0, (batch_shape, [diag_size])) + diag_ones = array_ops.ones(diag_shape, dtype=dtype) + + if num_columns is None: + return array_ops.matrix_diag(diag_ones) + else: + shape = array_ops.concat(0, (batch_shape, [num_rows, num_columns])) + zero_matrix = array_ops.zeros(shape, dtype=dtype) + return array_ops.matrix_set_diag(zero_matrix, diag_ones) + + def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): r"""Solves one or more linear least-squares problems. diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 4c61669d94a..23f141039a3 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -80,6 +80,7 @@ functions on matrices to your graph. @@trace @@transpose +@@eye @@matrix_diag @@matrix_diag_part @@matrix_band_part diff --git a/tensorflow/python/ops/summary_ops.py b/tensorflow/python/ops/summary_ops.py index 0feb456fe9a..d0c6fe8aa5e 100644 --- a/tensorflow/python/ops/summary_ops.py +++ b/tensorflow/python/ops/summary_ops.py @@ -18,13 +18,14 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from google.protobuf import json_format +from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import common_shapes from tensorflow.python.framework import ops from tensorflow.python.ops import gen_logging_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_logging_ops import * - # pylint: enable=wildcard-import @@ -35,12 +36,11 @@ def _Collect(val, collections, default_collections): ops.add_to_collection(key, val) -def tensor_summary(display_name, # pylint: disable=invalid-name - tensor, - description="", - labels=None, - collections=None, - name=None): +def tensor_summary( # pylint: disable=invalid-name + name, + tensor, + summary_description=None, + collections=None): # pylint: disable=line-too-long """Outputs a `Summary` protocol buffer with a serialized tensor.proto. @@ -49,19 +49,12 @@ def tensor_summary(display_name, # pylint: disable=invalid-name has one summary value containing the input tensor. Args: - display_name: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. + name: A name for the generated node. Will also serve as the series name in + TensorBoard. tensor: A tensor of any type and shape to serialize. - description: An optional long description of the data being output. - labels: a list of strings used to specify how the data can be interpreted, - for example: - * `'encoding:image/jpg'` for a string tensor containing jpg images - * `'encoding:proto/X/Y/foo.proto'` for a string tensor containing Foos - * `'group:$groupName/$roleInGroup'` for a tensor that is related to - other tensors that are all in a group. (e.g. bounding boxes and images) + summary_description: Optional summary_pb2.SummaryDescription() collections: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. 
- name: An optional name for the generated node (optional). Returns: A scalar `Tensor` of type `string`. The serialized `Summary` protocol @@ -69,12 +62,14 @@ def tensor_summary(display_name, # pylint: disable=invalid-name """ # pylint: enable=line-too-long - with ops.name_scope(name, "TensorSummary", [tensor]) as scope: + if summary_description is None: + summary_description = summary_pb2.SummaryDescription() + + description = json_format.MessageToJson(summary_description) + with ops.name_scope(name, None, [tensor]) as scope: val = gen_logging_ops._tensor_summary( - display_name=display_name, tensor=tensor, description=description, - labels=labels, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val diff --git a/tensorflow/python/platform/resource_loader.py b/tensorflow/python/platform/resource_loader.py index 5bcd054fd2d..3daf2d8729b 100644 --- a/tensorflow/python/platform/resource_loader.py +++ b/tensorflow/python/platform/resource_loader.py @@ -23,8 +23,6 @@ import inspect import os.path import sys -from tensorflow.python.platform import tf_logging as logging - def load_resource(path): """Load the resource at given path, where path is relative to tensorflow/. @@ -43,12 +41,8 @@ def load_resource(path): os.path.dirname(__file__), os.pardir, os.pardir)) path = os.path.join(tensorflow_root, path) path = os.path.abspath(path) - try: - with open(path, 'rb') as f: - return f.read() - except IOError as e: - logging.warning('IOError %s on path %s', e, path) - raise e + with open(path, 'rb') as f: + return f.read() # pylint: disable=protected-access diff --git a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py index 5d084a319f0..71cff17c6ac 100644 --- a/tensorflow/python/saved_model/example/saved_model_half_plus_two.py +++ b/tensorflow/python/saved_model/example/saved_model_half_plus_two.py @@ -29,13 +29,36 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tensorflow as tf from tensorflow.core.protobuf import meta_graph_pb2 +from tensorflow.python.lib.io import file_io from tensorflow.python.saved_model import builder as saved_model_builder from tensorflow.python.saved_model import constants from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import utils +from tensorflow.python.util import compat + + +def _write_assets(assets_directory, assets_filename): + """Writes asset files to be used with SavedModel for half plus two. + + Args: + assets_directory: The directory to which the assets should be written. + assets_filename: Name of the file to which the asset contents should be + written. + + Returns: + The path to which the assets file was written. + """ + if not file_io.file_exists(assets_directory): + file_io.recursive_create_dir(assets_directory) + + path = os.path.join( + compat.as_bytes(assets_directory), compat.as_bytes(assets_filename)) + file_io.write_string_to_file(path, "asset-file-contents") + return path def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): @@ -64,6 +87,17 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): x = tf.identity(tf_example["x"], name="x") y = tf.add(tf.mul(a, x), b, name="y") + # Create an assets file that can be saved and restored as part of the + # SavedModel. 
+ original_assets_directory = "/tmp/original/export/assets" + original_assets_filename = "foo.txt" + original_assets_filepath = _write_assets(original_assets_directory, + original_assets_filename) + + # Set up the assets collection. + assets_filepath = tf.constant(original_assets_filepath) + tf.add_to_collection(tf.GraphKeys.ASSET_FILEPATHS, assets_filepath) + # Set up the signature for regression with input and output tensor # specification. input_tensor = meta_graph_pb2.TensorInfo() @@ -84,16 +118,19 @@ def _generate_saved_model_for_half_plus_two(export_dir, as_text=False): signature_def_map={ signature_constants.REGRESS_METHOD_NAME: signature_def - }) + }, + assets_collection=tf.get_collection(tf.GraphKeys.ASSET_FILEPATHS)) builder.save(as_text) def main(_): export_dir_pb = "/tmp/saved_model/half_plus_two" _generate_saved_model_for_half_plus_two(export_dir_pb) + print("SavedModel generated at: %s" % export_dir_pb) export_dir_pbtxt = "/tmp/saved_model/half_plus_two_pbtxt" _generate_saved_model_for_half_plus_two(export_dir_pbtxt, as_text=True) + print("SavedModel generated at: %s" % export_dir_pbtxt) if __name__ == "__main__": diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index 46e33aae271..b47e4e4e447 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -27,34 +27,25 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.framework import ops +from tensorflow.core.framework import summary_pb2 from tensorflow.python.framework import tensor_shape from tensorflow.python.framework.dtypes import as_dtype from tensorflow.python.ops.summary_ops import tensor_summary from tensorflow.python.util.all_util import make_all -SCALAR_SUMMARY_LABEL = "tf_summary_type:scalar" - -def scalar(display_name, - tensor, - description="", - labels=None, - collections=None, - name=None): +def scalar(name, tensor, summary_description=None, collections=None): """Outputs a `Summary` protocol buffer containing a single scalar value. The generated Summary has a Tensor.proto containing the input Tensor. Args: - display_name: A name to associate with the data series. Will be used to - organize output data and as a name in visualizers. + name: A name for the generated node. Will also serve as the series name in + TensorBoard. tensor: A tensor containing a single floating point or integer value. - description: An optional long description of the data being output. - labels: a list of strings used to attach metadata. + summary_description: Optional summary_description_pb2.SummaryDescription collections: Optional list of graph collections keys. The new summary op is added to these collections. Defaults to `[GraphKeys.SUMMARIES]`. - name: An optional name for the generated node (optional). Returns: A scalar `Tensor` of type `string`. Which contains a `Summary` protobuf. @@ -62,7 +53,6 @@ def scalar(display_name, Raises: ValueError: If tensor has the wrong shape or type. """ - dtype = as_dtype(tensor.dtype) if dtype.is_quantized or not (dtype.is_integer or dtype.is_floating): raise ValueError("Can't create scalar summary for type %s." % dtype) @@ -71,17 +61,11 @@ def scalar(display_name, if not shape.is_compatible_with(tensor_shape.scalar()): raise ValueError("Can't create scalar summary for shape %s." 
% shape) - if labels is None: - labels = [] - else: - labels = labels[:] # Otherwise we would mutate the input argument + if summary_description is None: + summary_description = summary_pb2.SummaryDescription() + summary_description.type_hint = "scalar" - labels.append(SCALAR_SUMMARY_LABEL) - - with ops.name_scope(name, "ScalarSummary", [tensor]): - tensor = ops.convert_to_tensor(tensor) - return tensor_summary(display_name, tensor, description, labels, - collections, name) + return tensor_summary(name, tensor, summary_description, collections) __all__ = make_all(__name__) diff --git a/tensorflow/python/summary/summary_test.py b/tensorflow/python/summary/summary_test.py index fb49759ba0b..34c14dbd878 100644 --- a/tensorflow/python/summary/summary_test.py +++ b/tensorflow/python/summary/summary_test.py @@ -17,9 +17,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import six import tensorflow as tf +from google.protobuf import json_format +from tensorflow.core.framework import summary_pb2 from tensorflow.core.framework import types_pb2 @@ -64,24 +65,15 @@ class ScalarSummaryTest(tf.test.TestCase): with self.assertRaises(ValueError): tf.summary.scalar('3', c3) - def testLabelsAdded(self): - c = tf.constant(0) - - no_labels = tf.summary.scalar('2', c) - labels = tf.summary.scalar('1', c, labels=['foo']) - - def _GetLabels(n): - return n.op.get_attr('labels') - - expected_label = six.b(tf.summary.SCALAR_SUMMARY_LABEL) - self.assertEquals(_GetLabels(no_labels), [expected_label]) - self.assertEquals(_GetLabels(labels), [six.b('foo'), expected_label]) - def testTensorSummaryOpCreated(self): c = tf.constant(0) - s = tf.summary.scalar('', c) - self.assertEquals(s.op.type, 'TensorSummary') - self.assertEquals(s.op.inputs[0], c) + s = tf.summary.scalar('x', c) + self.assertEqual(s.op.type, 'TensorSummary') + self.assertEqual(s.op.inputs[0], c) + description = s.op.get_attr('description') + summary_description = summary_pb2.SummaryDescription() + json_format.Parse(description, summary_description) + self.assertEqual(summary_description.type_hint, 'scalar') if __name__ == '__main__': diff --git a/tensorflow/python/training/basic_session_run_hooks.py b/tensorflow/python/training/basic_session_run_hooks.py index 5e134777c02..e6e614c8bb6 100644 --- a/tensorflow/python/training/basic_session_run_hooks.py +++ b/tensorflow/python/training/basic_session_run_hooks.py @@ -45,6 +45,56 @@ from tensorflow.python.training.session_run_hook import SessionRunArgs from tensorflow.python.training.summary_io import SummaryWriterCache +class _SecondOrStepTimer(object): + """Timer that triggers at most once every N seconds or once every N steps. + """ + + def __init__(self, every_secs=None, every_steps=None): + self._every_secs = every_secs + self._every_steps = every_steps + self._last_triggered_step = None + self._last_triggered_time = None + + if self._every_secs is None and self._every_steps is None: + raise ValueError("Either every_secs or every_steps should be provided.") + if (self._every_secs is not None) and (self._every_steps is not None): + raise ValueError("Can not provide both every_secs and every_steps.") + + def should_trigger_for_step(self, step): + """Return true if the timer should trigger for the specified step. + + Args: + step: Training step to trigger on. 
+ + Returns: + True if the difference between the current time and the time of the last + trigger exceeds `every_secs`, or if the difference between the current + step and the last triggered step exceeds `every_steps`. False otherwise. + """ + if self._last_triggered_step == step: + return False + + if self._last_triggered_step is None: + return True + + if self._every_secs is not None: + if time.time() >= self._last_triggered_time + self._every_secs: + return True + + if self._every_steps is not None: + if step >= self._last_triggered_step + self._every_steps: + return True + + return False + + def update_last_triggered_step(self, step): + self._last_triggered_time = time.time() + self._last_triggered_step = step + + def last_triggered_step(self): + return self._last_triggered_step + + class LoggingTensorHook(session_run_hook.SessionRunHook): """Prints given tensors every N iteration. @@ -165,26 +215,17 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): self._summary_writer = SummaryWriterCache.get(checkpoint_dir) self._save_path = os.path.join(checkpoint_dir, checkpoint_basename) self._scaffold = scaffold - self._save_secs = save_secs - self._save_steps = save_steps - self._last_saved_time = None - self._last_saved_step = None - - if save_steps is None and save_secs is None: - raise ValueError("Either save_steps or save_secs should be provided") - if (save_steps is not None) and (save_secs is not None): - raise ValueError("Can not provide both save_steps and save_secs.") + self._timer = _SecondOrStepTimer(every_secs=save_secs, + every_steps=save_steps) def begin(self): - self._last_saved_time = None - self._last_saved_step = None self._global_step_tensor = training_util.get_global_step() if self._global_step_tensor is None: raise RuntimeError( "Global step should be created to use CheckpointSaverHook.") def before_run(self, run_context): # pylint: disable=unused-argument - if self._last_saved_time is None: + if self._timer.last_triggered_step() is None: # Write graph in the first call. 
training_util.write_graph( ops.get_default_graph().as_graph_def(add_shapes=True), @@ -202,28 +243,18 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook): def after_run(self, run_context, run_values): global_step = run_values.results - if self._last_saved_time is None: + if self._timer.should_trigger_for_step(global_step): + self._timer.update_last_triggered_step(global_step) self._save(global_step, run_context.session) - if self._save_steps is not None: - if global_step >= self._last_saved_step + self._save_steps: - self._save(global_step, run_context.session) - - if self._save_secs is not None: - if time.time() >= self._last_saved_time + self._save_secs: - self._save(global_step, run_context.session) - def end(self, session): last_step = session.run(training_util.get_global_step()) - self._save(last_step, session) + if last_step != self._timer.last_triggered_step(): + self._save(last_step, session) def _save(self, step, session): """Saves the latest checkpoint.""" - if step == self._last_saved_step: - return logging.info("Saving checkpoints for %d into %s.", step, self._save_path) - self._last_saved_time = time.time() - self._last_saved_step = step if self._saver is None: self._scaffold.saver.save(session, self._save_path, global_step=step) else: @@ -320,6 +351,7 @@ class SummarySaverHook(session_run_hook.SessionRunHook): def __init__(self, save_steps=100, + save_secs=None, output_dir=None, summary_writer=None, scaffold=None, @@ -327,7 +359,9 @@ class SummarySaverHook(session_run_hook.SessionRunHook): """Initializes a `SummarySaver` monitor. Args: - save_steps: `int`, save summaries every N steps. See `EveryN`. + save_steps: `int`, save summaries every N steps. Exactly one of + `save_secs` and `save_steps` should be set. + save_secs: `int`, save summaries every N seconds. output_dir: `string`, the directory to save the summaries to. Only used if no `summary_writer` is supplied. summary_writer: `SummaryWriter`. If `None` and an `output_dir` was passed, @@ -337,24 +371,26 @@ class SummarySaverHook(session_run_hook.SessionRunHook): buffer, as output by TF summary methods like `scalar_summary` or `merge_all_summaries`. """ - # TODO(ipolosukhin): Implement every N seconds. self._summary_op = summary_op self._summary_writer = summary_writer if summary_writer is None and output_dir: self._summary_writer = SummaryWriterCache.get(output_dir) self._scaffold = scaffold - self._save_steps = save_steps + self._timer = _SecondOrStepTimer(every_secs=save_secs, + every_steps=save_steps) # TODO(mdan): Throw an error if output_dir and summary_writer are None. 
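The checkpoint and summary hooks above now share their save cadence via `_SecondOrStepTimer`. A small sketch of that trigger behavior in isolation, using the private class exactly as the new tests in this diff do (illustrative only; assumes nothing beyond the class definition added earlier in the patch):

```python
import time

from tensorflow.python.training import basic_session_run_hooks

# Step-based cadence: trigger at most once every 5 steps.
timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=5)
for step in range(1, 13):
    if timer.should_trigger_for_step(step):
        timer.update_last_triggered_step(step)
        print("triggered at step", step)  # steps 1, 6 and 11

# Time-based cadence: trigger again only after every_secs have elapsed.
timer = basic_session_run_hooks._SecondOrStepTimer(every_secs=1.0)
timer.update_last_triggered_step(0)
time.sleep(1.0)
print(timer.should_trigger_for_step(1))  # True once a second has passed
```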
def begin(self): - self._last_saved_step = None - self._request_summary = True + self._next_step = None self._global_step_tensor = training_util.get_global_step() if self._global_step_tensor is None: raise RuntimeError( "Global step should be created to use SummarySaverHook.") def before_run(self, run_context): # pylint: disable=unused-argument + self._request_summary = ( + self._next_step is None or + self._timer.should_trigger_for_step(self._next_step)) requests = {"global_step": self._global_step_tensor} if self._request_summary: if self._summary_op is not None: @@ -371,18 +407,17 @@ class SummarySaverHook(session_run_hook.SessionRunHook): global_step = run_values.results["global_step"] - if self._last_saved_step is None: + if self._next_step is None: self._summary_writer.add_session_log( SessionLog(status=SessionLog.START), global_step) if self._request_summary: - self._last_saved_step = global_step + self._timer.update_last_triggered_step(global_step) if "summary" in run_values.results: self._summary_writer.add_summary(run_values.results["summary"], global_step) - self._request_summary = ( - global_step >= self._last_saved_step + self._save_steps - 1) + self._next_step = global_step + 1 def end(self, session=None): if self._summary_writer: diff --git a/tensorflow/python/training/basic_session_run_hooks_test.py b/tensorflow/python/training/basic_session_run_hooks_test.py index b18d2b7dac7..858f4bc1a87 100644 --- a/tensorflow/python/training/basic_session_run_hooks_test.py +++ b/tensorflow/python/training/basic_session_run_hooks_test.py @@ -19,16 +19,50 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function - import shutil import tempfile import time import tensorflow as tf +from tensorflow.contrib import testing +from tensorflow.python.training import basic_session_run_hooks from tensorflow.python.training import monitored_session +class SecondOrStepTimerTest(tf.test.TestCase): + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks._SecondOrStepTimer(every_secs=2.0, every_steps=10) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + basic_session_run_hooks._SecondOrStepTimer() + + def test_every_secs(self): + timer = basic_session_run_hooks._SecondOrStepTimer(every_secs=1.0) + self.assertTrue(timer.should_trigger_for_step(1)) + + timer.update_last_triggered_step(1) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertFalse(timer.should_trigger_for_step(2)) + + time.sleep(1.0) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertTrue(timer.should_trigger_for_step(2)) + + def test_every_steps(self): + timer = basic_session_run_hooks._SecondOrStepTimer(every_steps=3) + self.assertTrue(timer.should_trigger_for_step(1)) + + timer.update_last_triggered_step(1) + self.assertFalse(timer.should_trigger_for_step(1)) + self.assertFalse(timer.should_trigger_for_step(2)) + self.assertFalse(timer.should_trigger_for_step(3)) + self.assertTrue(timer.should_trigger_for_step(4)) + + class StopAtStepTest(tf.test.TestCase): def test_raise_in_both_last_step_and_num_steps(self): @@ -246,5 +280,144 @@ class CheckpointSaverHookTest(tf.test.TestCase): self.model_dir, self.global_step.name)) +class StepCounterHookTest(tf.test.TestCase): + + def setUp(self): + self.log_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.log_dir, ignore_errors=True) + + def test_step_counter(self): + with tf.Graph().as_default() as g, 
tf.Session() as sess: + global_step = tf.contrib.framework.get_or_create_global_step() + train_op = tf.assign_add(global_step, 1) + summary_writer = testing.FakeSummaryWriter(self.log_dir, g) + hook = tf.train.StepCounterHook( + summary_writer=summary_writer, every_n_steps=10) + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(30): + time.sleep(0.01) + mon_sess.run(train_op) + hook.end(sess) + summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_graph=g, + expected_summaries={}) + for step in [11, 21]: + summary_value = summary_writer.summaries[step][0].value[0] + self.assertTrue(summary_value.tag, 'global_step/sec') + # check at least 10 steps per sec is recorded. + self.assertGreater(summary_value.simple_value, 10) + + +class SummarySaverHookTest(tf.test.TestCase): + + def setUp(self): + tf.test.TestCase.setUp(self) + + self.log_dir = 'log/dir' + self.summary_writer = testing.FakeSummaryWriter(self.log_dir) + + var = tf.Variable(0.0) + tensor = tf.assign_add(var, 1.0) + self.summary_op = tf.scalar_summary('my_summary', tensor) + + global_step = tf.contrib.framework.get_or_create_global_step() + self.train_op = tf.assign_add(global_step, 1) + + def test_raise_in_both_secs_and_steps(self): + with self.assertRaises(ValueError): + tf.train.SummarySaverHook( + save_secs=10, + save_steps=20, + summary_writer=self.summary_writer) + + def test_raise_in_none_secs_and_steps(self): + with self.assertRaises(ValueError): + tf.train.SummarySaverHook( + save_secs=None, + save_steps=None, + summary_writer=self.summary_writer) + + def test_save_steps(self): + hook = tf.train.SummarySaverHook( + save_steps=8, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(30): + mon_sess.run(self.train_op) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 9: {'my_summary': 2.0}, + 17: {'my_summary': 3.0}, + 25: {'my_summary': 4.0}, + }) + + def test_save_secs_saving_once_every_step(self): + hook = tf.train.SummarySaverHook( + save_steps=None, + save_secs=0.5, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(4): + mon_sess.run(self.train_op) + time.sleep(0.5) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 2: {'my_summary': 2.0}, + 3: {'my_summary': 3.0}, + 4: {'my_summary': 4.0}, + }) + + def test_save_secs_saving_once_every_three_steps(self): + hook = tf.train.SummarySaverHook( + save_steps=None, + save_secs=0.9, + summary_writer=self.summary_writer, + summary_op=self.summary_op) + + with self.test_session() as sess: + hook.begin() + sess.run(tf.initialize_all_variables()) + mon_sess = monitored_session._HookedSession(sess, [hook]) + for _ in range(8): + mon_sess.run(self.train_op) + time.sleep(0.3) + hook.end(sess) + + self.summary_writer.assert_summaries( + test_case=self, + expected_logdir=self.log_dir, + expected_summaries={ + 1: {'my_summary': 1.0}, + 4: {'my_summary': 2.0}, + 7: {'my_summary': 3.0}, 
+ }) + + if __name__ == '__main__': tf.test.main() diff --git a/tensorflow/python/training/sync_replicas_optimizer_test.py b/tensorflow/python/training/sync_replicas_optimizer_test.py index 3025c9fb291..492adfc9b23 100644 --- a/tensorflow/python/training/sync_replicas_optimizer_test.py +++ b/tensorflow/python/training/sync_replicas_optimizer_test.py @@ -237,6 +237,9 @@ class SyncReplicasOptimizerV2Test(tf.test.TestCase): # The global step should have been updated since we only need to collect 2 # gradients. The variables should now have the new values after the average # of the gradients from worker 0/2 are applied. + while global_step.eval(session=sessions[1]) != 1: + time.sleep(0.01) + self.assertAllEqual(1, global_step.eval(session=sessions[1])) self.assertAllClose(0-(0.1+0.5)/2*2.0, var_0_g_1.eval(session=sessions[1])) self.assertAllClose(1-(0.9+1.3)/2*2.0, var_1_g_1.eval(session=sessions[1])) diff --git a/tensorflow/tensorboard/backend/handler.py b/tensorflow/tensorboard/backend/handler.py index ef484e36d59..2b1c65399e9 100644 --- a/tensorflow/tensorboard/backend/handler.py +++ b/tensorflow/tensorboard/backend/handler.py @@ -506,7 +506,8 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler): plugin = REGISTERED_PLUGINS[name]() # Initialize the plugin by passing the main http handler. plugin.initialize(self) - plugin_handlers = plugin.get_plugin_handlers(self._multiplexer.RunPaths()) + plugin_handlers = plugin.get_plugin_handlers(self._multiplexer.RunPaths(), + self._logdir) for route, handler in six.iteritems(plugin_handlers): path = DATA_PREFIX + PLUGIN_PREFIX + '/' + name + route data_handlers[path] = handler diff --git a/tensorflow/tensorboard/backend/server.py b/tensorflow/tensorboard/backend/server.py index a3c7a790977..ddefa4f5948 100644 --- a/tensorflow/tensorboard/backend/server.py +++ b/tensorflow/tensorboard/backend/server.py @@ -68,8 +68,8 @@ def ParseEventFilesSpec(logdir): if logdir is None: return files for specification in logdir.split(','): - # If it's a gcs path, don't split on colon - if gcs.IsGCSPath(specification): + # If it's a gcs or hdfs path, don't split on colon + if gcs.IsGCSPath(specification) or specification.startswith('hdfs://'): run_name = None path = specification # If the spec looks like /foo:bar/baz, then we assume it's a path with a diff --git a/tensorflow/tensorboard/components/index.html b/tensorflow/tensorboard/components/index.html index 8765834fe6f..c790a76f753 100644 --- a/tensorflow/tensorboard/components/index.html +++ b/tensorflow/tensorboard/components/index.html @@ -28,12 +28,12 @@ limitations under the License. font-family: "RobotoDraft","Roboto",sans-serif; } - TensorBoard + diff --git a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html index 4b588f63231..090e74fbc7e 100644 --- a/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html +++ b/tensorflow/tensorboard/components/tf-dashboard-common/tf-categorizer.html @@ -44,7 +44,7 @@ categories are exclusive.
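The `server.py` change above extends the colon-splitting exemption from GCS paths to `hdfs://` paths when parsing a `--logdir` specification. The helper below is a simplified, hypothetical restatement of that rule (it is not TensorBoard's `ParseEventFilesSpec`); it only shows why URL-style paths, whose authority component may itself contain a colon, must be passed through untouched:

```python
def split_logdir_spec(specification):
    """Split one comma-separated --logdir entry into (run_name, path)."""
    # URL-style paths carry their own colons (e.g. hdfs://namenode:8020/...),
    # so they are never split into a "run_name:path" pair.
    if specification.startswith("gs://") or specification.startswith("hdfs://"):
        return None, specification
    # A spec that starts with "/" is taken to be a bare path, even if it
    # contains a colon; otherwise a leading "name:" names the run.
    if ":" in specification and not specification.startswith("/"):
        run_name, _, path = specification.partition(":")
        return run_name, path
    return None, specification


print(split_logdir_spec("hdfs://namenode:8020/tensorboard/logs"))
# (None, 'hdfs://namenode:8020/tensorboard/logs')
print(split_logdir_spec("train:/tmp/logs/train"))
# ('train', '/tmp/logs/train')
print(split_logdir_spec("/foo:bar/baz"))
# (None, '/foo:bar/baz')
```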