From 73e38c29c74d9d9bf7128bf4737a410ff005611e Mon Sep 17 00:00:00 2001 From: Yifei Feng <yifeif@google.com> Date: Mon, 2 Jul 2018 17:07:06 -0700 Subject: [PATCH] Merge changes from github. PiperOrigin-RevId: 203037623 --- README.md | 2 + RELEASE.md | 40 +- tensorflow/BUILD | 32 +- tensorflow/cc/gradients/array_grad.cc | 52 +++ tensorflow/cc/gradients/array_grad_test.cc | 7 + tensorflow/compiler/xla/service/cpu/BUILD | 9 +- tensorflow/contrib/BUILD | 2 +- .../autograph/converters/control_flow.py | 1 - tensorflow/contrib/cmake/CMakeLists.txt | 17 +- .../contrib/cmake/external/boringssl.cmake | 2 +- .../contrib/cmake/tf_core_framework.cmake | 9 - .../contrib/cmake/tf_core_kernels.cmake | 13 +- .../contrib/cmake/tf_stream_executor.cmake | 2 - .../estimator/python/estimator/head.py | 3 +- .../estimator/python/estimator/head_test.py | 27 ++ .../gan/python/estimator/python/head_impl.py | 13 +- .../python/losses/python/losses_impl_test.py | 2 +- tensorflow/contrib/image/kernels/image_ops.cc | 2 + tensorflow/contrib/image/kernels/image_ops.h | 25 +- tensorflow/contrib/image/ops/image_ops.cc | 2 +- .../python/kernel_tests/image_ops_test.py | 3 +- .../contrib/image/python/ops/image_ops.py | 3 +- .../layers/python/layers/layers_test.py | 2 +- .../contrib/lite/java/demo/app/build.gradle | 36 ++ .../contrib/lite/kernels/expand_dims_test.cc | 4 +- .../lite/kernels/maximum_minimum_test.cc | 4 +- tensorflow/contrib/lite/kernels/neg_test.cc | 4 +- .../contrib/lite/kernels/select_test.cc | 24 +- .../lite/kernels/strided_slice_test.cc | 15 +- .../contrib/lite/kernels/test_util_test.cc | 12 +- tensorflow/contrib/lite/kernels/tile_test.cc | 16 +- .../contrib/lite/kernels/topk_v2_test.cc | 24 +- .../contrib/lite/python/tflite_convert.py | 2 +- tensorflow/contrib/mpi_collectives/BUILD | 1 + .../mpi_collectives/kernels/mpi_ops.cc | 2 +- tensorflow/contrib/opt/__init__.py | 1 + .../quantize/python/fold_batch_norms.py | 14 +- .../quantize/python/fold_batch_norms_test.py | 6 +- 
.../python/util/receptive_field_test.py | 2 +- .../tensorrt/test/tf_trt_integration_test.py | 391 +++++++++++++----- .../api_def/base_api/api_def_GatherNd.pbtxt | 2 +- .../api_def/base_api/api_def_LinSpace.pbtxt | 6 +- .../base_api/api_def_MatrixExponential.pbtxt | 2 +- .../base_api/api_def_MatrixLogarithm.pbtxt | 2 +- .../api_def/base_api/api_def_ReduceJoin.pbtxt | 2 +- .../base_api/api_def_ScatterNdAdd.pbtxt | 6 +- .../api_def_ScatterNdNonAliasingAdd.pbtxt | 6 +- .../base_api/api_def_ScatterNdSub.pbtxt | 6 +- .../base_api/api_def_ScatterNdUpdate.pbtxt | 6 +- .../api_def/base_api/api_def_Softmax.pbtxt | 2 +- .../base_api/api_def_SparseApplyAdagrad.pbtxt | 4 +- .../api_def_SparseApplyCenteredRMSProp.pbtxt | 6 +- .../base_api/api_def_SparseApplyFtrl.pbtxt | 10 +- .../api_def_SparseApplyMomentum.pbtxt | 4 +- .../api_def_SparseApplyProximalAdagrad.pbtxt | 8 +- ...f_SparseApplyProximalGradientDescent.pbtxt | 4 +- .../base_api/api_def_SparseApplyRMSProp.pbtxt | 6 +- .../base_api/api_def_SparseSliceGrad.pbtxt | 40 ++ .../base_api/api_def_UnsortedSegmentSum.pbtxt | 2 +- .../python_api/api_def_BroadcastTo.pbtxt | 4 - .../python_api/api_def_SparseSliceGrad.pbtxt | 4 + tensorflow/core/kernels/BUILD | 7 + tensorflow/core/kernels/conv_ops_test.cc | 4 +- tensorflow/core/kernels/mkl_concat_op.cc | 6 +- .../core/kernels/sparse_slice_grad_op.cc | 126 ++++++ tensorflow/core/lib/db/sqlite_test.cc | 15 + tensorflow/core/ops/sparse_ops.cc | 14 + tensorflow/core/ops/sparse_ops_test.cc | 12 + tensorflow/docs_src/get_started/_index.yaml | 12 +- tensorflow/docs_src/get_started/leftnav_files | 6 +- tensorflow/docs_src/get_started/next_steps.md | 2 +- .../docs_src/guide/custom_estimators.md | 8 +- tensorflow/docs_src/guide/keras.md | 24 +- .../docs_src/install/install_sources.md | 22 +- .../docs_src/mobile/tflite/demo_android.md | 23 +- tensorflow/docs_src/tutorials/layers.md | 45 +- tensorflow/go/op/wrappers.go | 12 +- tensorflow/java/src/gen/cc/source_writer.cc | 1 + 
.../src/main/java/org/tensorflow/Graph.java | 79 ++++ .../org/tensorflow/op/core/Gradients.java | 153 +++++++ tensorflow/java/src/main/native/graph_jni.cc | 54 +++ tensorflow/java/src/main/native/graph_jni.h | 9 + .../java/src/main/native/session_jni.cc | 32 +- tensorflow/java/src/main/native/utils_jni.cc | 53 +++ tensorflow/java/src/main/native/utils_jni.h | 33 ++ .../test/java/org/tensorflow/GraphTest.java | 103 +++++ .../test/java/org/tensorflow/SessionTest.java | 38 +- .../test/java/org/tensorflow/TestUtil.java | 34 +- tensorflow/python/estimator/model_fn.py | 4 +- tensorflow/python/framework/ops.py | 30 +- tensorflow/python/framework/ops_test.py | 9 + .../python/grappler/layout_optimizer_test.py | 4 +- tensorflow/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/init_ops_test.py | 40 +- .../python/kernel_tests/shape_ops_test.py | 23 ++ .../kernel_tests/sparse_slice_op_test.py | 22 +- tensorflow/python/ops/array_grad.py | 8 +- tensorflow/python/ops/control_flow_ops.py | 1 + tensorflow/python/ops/init_ops.py | 24 +- tensorflow/python/ops/losses/losses_impl.py | 3 +- tensorflow/python/ops/nn_ops.py | 3 +- tensorflow/python/ops/sparse_grad.py | 29 ++ tensorflow/stream_executor/BUILD | 9 + .../tools/api/generator/create_python_api.py | 2 +- ...orflow.initializers.variance_scaling.pbtxt | 2 +- ...keras.initializers.-variance-scaling.pbtxt | 2 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + ...sorflow.variance_scaling_initializer.pbtxt | 2 +- .../tools/ci_build/Dockerfile.cpu.ppc64le | 1 + .../tools/ci_build/Dockerfile.gpu.ppc64le | 1 + tensorflow/tools/ci_build/Dockerfile.rbe.cpu | 4 +- .../tools/ci_build/ci_parameterized_build.sh | 8 + .../ci_build/install/install_hdf5_ppc64le.sh | 30 ++ .../ci_build/linux/mkl/build-dev-container.sh | 53 +++ .../tools/ci_build/pi/build_raspberry_pi.sh | 4 + tensorflow/tools/ci_build/update_version.py | 2 +- tensorflow/tools/docker/Dockerfile.devel-mkl | 128 ++++++ tensorflow/tools/docker/Dockerfile.mkl | 75 ++++ 
.../docker/parameterized_docker_build.sh | 142 +++++-- tensorflow/tools/pip_package/setup.py | 12 +- tensorflow/workspace.bzl | 8 +- third_party/eigen.BUILD | 6 + third_party/eigen3/BUILD | 60 ++- third_party/kafka/BUILD | 5 +- third_party/repo.bzl | 1 - third_party/sqlite.BUILD | 1 + third_party/toolchains/BUILD | 22 + 127 files changed, 2132 insertions(+), 540 deletions(-) create mode 100644 tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt delete mode 100644 tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt create mode 100644 tensorflow/core/kernels/sparse_slice_grad_op.cc create mode 100644 tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java create mode 100644 tensorflow/java/src/main/native/utils_jni.cc create mode 100644 tensorflow/java/src/main/native/utils_jni.h create mode 100755 tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh create mode 100755 tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh create mode 100755 tensorflow/tools/docker/Dockerfile.devel-mkl create mode 100755 tensorflow/tools/docker/Dockerfile.mkl create mode 100644 third_party/toolchains/BUILD diff --git a/README.md b/README.md index 42d7bbc104f..05fcb23f7ed 100644 --- a/README.md +++ b/README.md @@ -96,6 +96,8 @@ The TensorFlow project strives to abide by generally accepted best practices in | --- | --- | --- | | **IBM s390x** | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | TBA | | **IBM ppc64le CPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_CPU/) | TBA | +| **IBM ppc64le GPU** | [](http://powerci.osuosl.org/job/TensorFlow_Ubuntu_16.04_PPC64LE_GPU/) | TBA | +| **Linux CPU with Intel® MKL-DNN®** | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-linux-cpu/) | TBA | ## For more information diff --git a/RELEASE.md b/RELEASE.md index 377a8eda37f..4b033944276 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,18 +1,38 @@ 
# Release 1.9.0 ## Major Features And Improvements -* Update tf.keras to the Keras 2.1.6 API. -* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. -* Adding support of core feature columns and losses to gradient boosted trees estimators. -* The distributions.Bijector API supports broadcasting for Bijectors with new API changes. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/distributions/bijectors/Bijector) for more details. -* Layered variable names have changed in the following conditions: - * Using `tf.keras.layers` with custom variable scopes. - * Using `tf.layers` in a subclassed `tf.keras.Model` class. See [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details - -## Breaking Changes - * If you're opening empty variable scopes; replace `variable_scope`('', ...) by `variable_scope`(`tf.get_variable_scope()`, ...). +* Updated docs for `tf.keras`: New Keras-based [get started](http://tensorflow.org/versions/r1.9/get_started), + and [programmers guide page](http://tensorflow.org/versions/r1.9/programmers_guide/keras). +* Update `tf.keras` to the Keras 2.1.6 API. +* Added [`tf.keras.layers.CuDNNGRU`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNGRU) and [`tf.keras.layers.CuDNNLSTM`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNLSTM) layers. [Try it](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb?linkId=53292082). +* Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/boosted_trees). 
+* The [python interface](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/lite) + for the [TFLite Optimizing Converter](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/toco/README.md) + has been expanded, and the command line interface (AKA: `toco`, `tflite_convert`) is once again + included in the standard `pip` installation. +* Improved data-loading and text processing with: + * [`tf.decode_compressed`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/decode_compressed) + * [`tf.string_strip`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/string_strip) + * [`tf.strings.regex_full_match`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/strings/regex_full_match) +* Added experimental support for new pre-made Estimators: + * [`tf.contrib.estimator.BaselineEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/BaselineEstimator) + * [`tf.contrib.estimator.RNNClassifier`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNClassifier) + * [`tf.contrib.estimator.RNNEstimator`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/estimator/RNNEstimator) +* The [distributions.Bijector](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/contrib/distributions/bijectors/Bijector) + API supports broadcasting for Bijectors with new API changes. + +## Breaking Changes + * If you're opening empty variable scopes; replace `variable_scope('', ...)` by + `variable_scope(tf.get_variable_scope(), ...)`. + * Headers used for building custom ops have been moved from site-packages/external into site-packages/tensorflow/include/external. ## Bug Fixes and Other Changes + +* `tfe.Network` is deprecated. Please inherit from `tf.keras.Model`. +* Layered variable names have changed in the following conditions: + * Using `tf.keras.layers` with custom variable scopes. 
+ * Using `tf.layers` in a subclassed `tf.keras.Model` class. See + [here](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/layers) for more details * `tf.data`: * The `DatasetBase::DebugString()` method is now `const`. * Added the `tf.contrib.data.sample_from_datasets()` API for randomly sampling from multiple datasets. diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 0bce474dfa6..f362900387e 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -438,6 +438,22 @@ filegroup( data = glob(["docs_src/**/*.md"]), ) +cc_library( + name = "grpc", + deps = select({ + ":linux_s390x": ["@grpc//:grpc_unsecure"], + "//conditions:default": ["@grpc"], + }), +) + +cc_library( + name = "grpc++", + deps = select({ + ":linux_s390x": ["@grpc//:grpc++_unsecure"], + "//conditions:default": ["@grpc//:grpc++"], + }), +) + # A shared object which includes registration mechanisms for ops and # kernels. Does not include the implementations of any ops or kernels. Instead, # the library which loads libtensorflow_framework.so @@ -587,19 +603,3 @@ py_library( visibility = ["//visibility:public"], deps = ["//tensorflow/python:no_contrib"], ) - -cc_library( - name = "grpc", - deps = select({ - ":linux_s390x": ["@grpc//:grpc_unsecure"], - "//conditions:default": ["@grpc"], - }), -) - -cc_library( - name = "grpc++", - deps = select({ - ":linux_s390x": ["@grpc//:grpc++_unsecure"], - "//conditions:default": ["@grpc//:grpc++"], - }), -) diff --git a/tensorflow/cc/gradients/array_grad.cc b/tensorflow/cc/gradients/array_grad.cc index ff348fadb24..b353accddcb 100644 --- a/tensorflow/cc/gradients/array_grad.cc +++ b/tensorflow/cc/gradients/array_grad.cc @@ -421,6 +421,58 @@ Status StridedSliceGradHelper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("StridedSlice", StridedSliceGradHelper); +Status SliceGrad(const Scope& scope, const Operation& op, + const std::vector<Output>& grad_inputs, + std::vector<Output>* grad_outputs) { + // Propagate the incoming gradient along 
all the selected values, + // and zero everywhere else. Use the Pad operator for this. + // + // First create an Nx2 padding where N is the number of input + // dimensions. The first column is the number of prepended zeros + // for each dimension, and the second column is the number of + // appended zeros. + // + // The first column is just the begin vector. + // The second column is the shape of the input element-wise + // subtracted by begin+size + + // Running example: + // input.shape = [3, 5, 3] + // begin = [1, 2, 1], size = [1, 3, 2] + Input input = op.input(0); + Input begin = op.input(1); + // input_rank = 3 + auto input_rank = Rank(scope, input); + // slice_size = [1, 3, 2] + auto slice_size = Shape(scope, op.output(0)); + // padding_shape = [3, 1] + auto padding_shape = Stack(scope, {input_rank, 1}); + // before_padding = [[1] + // [2] + // [1]] + Input before_padding = Reshape(scope, begin, padding_shape); + // after_padding_sizes = shape(input) - slice_size - begin + // = [3, 5, 3] - [1, 3, 2] - [1, 2, 1] + // = [1, 0, 0] + auto after_padding_sizes = + Sub(scope, Sub(scope, Shape(scope, input), slice_size), begin); + // after_padding = [[1] + // [0] + // [0]] + Input after_padding = Reshape(scope, after_padding_sizes, padding_shape); + // paddings = [[1 1] + // [2 0] + // [1 0]] + auto paddings = + Concat(scope, {before_padding, after_padding}, Const(scope, 1)); + grad_outputs->push_back(Pad(scope, grad_inputs[0], paddings)); + // Nothing propagated for "begin" and "size" inputs + grad_outputs->push_back(NoGradient()); + grad_outputs->push_back(NoGradient()); + return scope.status(); +} +REGISTER_GRADIENT_OP("Slice", SliceGrad); + } // anonymous namespace } // namespace ops } // namespace tensorflow diff --git a/tensorflow/cc/gradients/array_grad_test.cc b/tensorflow/cc/gradients/array_grad_test.cc index de3bd0fc9e2..d09275b6487 100644 --- a/tensorflow/cc/gradients/array_grad_test.cc +++ b/tensorflow/cc/gradients/array_grad_test.cc @@ -378,5 +378,12 @@ 
TEST_F(ArrayGradTest, StridedSliceGrad) { RunTest(x, x_shape, y, {1, 2, 2, 2}); } +TEST_F(ArrayGradTest, SliceGrad) { + TensorShape x_shape({3, 5, 3}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + auto y = Slice(scope_, x, {1, 2, 1}, {1, 3, 2}); + RunTest(x, x_shape, y, {1, 3, 2}); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index f68db134283..3479240610a 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -128,7 +128,14 @@ cc_library( "@llvm//:target", # fixdeps: keep "@llvm//:x86_code_gen", # fixdeps: keep "@llvm//:x86_disassembler", # fixdeps: keep - ], + ] + select({ + "//tensorflow:linux_ppc64le": [ + "@llvm//:powerpc_disassembler", + "@llvm//:powerpc_code_gen", + ], + "//conditions:default": [ + ], + }), alwayslink = True, # Contains compiler registration ) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 8974e6867d9..5ce44c01b88 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -125,9 +125,9 @@ py_library( }) + if_not_windows_cuda([ "//tensorflow/contrib/fused_conv:fused_conv_py", # unresolved symbols, need to export more symbols ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/cloud:cloud_py", # depends on bigtable "//tensorflow/contrib/bigtable", # doesn't compile on Windows + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code ]), ) diff --git a/tensorflow/contrib/autograph/converters/control_flow.py b/tensorflow/contrib/autograph/converters/control_flow.py index 22a671262c7..f4a87106279 100644 --- a/tensorflow/contrib/autograph/converters/control_flow.py +++ b/tensorflow/contrib/autograph/converters/control_flow.py @@ -47,7 +47,6 @@ class SymbolNamer(object): class ControlFlowTransformer(converter.Base): """Transforms 
control flow structures like loops an conditionals.""" - def _create_cond_branch(self, body_name, aliased_orig_names, aliased_new_names, body, returns): if aliased_orig_names: diff --git a/tensorflow/contrib/cmake/CMakeLists.txt b/tensorflow/contrib/cmake/CMakeLists.txt index 4ca7a1b28c6..a0a5b0e00c1 100644 --- a/tensorflow/contrib/cmake/CMakeLists.txt +++ b/tensorflow/contrib/cmake/CMakeLists.txt @@ -299,17 +299,20 @@ include_directories( ${double_conversion_INCLUDE_DIR} ) -if(tensorflow_ENABLE_SSL_SUPPORT) - include(boringssl) - list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES}) - list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl) - include_directories(${boringssl_INCLUDE_DIR}) -endif() if(tensorflow_ENABLE_GRPC_SUPPORT) + if(tensorflow_ENABLE_SSL_SUPPORT) + include(boringssl) + include_directories(${boringssl_INCLUDE_DIR}) + endif() include(grpc) + include_directories(${GRPC_INCLUDE_DIRS}) + # Place boringssl after grpc as grpc depends on boringssl. list(APPEND tensorflow_EXTERNAL_LIBRARIES ${grpc_STATIC_LIBRARIES}) list(APPEND tensorflow_EXTERNAL_DEPENDENCIES grpc) - include_directories(${GRPC_INCLUDE_DIRS}) + if(tensorflow_ENABLE_SSL_SUPPORT) + list(APPEND tensorflow_EXTERNAL_LIBRARIES ${boringssl_STATIC_LIBRARIES}) + list(APPEND tensorflow_EXTERNAL_DEPENDENCIES boringssl) + endif() endif() if(tensorflow_ENABLE_JEMALLOC_SUPPORT) include(jemalloc) diff --git a/tensorflow/contrib/cmake/external/boringssl.cmake b/tensorflow/contrib/cmake/external/boringssl.cmake index 3c4bb01e24f..fbb14b2515a 100644 --- a/tensorflow/contrib/cmake/external/boringssl.cmake +++ b/tensorflow/contrib/cmake/external/boringssl.cmake @@ -17,7 +17,7 @@ include (ExternalProject) set(boringssl_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src/boringssl/include) #set(boringssl_EXTRA_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/boringssl/src) set(boringssl_URL https://boringssl.googlesource.com/boringssl) -set(boringssl_TAG ee7aa02) +set(boringssl_TAG 
7f8c553d7f4db0a6ce727f2986d41bf8fe8ec4bf) set(boringssl_BUILD ${CMAKE_BINARY_DIR}/boringssl/src/boringssl-build) #set(boringssl_LIBRARIES ${boringssl_BUILD}/obj/so/libboringssl.so) set(boringssl_STATIC_LIBRARIES diff --git a/tensorflow/contrib/cmake/tf_core_framework.cmake b/tensorflow/contrib/cmake/tf_core_framework.cmake index 9f02d6cbab3..872b016d2b6 100644 --- a/tensorflow/contrib/cmake/tf_core_framework.cmake +++ b/tensorflow/contrib/cmake/tf_core_framework.cmake @@ -236,15 +236,6 @@ if(WIN32) list(APPEND tf_core_lib_srcs ${tf_core_platform_windows_srcs}) endif(WIN32) -if(tensorflow_ENABLE_SSL_SUPPORT) - # Cloud libraries require boringssl. - file(GLOB tf_core_platform_cloud_srcs - "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.h" - "${tensorflow_source_dir}/tensorflow/core/platform/cloud/*.cc" - ) - list(APPEND tf_core_lib_srcs ${tf_core_platform_cloud_srcs}) -endif() - if (tensorflow_ENABLE_HDFS_SUPPORT) list(APPEND tf_core_platform_hdfs_srcs "${tensorflow_source_dir}/tensorflow/core/platform/hadoop/hadoop_file_system.cc" diff --git a/tensorflow/contrib/cmake/tf_core_kernels.cmake b/tensorflow/contrib/cmake/tf_core_kernels.cmake index 2d76bf530a2..844f62649d9 100644 --- a/tensorflow/contrib/cmake/tf_core_kernels.cmake +++ b/tensorflow/contrib/cmake/tf_core_kernels.cmake @@ -134,14 +134,13 @@ if(tensorflow_BUILD_CONTRIB_KERNELS) list(APPEND tf_core_kernels_srcs ${tf_contrib_kernels_srcs}) endif(tensorflow_BUILD_CONTRIB_KERNELS) -if(NOT tensorflow_ENABLE_SSL_SUPPORT) - # Cloud libraries require boringssl. - file(GLOB tf_core_kernels_cloud_srcs - "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h" - "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc" - ) +# Cloud libraries require curl and boringssl. +# Curl is not supported yet anyway so we remove for now. 
+file(GLOB tf_core_kernels_cloud_srcs + "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.h" + "${tensorflow_source_dir}/tensorflow/contrib/cloud/kernels/*.cc" +) list(REMOVE_ITEM tf_core_kernels_srcs ${tf_core_kernels_cloud_srcs}) -endif() file(GLOB_RECURSE tf_core_kernels_exclude_srcs "${tensorflow_source_dir}/tensorflow/core/kernels/*test*.h" diff --git a/tensorflow/contrib/cmake/tf_stream_executor.cmake b/tensorflow/contrib/cmake/tf_stream_executor.cmake index 2f70e59d54d..6d634cb1709 100644 --- a/tensorflow/contrib/cmake/tf_stream_executor.cmake +++ b/tensorflow/contrib/cmake/tf_stream_executor.cmake @@ -64,8 +64,6 @@ file(GLOB tf_stream_executor_srcs if (tensorflow_ENABLE_GPU) file(GLOB tf_stream_executor_gpu_srcs "${tensorflow_source_dir}/tensorflow/stream_executor/cuda/*.cc" - "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.h" - "${tensorflow_source_dir}/tensorflow/compiler/xla/statusor.cc" ) if (NOT tensorflow_BUILD_CC_TESTS) file(GLOB tf_stream_executor_gpu_tests diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index 9594e5132fd..c9d86ef4ab8 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -534,7 +534,8 @@ def multi_label_head(n_classes, * An integer `SparseTensor` of class indices. The `dense_shape` must be `[D0, D1, ... DN, ?]` and the values within `[0, n_classes)`. * If `label_vocabulary` is given, a string `SparseTensor`. The `dense_shape` - must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary`. + must be `[D0, D1, ... DN, ?]` and the values within `label_vocabulary` or a + multi-hot tensor of shape `[D0, D1, ... DN, n_classes]`. If `weight_column` is specified, weights must be of shape `[D0, D1, ... DN]`, or `[D0, D1, ... DN, 1]`. 
diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index b2b57fa06ba..7b884402d46 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -568,6 +568,33 @@ class MultiLabelHead(test.TestCase): expected_loss=expected_loss, expected_metrics=expected_metrics) + def test_eval_with_label_vocabulary_with_multi_hot_input(self): + n_classes = 2 + head = head_lib.multi_label_head( + n_classes, label_vocabulary=['class0', 'class1']) + logits = np.array([[-1., 1.], [-1.5, 1.5]], dtype=np.float32) + labels_multi_hot = np.array([[1, 0], [1, 1]], dtype=np.int64) + # loss = labels * -log(sigmoid(logits)) + + # (1 - labels) * -log(1 - sigmoid(logits)) + # Sum over examples, divide by batch_size. + expected_loss = 0.5 * np.sum( + _sigmoid_cross_entropy(labels=labels_multi_hot, logits=logits)) + keys = metric_keys.MetricKeys + expected_metrics = { + # Average loss over examples. + keys.LOSS_MEAN: expected_loss, + # auc and auc_pr cannot be reliably calculated for only 4 samples, but + # this assert tests that the algorithm remains consistent. + keys.AUC: 0.3333, + keys.AUC_PR: 0.7639, + } + self._test_eval( + head=head, + logits=logits, + labels=labels_multi_hot, + expected_loss=expected_loss, + expected_metrics=expected_metrics) + def test_eval_with_thresholds(self): n_classes = 2 thresholds = [0.25, 0.5, 0.75] diff --git a/tensorflow/contrib/gan/python/estimator/python/head_impl.py b/tensorflow/contrib/gan/python/estimator/python/head_impl.py index 5b5557bd8f1..d1441e1eb2a 100644 --- a/tensorflow/contrib/gan/python/estimator/python/head_impl.py +++ b/tensorflow/contrib/gan/python/estimator/python/head_impl.py @@ -103,9 +103,20 @@ class GANHead(head._Head): # pylint: disable=protected-access name: name of the head. If provided, summary and metrics keys will be suffixed by `"/" + name`. 
""" + + if not callable(generator_loss_fn): + raise TypeError('generator_loss_fn must be callable.') + if not callable(discriminator_loss_fn): + raise TypeError('discriminator_loss_fn must be callable.') + if not use_loss_summaries in [True, False, None]: + raise ValueError('use_loss_summaries must be True, False or None.') + if get_hooks_fn is not None and not callable(get_hooks_fn): + raise TypeError('get_hooks_fn must be callable.') + if name is not None and not isinstance(name, str): + raise TypeError('name must be string.') + if get_hooks_fn is None: get_hooks_fn = tfgan_train.get_sequential_train_hooks() - # TODO(joelshor): Validate inputs. if use_loss_summaries in [True, False]: generator_loss_fn = functools.partial( diff --git a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py index 2889e937436..9f5fee45422 100644 --- a/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py +++ b/tensorflow/contrib/gan/python/losses/python/losses_impl_test.py @@ -570,7 +570,7 @@ class MutualInformationPenaltyTest(test.TestCase, _PenaltyTest): 'predicted_distributions': self._predicted_distributions, } self._expected_loss = 1.61610 - self._expected_op_name = 'mutual_information_loss/mul' + self._expected_op_name = 'mutual_information_loss/mul_1' self._batch_size = 2 diff --git a/tensorflow/contrib/image/kernels/image_ops.cc b/tensorflow/contrib/image/kernels/image_ops.cc index c2e32da133b..022e17d1396 100644 --- a/tensorflow/contrib/image/kernels/image_ops.cc +++ b/tensorflow/contrib/image/kernels/image_ops.cc @@ -35,6 +35,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; template struct FillProjectiveTransform<CPUDevice, uint8>; template struct FillProjectiveTransform<CPUDevice, int32>; template struct FillProjectiveTransform<CPUDevice, int64>; +template struct FillProjectiveTransform<CPUDevice, Eigen::half>; template struct FillProjectiveTransform<CPUDevice, float>; template struct 
FillProjectiveTransform<CPUDevice, double>; @@ -99,6 +100,7 @@ class ImageProjectiveTransform : public OpKernel { TF_CALL_uint8(REGISTER); TF_CALL_int32(REGISTER); TF_CALL_int64(REGISTER); +TF_CALL_half(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); diff --git a/tensorflow/contrib/image/kernels/image_ops.h b/tensorflow/contrib/image/kernels/image_ops.h index 8408fd6f2ea..209aa245484 100644 --- a/tensorflow/contrib/image/kernels/image_ops.h +++ b/tensorflow/contrib/image/kernels/image_ops.h @@ -21,6 +21,7 @@ limitations under the License. #define EIGEN_USE_THREADS #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" @@ -110,21 +111,21 @@ class ProjectiveGenerator { // f(x, y_floor) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_floor) // + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_floor) const float value_yfloor = - (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_floor), - DenseIndex(x_floor), channel, - fill_value) + - (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_floor), - DenseIndex(x_ceil), channel, - fill_value); + (x_ceil - x) * static_cast<float>(read_with_fill_value( + batch, DenseIndex(y_floor), DenseIndex(x_floor), + channel, fill_value)) + + (x - x_floor) * static_cast<float>(read_with_fill_value( + batch, DenseIndex(y_floor), DenseIndex(x_ceil), + channel, fill_value)); // f(x, y_ceil) = (x_ceil - x) / (x_ceil - x_floor) * f(x_floor, y_ceil) // + (x - x_floor) / (x_ceil - x_floor) * f(x_ceil, y_ceil) const float value_yceil = - (x_ceil - x) * read_with_fill_value(batch, DenseIndex(y_ceil), - DenseIndex(x_floor), channel, - fill_value) + - (x - x_floor) * read_with_fill_value(batch, DenseIndex(y_ceil), - DenseIndex(x_ceil), channel, - fill_value); + (x_ceil - x) * static_cast<float>(read_with_fill_value( + batch, DenseIndex(y_ceil), DenseIndex(x_floor), + channel, fill_value)) + + (x - x_floor) * 
static_cast<float>(read_with_fill_value( + batch, DenseIndex(y_ceil), DenseIndex(x_ceil), + channel, fill_value)); // f(x, y) = (y_ceil - y) / (y_ceil - y_floor) * f(x, y_floor) // + (y - y_floor) / (y_ceil - y_floor) * f(x, y_ceil) return T((y_ceil - y) * value_yfloor + (y - y_floor) * value_yceil); diff --git a/tensorflow/contrib/image/ops/image_ops.cc b/tensorflow/contrib/image/ops/image_ops.cc index ebdcaea7aba..e59f1bf8443 100644 --- a/tensorflow/contrib/image/ops/image_ops.cc +++ b/tensorflow/contrib/image/ops/image_ops.cc @@ -29,7 +29,7 @@ using shape_inference::ShapeHandle; REGISTER_OP("ImageProjectiveTransform") .Input("images: dtype") .Input("transforms: float32") - .Attr("dtype: {uint8, int32, int64, float32, float64}") + .Attr("dtype: {uint8, int32, int64, float16, float32, float64}") .Attr("interpolation: string") .Output("transformed_images: dtype") .SetShapeFn([](InferenceContext* c) { diff --git a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py index 6c9ff858abd..62a22dcf341 100644 --- a/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py +++ b/tensorflow/contrib/image/python/kernel_tests/image_ops_test.py @@ -30,7 +30,8 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest _DTYPES = set( - [dtypes.uint8, dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]) + [dtypes.uint8, dtypes.int32, dtypes.int64, + dtypes.float16, dtypes.float32, dtypes.float64]) class ImageOpsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/contrib/image/python/ops/image_ops.py b/tensorflow/contrib/image/python/ops/image_ops.py index cd984c80543..86b0ffe9a0f 100644 --- a/tensorflow/contrib/image/python/ops/image_ops.py +++ b/tensorflow/contrib/image/python/ops/image_ops.py @@ -33,7 +33,8 @@ _image_ops_so = loader.load_op_library( resource_loader.get_path_to_datafile("_image_ops.so")) _IMAGE_DTYPES = set( - [dtypes.uint8, 
dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]) + [dtypes.uint8, dtypes.int32, dtypes.int64, + dtypes.float16, dtypes.float32, dtypes.float64]) ops.RegisterShape("ImageConnectedComponents")(common_shapes.call_cpp_shape_fn) ops.RegisterShape("ImageProjectiveTransform")(common_shapes.call_cpp_shape_fn) diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index 0e8c89fe3af..c5c7269b1f1 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -1356,7 +1356,7 @@ class DropoutTest(test.TestCase): with self.test_session(): images = np.random.uniform(size=(5, height, width, 3)) output = _layers.dropout(images) - self.assertEqual(output.op.name, 'Dropout/dropout/mul') + self.assertEqual(output.op.name, 'Dropout/dropout_1/mul') output.get_shape().assert_is_compatible_with( ops.convert_to_tensor(images).get_shape()) diff --git a/tensorflow/contrib/lite/java/demo/app/build.gradle b/tensorflow/contrib/lite/java/demo/app/build.gradle index 908549321be..49868c5a756 100644 --- a/tensorflow/contrib/lite/java/demo/app/build.gradle +++ b/tensorflow/contrib/lite/java/demo/app/build.gradle @@ -57,3 +57,39 @@ dependencies { testCompile 'junit:junit:4.12' } + +def modelDownloadUrl = "https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip" +def localCache = "build/intermediates/mobilenet_v1_224_android_quant_2017_11_08.zip" +def targetFolder = "src/main/assets" + +task downloadModel(type: DownloadUrlTask) { + doFirst { + println "Downloading ${modelDownloadUrl}" + } + sourceUrl = "${modelDownloadUrl}" + target = file("${localCache}") +} + +task unzipModel(type: Copy, dependsOn: 'downloadModel') { + doFirst { + println "Unzipping ${localCache}" + } + from zipTree("${localCache}") + into "${targetFolder}" +} + +// Ensure the model file is downloaded and extracted before 
every build +preBuild.dependsOn unzipModel + +class DownloadUrlTask extends DefaultTask { + @Input + String sourceUrl + + @OutputFile + File target + + @TaskAction + void download() { + ant.get(src: sourceUrl, dest: target) + } +} diff --git a/tensorflow/contrib/lite/kernels/expand_dims_test.cc b/tensorflow/contrib/lite/kernels/expand_dims_test.cc index b755e8ce293..50dc860e5a8 100644 --- a/tensorflow/contrib/lite/kernels/expand_dims_test.cc +++ b/tensorflow/contrib/lite/kernels/expand_dims_test.cc @@ -39,7 +39,7 @@ class ExpandDimsOpModel : public SingleOpModel { void SetInputFloat(std::initializer_list<float> data) { PopulateTensor<float>(input_, data); } - void SetAxis(int axis) { PopulateTensor<int32>(axis_, {axis}); } + void SetAxis(int axis) { PopulateTensor<int32_t>(axis_, {axis}); } std::vector<float> GetValuesFloat() { return ExtractVector<float>(output_); } std::vector<int> GetOutputShape() { return GetTensorShape(output_); } @@ -51,7 +51,7 @@ class ExpandDimsOpModel : public SingleOpModel { TEST(ExpandDimsOpTest, DifferentAxis) { ExpandDimsOpModel m({2, 2}, TensorType_FLOAT32); - const auto values = {-1.f, 1.f, -2.f, 2.f}; + std::initializer_list<float> values = {-1.f, 1.f, -2.f, 2.f}; m.SetInputFloat(values); m.SetAxis(0); m.Invoke(); diff --git a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc index 0752aa18047..fd4d5367c5a 100644 --- a/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc +++ b/tensorflow/contrib/lite/kernels/maximum_minimum_test.cc @@ -126,10 +126,10 @@ TEST(MaximumOpTest, FloatWithBroadcastTest) { TEST(MaximumOpTest, Int32WithBroadcastTest) { std::initializer_list<int32_t> data1 = {1, 0, -1, -2, 3, 11}; std::initializer_list<int32_t> data2 = {2}; - TestModel<int32>(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}}, + TestModel<int32_t>(BuiltinOperator_MAXIMUM, {TensorType_INT32, {3, 1, 2}}, {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, data1, data2, 
{2, 2, 2, 2, 3, 11}); - TestModel<int32>(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}}, + TestModel<int32_t>(BuiltinOperator_MINIMUM, {TensorType_INT32, {3, 1, 2}}, {TensorType_INT32, {1}}, {TensorType_INT32, {3, 1, 2}}, data1, data2, {1, 0, -1, -2, 2, 2}); } diff --git a/tensorflow/contrib/lite/kernels/neg_test.cc b/tensorflow/contrib/lite/kernels/neg_test.cc index 3c95ac8cc27..3d3594c60bb 100644 --- a/tensorflow/contrib/lite/kernels/neg_test.cc +++ b/tensorflow/contrib/lite/kernels/neg_test.cc @@ -58,9 +58,9 @@ TEST(NegOpModel, NegFloat) { TEST(NegOpModel, NegInt32) { NegOpModel m({TensorType_INT32, {2, 3}}, {TensorType_INT32, {2, 3}}); - m.SetInput<int32>({-2, -1, 0, 1, 2, 3}); + m.SetInput<int32_t>({-2, -1, 0, 1, 2, 3}); m.Invoke(); - EXPECT_THAT(m.GetOutput<int32>(), ElementsAreArray({2, 1, 0, -1, -2, -3})); + EXPECT_THAT(m.GetOutput<int32_t>(), ElementsAreArray({2, 1, 0, -1, -2, -3})); } TEST(NegOpModel, NegInt64) { diff --git a/tensorflow/contrib/lite/kernels/select_test.cc b/tensorflow/contrib/lite/kernels/select_test.cc index cfe24a5fc92..4664b9acb44 100644 --- a/tensorflow/contrib/lite/kernels/select_test.cc +++ b/tensorflow/contrib/lite/kernels/select_test.cc @@ -88,11 +88,11 @@ TEST(SelectOpTest, SelectUInt8) { TensorType_UINT8); model.PopulateTensor<bool>(model.input1(), {false, true, false, false}); - model.PopulateTensor<uint8>(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor<uint8>(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor<uint8_t>(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor<uint8_t>(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput<uint8>(), ElementsAreArray({5, 2, 7, 8})); + EXPECT_THAT(model.GetOutput<uint8_t>(), ElementsAreArray({5, 2, 7, 8})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); } @@ -101,11 +101,11 @@ TEST(SelectOpTest, SelectInt32) { TensorType_INT32); model.PopulateTensor<bool>(model.input1(), {false, true, false, false}); - 
model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 2, 7, 8})); + EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 2, 7, 8})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 1, 1, 4})); } @@ -113,11 +113,11 @@ TEST(SelectOpTest, RankOneSelectInt32) { SelectOpModel model({2}, {2, 1, 2, 1}, {2, 1, 2, 1}, TensorType_INT32); model.PopulateTensor<bool>(model.input1(), {false, true}); - model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 6, 3, 4})); + EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 6, 3, 4})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 1, 2, 1})); } @@ -125,11 +125,11 @@ TEST(SelectOpTest, RankZeroSelectInt32) { SelectOpModel model({1}, {1, 2, 2, 1}, {1, 2, 2, 1}, TensorType_INT32); model.PopulateTensor<bool>(model.input1(), {false}); - model.PopulateTensor<int32>(model.input2(), {1, 2, 3, 4}); - model.PopulateTensor<int32>(model.input3(), {5, 6, 7, 8}); + model.PopulateTensor<int32_t>(model.input2(), {1, 2, 3, 4}); + model.PopulateTensor<int32_t>(model.input3(), {5, 6, 7, 8}); model.Invoke(); - EXPECT_THAT(model.GetOutput<int32>(), ElementsAreArray({5, 6, 7, 8})); + EXPECT_THAT(model.GetOutput<int32_t>(), ElementsAreArray({5, 6, 7, 8})); EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({1, 2, 2, 1})); } diff --git a/tensorflow/contrib/lite/kernels/strided_slice_test.cc b/tensorflow/contrib/lite/kernels/strided_slice_test.cc index 716b11d4327..c5d4f9affb4 100644 --- 
a/tensorflow/contrib/lite/kernels/strided_slice_test.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice_test.cc @@ -21,7 +21,6 @@ limitations under the License. namespace tflite { namespace { -using ::int32; using ::testing::ElementsAreArray; template <typename input_type = float, @@ -50,14 +49,14 @@ class StridedSliceOpModel : public SingleOpModel { void SetInput(std::initializer_list<input_type> data) { PopulateTensor<input_type>(input_, data); } - void SetBegin(std::initializer_list<int32> data) { - PopulateTensor<int32>(begin_, data); + void SetBegin(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(begin_, data); } - void SetEnd(std::initializer_list<int32> data) { - PopulateTensor<int32>(end_, data); + void SetEnd(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(end_, data); } - void SetStrides(std::initializer_list<int32> data) { - PopulateTensor<int32>(strides_, data); + void SetStrides(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(strides_, data); } std::vector<input_type> GetOutput() { @@ -566,7 +565,7 @@ TEST(StridedSliceOpTest, RunTwice) { } TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis1Uint8) { - StridedSliceOpModel<uint8, TensorType_UINT8> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, + StridedSliceOpModel<uint8_t, TensorType_UINT8> m({2, 3, 2}, {3}, {3}, {3}, 0, 0, 0, 0, 1); m.SetInput({1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); m.SetBegin({0, 0, 0}); diff --git a/tensorflow/contrib/lite/kernels/test_util_test.cc b/tensorflow/contrib/lite/kernels/test_util_test.cc index 1e10e890612..23658034725 100644 --- a/tensorflow/contrib/lite/kernels/test_util_test.cc +++ b/tensorflow/contrib/lite/kernels/test_util_test.cc @@ -22,22 +22,22 @@ using ::testing::ElementsAreArray; TEST(TestUtilTest, QuantizeVector) { std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize<uint8>(data, /*scale=*/1.0, /*zero_point=*/0); - std::vector<uint8> expected = {0, 0, 0, 1, 1, 255}; + auto q_data = 
Quantize<uint8_t>(data, /*scale=*/1.0, /*zero_point=*/0); + std::vector<uint8_t> expected = {0, 0, 0, 1, 1, 255}; EXPECT_THAT(q_data, ElementsAreArray(expected)); } TEST(TestUtilTest, QuantizeVectorScalingDown) { std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize<uint8>(data, /*scale=*/10.0, /*zero_point=*/0); - std::vector<uint8> expected = {0, 0, 0, 0, 0, 100}; + auto q_data = Quantize<uint8_t>(data, /*scale=*/10.0, /*zero_point=*/0); + std::vector<uint8_t> expected = {0, 0, 0, 0, 0, 100}; EXPECT_THAT(q_data, ElementsAreArray(expected)); } TEST(TestUtilTest, QuantizeVectorScalingUp) { std::vector<float> data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; - auto q_data = Quantize<uint8>(data, /*scale=*/0.1, /*zero_point=*/0); - std::vector<uint8> expected = {0, 0, 0, 5, 10, 255}; + auto q_data = Quantize<uint8_t>(data, /*scale=*/0.1, /*zero_point=*/0); + std::vector<uint8_t> expected = {0, 0, 0, 5, 10, 255}; EXPECT_THAT(q_data, ElementsAreArray(expected)); } diff --git a/tensorflow/contrib/lite/kernels/tile_test.cc b/tensorflow/contrib/lite/kernels/tile_test.cc index a134a75d56a..4f78c224e54 100644 --- a/tensorflow/contrib/lite/kernels/tile_test.cc +++ b/tensorflow/contrib/lite/kernels/tile_test.cc @@ -38,27 +38,27 @@ class TileOpModel : public SingleOpModel { PopulateTensor<float>(input_, data); } - void SetInputUInt8(std::initializer_list<uint8> data) { - PopulateTensor<uint8>(input_, data); + void SetInputUInt8(std::initializer_list<uint8_t> data) { + PopulateTensor<uint8_t>(input_, data); } - void SetInputInt32(std::initializer_list<int32> data) { - PopulateTensor<int32>(input_, data); + void SetInputInt32(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(input_, data); } void SetInputInt64(std::initializer_list<int64_t> data) { PopulateTensor<int64_t>(input_, data); } - void SetMultipliers(std::initializer_list<int32> data) { - PopulateTensor<int32>(multipliers_, data); + void 
SetMultipliers(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(multipliers_, data); } std::vector<float> GetOutputFloat() { return ExtractVector<float>(output_); } - std::vector<uint8> GetOutputUInt8() { return ExtractVector<uint8>(output_); } + std::vector<uint8_t> GetOutputUInt8() { return ExtractVector<uint8_t>(output_); } - std::vector<int32> GetOutputInt32() { return ExtractVector<int32>(output_); } + std::vector<int32_t> GetOutputInt32() { return ExtractVector<int32_t>(output_); } std::vector<int64_t> GetOutputInt64() { return ExtractVector<int64_t>(output_); diff --git a/tensorflow/contrib/lite/kernels/topk_v2_test.cc b/tensorflow/contrib/lite/kernels/topk_v2_test.cc index 212f8acc76d..2abb89b6177 100644 --- a/tensorflow/contrib/lite/kernels/topk_v2_test.cc +++ b/tensorflow/contrib/lite/kernels/topk_v2_test.cc @@ -42,32 +42,32 @@ class TopKV2OpModel : public SingleOpModel { PopulateTensor<float>(input_, data); } - void SetInputUInt8(std::initializer_list<uint8> data) { - PopulateTensor<uint8>(input_, data); + void SetInputUInt8(std::initializer_list<uint8_t> data) { + PopulateTensor<uint8_t>(input_, data); } - void SetInputInt32(std::initializer_list<int32> data) { - PopulateTensor<int32>(input_, data); + void SetInputInt32(std::initializer_list<int32_t> data) { + PopulateTensor<int32_t>(input_, data); } void SetInputInt64(std::initializer_list<int64_t> data) { PopulateTensor<int64_t>(input_, data); } - std::vector<int32> GetIndexes() { - return ExtractVector<int32>(output_indexes_); + std::vector<int32_t> GetIndexes() { + return ExtractVector<int32_t>(output_indexes_); } std::vector<float> GetValuesFloat() { return ExtractVector<float>(output_values_); } - std::vector<uint8> GetValuesUInt8() { - return ExtractVector<uint8>(output_values_); + std::vector<uint8_t> GetValuesUInt8() { + return ExtractVector<uint8_t>(output_values_); } - std::vector<int32> GetValuesInt32() { - return ExtractVector<int32>(output_values_); + std::vector<int32_t> 
GetValuesInt32() { + return ExtractVector<int32_t>(output_values_); } std::vector<int64_t> GetValuesInt64() { @@ -119,7 +119,7 @@ TEST(TopKV2OpTest, VectorFloat) { EXPECT_THAT(m.GetValuesFloat(), ElementsAreArray(ArrayFloatNear({0.8, 0.2}))); } -// Check that uint8 works. +// Check that uint8_t works. TEST(TopKV2OpTest, TypeUint8) { TopKV2OpModel m({2, 3}, TensorType_UINT8, 2); m.SetInputUInt8({1, 2, 3, 251, 250, 249}); @@ -128,7 +128,7 @@ TEST(TopKV2OpTest, TypeUint8) { EXPECT_THAT(m.GetValuesUInt8(), ElementsAreArray({3, 2, 251, 250})); } -// Check that int32 works. +// Check that int32_t works. TEST(TopKV2OpTest, TypeInt32) { TopKV2OpModel m({2, 3}, TensorType_INT32, 2); m.SetInputInt32({1, 2, 3, 10251, 10250, 10249}); diff --git a/tensorflow/contrib/lite/python/tflite_convert.py b/tensorflow/contrib/lite/python/tflite_convert.py index 286d15984fe..9bd1f4f76ee 100644 --- a/tensorflow/contrib/lite/python/tflite_convert.py +++ b/tensorflow/contrib/lite/python/tflite_convert.py @@ -105,7 +105,7 @@ def _convert_model(flags): input_arrays = converter.get_input_arrays() std_dev_values = _parse_array(flags.std_dev_values, type_fn=int) mean_values = _parse_array(flags.mean_values, type_fn=int) - quant_stats = zip(mean_values, std_dev_values) + quant_stats = list(zip(mean_values, std_dev_values)) if ((not flags.input_arrays and len(input_arrays) > 1) or (len(input_arrays) != len(quant_stats))): raise ValueError("Mismatching --input_arrays, --std_dev_values, and " diff --git a/tensorflow/contrib/mpi_collectives/BUILD b/tensorflow/contrib/mpi_collectives/BUILD index a7be92a35e0..ecac06354d2 100644 --- a/tensorflow/contrib/mpi_collectives/BUILD +++ b/tensorflow/contrib/mpi_collectives/BUILD @@ -52,6 +52,7 @@ tf_custom_op_library( deps = [ ":mpi_defines", ":mpi_message_proto_cc", + "//tensorflow/stream_executor:stream_executor_headers_lib", "//third_party/mpi", ], ) diff --git a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc 
b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc index ed22ee667f1..e4b0c2c6541 100644 --- a/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc +++ b/tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc @@ -73,7 +73,7 @@ limitations under the License. */ template <class T> -using StatusOr = se::port::StatusOr<T>; +using StatusOr = stream_executor::port::StatusOr<T>; using CPUDevice = Eigen::ThreadPoolDevice; using GPUDevice = Eigen::GpuDevice; diff --git a/tensorflow/contrib/opt/__init__.py b/tensorflow/contrib/opt/__init__.py index 65777b13231..3e63e99030c 100644 --- a/tensorflow/contrib/opt/__init__.py +++ b/tensorflow/contrib/opt/__init__.py @@ -30,6 +30,7 @@ from tensorflow.contrib.opt.python.training.model_average_optimizer import * from tensorflow.contrib.opt.python.training.moving_average_optimizer import * from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import * from tensorflow.contrib.opt.python.training.nadam_optimizer import * +from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * from tensorflow.contrib.opt.python.training.powersign import * from tensorflow.contrib.opt.python.training.variable_clipping_optimizer import * from tensorflow.contrib.opt.python.training.weight_decay_optimizers import * diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 804cd8d72d8..e3c48998305 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -506,7 +506,7 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): def _IsValidUnfusedBatchNorm(graph, context): """Checks that the output of the unfused batch norm has consumers.""" add_shift = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/add_1') + context + '/BatchNorm/batchnorm_1/add_1') # Ensure that the output tensor of batch norm has consumers, otherwise this # is a dangling 
node and not a match. return bool(add_shift.outputs[0].consumers()) @@ -599,7 +599,7 @@ def _GetBatchNormParams(graph, context, has_scaling): op_suffix_mean = '/BatchNorm/moments/Squeeze' op_suffix_variance = '/BatchNorm/moments/Squeeze_1' - op_suffix_epsilon = '/BatchNorm/batchnorm/add/y' + op_suffix_epsilon = '/BatchNorm/batchnorm_1/add/y' op_suffix_bn_decay_mean = '/BatchNorm/AssignMovingAvg/decay' op_suffix_bn_decay_var = '/BatchNorm/AssignMovingAvg_1/decay' @@ -675,12 +675,12 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, Returns: A pair of Operations, the first is the original consumer node of the batch - norm (../BatchNorm/batchnorm/add_1), the second is the consumer node of + norm (../BatchNorm/batchnorm_1/add_1), the second is the consumer node of the folded graph (add_fold). """ mul_scale_name = 'mul_1' if has_scaling else 'mul' mul_scale = graph.get_operation_by_name(context + - '/BatchNorm/batchnorm/' + + '/BatchNorm/batchnorm_1/' + mul_scale_name) op_below = mul_scale.inputs[0].op # Skip over the BatchToSpace operation in the case of atrous convolutions. @@ -707,7 +707,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, ] scale_name = 'mul' if has_scaling else 'Rsqrt' scale = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/' + scale_name) + context + '/BatchNorm/batchnorm_1/' + scale_name) scale = array_ops.reshape(scale.outputs[0], new_shape, context + '/scale_reshape') @@ -735,7 +735,7 @@ def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, [(1, mul_fold.outputs[0])]) add_shift = graph.get_operation_by_name( - context + '/BatchNorm/batchnorm/add_1') + context + '/BatchNorm/batchnorm_1/add_1') corrected_output = conv_or_fc_folded.outputs[0] # Copy the batch to space operation if we have a atrous convolution. @@ -930,7 +930,7 @@ def _HasScaling(graph, input_to_ops_map, bn): Returns: A boolean indicating whether this batch norm layer has scaling enabled. 
""" - rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm/Rsqrt') + rsqrt_op = graph.get_operation_by_name(bn + '/BatchNorm/batchnorm_1/Rsqrt') rsqrt_consumers = input_to_ops_map.ConsumerOperations(rsqrt_op) return sum(1 for op in rsqrt_consumers if op.type == 'Mul') == 1 diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py index 272afcdf075..7c907ffd92c 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms_test.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms_test.py @@ -600,13 +600,13 @@ class FoldBatchNormsTest(test_util.TensorFlowTestCase): if has_scaling: if fused: return scope + '/BatchNorm_Fold/mul' - return scope + '/BatchNorm/batchnorm/mul' - return scope + '/BatchNorm/batchnorm/Rsqrt' + return scope + '/BatchNorm/batchnorm_1/mul' + return scope + '/BatchNorm/batchnorm_1/Rsqrt' def _BathNormBiasName(self, scope, fused): if fused: return scope + '/BatchNorm_Fold/bias' - return scope + '/BatchNorm/batchnorm/sub' + return scope + '/BatchNorm/batchnorm_1/sub' def _WeightInit(self, stddev): """Returns a truncated normal variable initializer. 
diff --git a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py index cf55da27236..a42bbca6113 100644 --- a/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py +++ b/tensorflow/contrib/receptive_field/python/util/receptive_field_test.py @@ -385,7 +385,7 @@ class ReceptiveFieldTest(test.TestCase): effective_stride_y, effective_padding_x, effective_padding_y) = ( receptive_field.compute_receptive_field_from_graph_def( graph_def, input_node, output_node, - ['Dropout/dropout/random_uniform'])) + ['Dropout/dropout_1/random_uniform'])) self.assertEqual(receptive_field_x, 3) self.assertEqual(receptive_field_y, 3) self.assertEqual(effective_stride_x, 4) diff --git a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py index 0403b652d72..d9c41f90d0a 100644 --- a/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py +++ b/tensorflow/contrib/tensorrt/test/tf_trt_integration_test.py @@ -18,131 +18,330 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from collections import namedtuple +import itertools import warnings import numpy as np +import six from tensorflow.contrib import tensorrt as trt -from tensorflow.core.protobuf import config_pb2 as cpb2 -from tensorflow.python.framework import constant_op as cop -from tensorflow.python.framework import dtypes as dtypes -from tensorflow.python.framework import importer as importer -from tensorflow.python.framework import ops as ops +from tensorflow.core.protobuf import config_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import importer +from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from 
tensorflow.python.ops import array_ops as aops -from tensorflow.python.ops import nn as nn -from tensorflow.python.ops import nn_ops as nn_ops -from tensorflow.python.platform import googletest +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops +from tensorflow.python.platform import test + +INPUT_NAME = "input" +OUTPUT_NAME = "output" +INPUT_DIMS = [100, 24, 24, 2] +MODE_FP32 = "FP32" +MODE_FP16 = "FP16" +MODE_INT8 = "INT8" + +if six.PY2: + to_bytes = lambda s: s + to_string = lambda s: s +else: + to_bytes = lambda s: s.encode("utf-8", errors="surrogateescape") + to_string = lambda s: s.decode("utf-8") -class IntegrationTest(test_util.TensorFlowTestCase): +# TODO(aaroey): test graph with different dtypes. +def GetSingleEngineGraphDef(dtype=dtypes.float32): + """Create a graph containing single segment.""" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME) + with g.device("/GPU:0"): + conv_filter = constant_op.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtype) + conv = nn.conv2d( + input=inp, + filter=conv_filter, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + bias = constant_op.constant( + [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtype) + added = nn.bias_add(conv, bias, name="bias_add") + relu = nn.relu(added, "relu") + identity = array_ops.identity(relu, "identity") + pool = nn_ops.max_pool( + identity, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") + array_ops.squeeze(pool, name=OUTPUT_NAME) + return g.as_graph_def() + + +# TODO(aaroey): test graph with different dtypes. 
+def GetMultiEngineGraphDef(dtype=dtypes.float32): + """Create a graph containing multiple segment.""" + g = ops.Graph() + with g.as_default(): + inp = array_ops.placeholder( + dtype=dtype, shape=[None] + INPUT_DIMS[1:], name=INPUT_NAME) + with g.device("/GPU:0"): + conv_filter = constant_op.constant( + [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], + name="weights", + dtype=dtype) + conv = nn.conv2d( + input=inp, + filter=conv_filter, + strides=[1, 2, 2, 1], + padding="SAME", + name="conv") + c1 = constant_op.constant( + np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype) + p = conv * c1 + c2 = constant_op.constant( + np.random.randn(INPUT_DIMS[0], 12, 12, 6), dtype=dtype) + q = conv / c2 + + edge = math_ops.sin(q) + edge /= edge + r = edge + edge + + p -= edge + q *= edge + s = p + q + s -= r + array_ops.squeeze(s, name=OUTPUT_NAME) + return g.as_graph_def() + + +TestGraph = namedtuple("TestGraph", + ["gdef", "num_expected_engines", "expected_output_dims"]) + +TEST_GRAPHS = { + "SingleEngineGraph": + TestGraph( + gdef=GetSingleEngineGraphDef(), + num_expected_engines=1, + expected_output_dims=(100, 6, 6, 6)), + "MultiEngineGraph": + TestGraph( + gdef=GetMultiEngineGraphDef(), + num_expected_engines=2, + expected_output_dims=(100, 12, 12, 6)), + # TODO(aaroey): add a large complex graph to test. 
+} + + +class TfTrtIntegrationTest(test_util.TensorFlowTestCase): """Class to test Tensorflow-TensorRT integration.""" def setUp(self): """Setup method.""" - super(IntegrationTest, self).setUp() + super(TfTrtIntegrationTest, self).setUp() warnings.simplefilter("always") - inp_dims = (100, 24, 24, 2) - self._input = np.random.random_sample(inp_dims) - self._original_graph = self.get_simple_graph_def() - self._gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) - self._config = cpb2.ConfigProto(gpu_options=self._gpu_options) - self._reference = self.run_graph(self._original_graph, self._input) + self._input = np.random.random_sample(INPUT_DIMS) - def get_simple_graph_def(self): - """Create a simple graph and return its graph_def.""" - g = ops.Graph() - with g.as_default(): - a = aops.placeholder( - dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") - e = cop.constant( - [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], - name="weights", - dtype=dtypes.float32) - conv = nn.conv2d( - input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv") - b = cop.constant( - [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) - t = nn.bias_add(conv, b, name="biasAdd") - relu = nn.relu(t, "relu") - idty = aops.identity(relu, "ID") - v = nn_ops.max_pool( - idty, [1, 2, 2, 1], [1, 2, 2, 1], "VALID", name="max_pool") - aops.squeeze(v, name="output") - return g.as_graph_def() + def _GetConfigProto(self, + use_optimizer, + precision_mode=None, + is_dynamic_op=None): + if use_optimizer: + rewriter_cfg = rewriter_config_pb2.RewriterConfig() + rewriter_cfg.optimizers.extend(["constfold", "layout"]) + custom_op = rewriter_cfg.custom_optimizers.add() + custom_op.name = "TensorRTOptimizer" + custom_op.parameter_map["minimum_segment_size"].i = 3 + custom_op.parameter_map["max_batch_size"].i = self._input.shape[0] + custom_op.parameter_map["is_dynamic_op"].b = is_dynamic_op + custom_op.parameter_map["max_workspace_size_bytes"].i = 1 << 
25 + custom_op.parameter_map["precision_mode"].s = to_bytes(precision_mode) + graph_options = config_pb2.GraphOptions(rewrite_options=rewriter_cfg) + else: + graph_options = config_pb2.GraphOptions() - def run_graph(self, gdef, dumm_inp): - """Run given graphdef once.""" - ops.reset_default_graph() + gpu_options = config_pb2.GPUOptions() + if trt.trt_convert.get_linked_tensorrt_version()[0] == 3: + gpu_options.per_process_gpu_memory_fraction = 0.50 + + config = config_pb2.ConfigProto( + gpu_options=gpu_options, graph_options=graph_options) + return config + + def _RunGraph(self, graph_key, gdef, input_data, config, num_runs=2): + """Run given graphdef multiple times.""" g = ops.Graph() with g.as_default(): inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) + graph_def=gdef, return_elements=[INPUT_NAME, OUTPUT_NAME], name="") inp = inp.outputs[0] out = out.outputs[0] with self.test_session( - graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: - val = sess.run(out, {inp: dumm_inp}) + graph=g, config=config, use_gpu=True, force_gpu=True) as sess: + val = None + # Defaults to 2 runs to verify result across multiple runs is same. + for _ in range(num_runs): + new_val = sess.run(out, {inp: input_data}) + self.assertEquals(TEST_GRAPHS[graph_key].expected_output_dims, + new_val.shape) + if val is not None: + self.assertAllEqual(new_val, val) + val = new_val return val # Use real data that is representative of the inference dataset # for calibration. For this test script it is random data. - def run_calibration(self, gdef, dumm_inp): - """Run given calibration graph multiple times.""" - ops.reset_default_graph() - g = ops.Graph() - with g.as_default(): - inp, out = importer.import_graph_def( - graph_def=gdef, return_elements=["input", "output"]) - inp = inp.outputs[0] - out = out.outputs[0] - # run over real calibration data here, we are mimicking a calibration - # set of 30 different batches. 
Use as much calibration data as you want - with self.test_session( - graph=g, config=self._config, use_gpu=True, force_gpu=True) as sess: - for _ in range(30): - val = sess.run(out, {inp: dumm_inp}) - return val + def _RunCalibration(self, graph_key, gdef, input_data, config): + """Run calibration on given graph.""" + return self._RunGraph(graph_key, gdef, input_data, config, 30) - def get_trt_graph(self, mode): + def _GetTrtGraph(self, gdef, precision_mode, is_dynamic_op): """Return trt converted graph.""" - if mode in ["FP32", "FP16", "INT8"]: - return trt.create_inference_graph( - input_graph_def=self._original_graph, - outputs=["output"], - max_batch_size=self._input.shape[0], - max_workspace_size_bytes=1 << 25, - precision_mode=mode, # TRT Engine precision "FP32","FP16" or "INT8" - minimum_segment_size=2 # minimum number of nodes in an engine - ) - return None + return trt.create_inference_graph( + input_graph_def=gdef, + outputs=[OUTPUT_NAME], + max_batch_size=self._input.shape[0], + max_workspace_size_bytes=1 << 25, + precision_mode=precision_mode, + minimum_segment_size=2, + is_dynamic_op=is_dynamic_op) - def testFP32(self): - """Test FP32 conversion. 
Results should be identical to native case.""" - trt_graph = self.get_trt_graph("FP32") - result = self.run_graph(trt_graph, self._input) - self.assertAllEqual(self._reference, result) - result1 = self.run_graph(trt_graph, self._input) - self.assertAllEqual(result1, result) + def _VerifyGraphDef(self, + graph_key, + gdef, + precision_mode=None, + is_calibrated=None, + dynamic_engine=None): + num_engines = 0 + for n in gdef.node: + if n.op == "TRTEngineOp": + num_engines += 1 + self.assertNotEqual("", n.attr["serialized_segment"].s) + self.assertNotEqual("", n.attr["segment_funcdef_name"].s) + self.assertEquals(n.attr["precision_mode"].s, precision_mode) + self.assertEquals(n.attr["static_engine"].b, not dynamic_engine) + if precision_mode == MODE_INT8 and is_calibrated: + self.assertNotEqual("", n.attr["calibration_data"].s) + else: + self.assertEquals("", n.attr["calibration_data"].s) + if precision_mode is None: + self.assertEquals(num_engines, 0) + else: + self.assertEquals(num_engines, + TEST_GRAPHS[graph_key].num_expected_engines) - def testFP16(self): - """Test FP16 conversion. Results may be different from native case.""" - trt_graph = self.get_trt_graph("FP16") - result = self.run_graph(trt_graph, self._input) - self.assertAllClose(self._reference, result, rtol=1.e-03) - result1 = self.run_graph(trt_graph, self._input) - self.assertAllEqual(result1, result) + def _RunTest(self, graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine): + assert precision_mode in [MODE_FP32, MODE_FP16, MODE_INT8] + input_gdef = TEST_GRAPHS[graph_key].gdef + self._VerifyGraphDef(graph_key, input_gdef) - def testINT8(self): - """Test INT8 conversion. 
Results may be different from native case.""" - calib_graph = self.get_trt_graph("INT8") - result = self.run_calibration(calib_graph, self._input) - self.assertAllEqual(self._reference, result) - int8_graph = trt.calib_graph_to_infer_graph(calib_graph) - result = self.run_graph(int8_graph, self._input) - self.assertAllClose(self._reference, result, rtol=1.e-03) - result1 = self.run_graph(int8_graph, self._input) - self.assertAllEqual(result1, result) + # Get reference result without running trt. + config_no_trt = self._GetConfigProto(False) + print("Running original graph w/o trt, config:\n%s" % str(config_no_trt)) + ref_result = self._RunGraph(graph_key, input_gdef, self._input, + config_no_trt) + + # Run calibration if necessary. + if precision_mode == MODE_INT8: + + calib_config = self._GetConfigProto(use_optimizer, precision_mode, + dynamic_calib_engine) + print("Running calibration graph, config:\n%s" % str(calib_config)) + if use_optimizer: + self.assertTrue(False) + # TODO(aaroey): uncomment this and get infer_gdef when this mode is + # supported. + # result = self._RunCalibration(graph_key, input_gdef, self._input, + # calib_config) + else: + calib_gdef = self._GetTrtGraph(input_gdef, precision_mode, + dynamic_calib_engine) + self._VerifyGraphDef(graph_key, calib_gdef, precision_mode, False, + dynamic_calib_engine) + result = self._RunCalibration(graph_key, calib_gdef, self._input, + calib_config) + infer_gdef = trt.calib_graph_to_infer_graph(calib_gdef) + self._VerifyGraphDef(graph_key, infer_gdef, precision_mode, True, + dynamic_calib_engine) + self.assertAllClose(ref_result, result, rtol=1.e-03) + else: + infer_gdef = input_gdef + + # Run inference. 
+ infer_config = self._GetConfigProto(use_optimizer, precision_mode, + dynamic_infer_engine) + print("Running final inference graph, config:\n%s" % str(infer_config)) + if use_optimizer: + result = self._RunGraph(graph_key, infer_gdef, self._input, infer_config) + else: + trt_infer_gdef = self._GetTrtGraph(infer_gdef, precision_mode, + dynamic_infer_engine) + self._VerifyGraphDef(graph_key, trt_infer_gdef, precision_mode, True, + dynamic_infer_engine) + result = self._RunGraph(graph_key, trt_infer_gdef, self._input, + infer_config) + self.assertAllClose(ref_result, result, rtol=1.e-03) + + def testIdempotence(self): + # Test that applying tensorrt optimizer or offline conversion tools multiple + # times to the same graph will result in same graph. + # TODO(aaroey): implement this. + pass + + +def GetTests(): + + def _GetTest(g, u, p, i, c): + + def _Test(self): + print("Running test with parameters: graph_key=%s, use_optimizer=%s, " + "precision_mode=%s, dynamic_infer_engine=%s, " + "dynamic_calib_engine=%s" % (g, u, p, i, c)) + self._RunTest(g, u, p, i, c) + + return _Test + + use_optimizer_options = [False, True] + precision_mode_options = [MODE_FP32, MODE_FP16, MODE_INT8] + dynamic_infer_engine_options = [False, True] + dynamic_calib_engine_options = [False, True] + for (graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine) in itertools.product( + TEST_GRAPHS, use_optimizer_options, precision_mode_options, + dynamic_infer_engine_options, dynamic_calib_engine_options): + if precision_mode == MODE_INT8: + if not dynamic_calib_engine and dynamic_infer_engine: + # TODO(aaroey): test this case, the conversion from static calibration + # engine to dynamic inference engine should be a noop. + continue + if use_optimizer: + # TODO(aaroey): if use_optimizer is True we need to get the inference + # graphdef using custom python wrapper class, which is not currently + # supported yet. 
+ continue + if not dynamic_calib_engine: + # TODO(aaroey): construction of static calibration engine is not + # supported yet. + continue + if dynamic_calib_engine and not dynamic_infer_engine: + # TODO(aaroey): construction of static inference engine using dynamic + # calibration engine is not supported yet. + continue + else: # In non int8 mode. + if dynamic_calib_engine: + # dynamic_calib_engine doesn't affect non-int8 modes, so just let + # related tests run once on dynamic_calib_engine=False. + continue + yield _GetTest(graph_key, use_optimizer, precision_mode, + dynamic_infer_engine, dynamic_calib_engine) if __name__ == "__main__": - googletest.main() + for index, t in enumerate(GetTests()): + setattr(TfTrtIntegrationTest, "testTfTRT_" + str(index), t) + test.main() diff --git a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt index 6cd76ff340e..342a1f6b050 100644 --- a/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_GatherNd.pbtxt @@ -25,7 +25,7 @@ END (K-1)-dimensional tensor of indices into `params`, where each element defines a slice of `params`: - output[i_0, ..., i_{K-2}] = params[indices[i0, ..., i_{K-2}]] + output[\\(i_0, ..., i_{K-2}\\)] = params[indices[\\(i_0, ..., i_{K-2}\\)]] Whereas in @{tf.gather} `indices` defines slices into the first dimension of `params`, in `tf.gather_nd`, `indices` defines slices into the diff --git a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt index 94a4ef574d9..f7068106627 100644 --- a/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_LinSpace.pbtxt @@ -3,19 +3,19 @@ op { in_arg { name: "start" description: <<END -First entry in the range. +0-D tensor. First entry in the range. END } in_arg { name: "stop" description: <<END -Last entry in the range. +0-D tensor. 
Last entry in the range. END } in_arg { name: "num" description: <<END -Number of values to generate. +0-D tensor. Number of values to generate. END } out_arg { diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt index 0d680f65312..d7b56aec870 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixExponential.pbtxt @@ -18,7 +18,7 @@ END } summary: "Computes the matrix exponential of one or more square matrices:" description: <<END -exp(A) = \sum_{n=0}^\infty A^n/n! +\\(exp(A) = \sum_{n=0}^\infty A^n/n!\\) The exponential is computed using a combination of the scaling and squaring method and the Pade approximation. Details can be found in: diff --git a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt index a6c4d0d4008..9e80064d156 100644 --- a/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_MatrixLogarithm.pbtxt @@ -20,7 +20,7 @@ END summary: "Computes the matrix logarithm of one or more square matrices:" description: <<END -log(exp(A)) = A +\\(log(exp(A)) = A\\) This op is only defined for complex matrices. If A is positive-definite and real, then casting to a complex matrix, taking the logarithm and casting back diff --git a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt index d13866ddaa1..b447d093778 100644 --- a/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ReduceJoin.pbtxt @@ -36,7 +36,7 @@ END summary: "Joins a string Tensor across the given dimensions." description: <<END Computes the string join across dimensions in the given string Tensor of shape -`[d_0, d_1, ..., d_n-1]`. 
Returns a new Tensor created by joining the input +\\([d_0, d_1, ..., d_{n-1}]\\). Returns a new Tensor created by joining the input strings with the given separator (default: empty string). Negative indices are counted backwards from the end, with `-1` being equivalent to `n - 1`. If indices are not specified, joins across all dimensions beginning from `n - 1` diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt index b0665ebf0e0..a9a76463146 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdAdd.pbtxt @@ -42,7 +42,7 @@ within a given variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` -[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -``` +$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt index e5c64c2b900..35116e5f6a2 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdNonAliasingAdd.pbtxt @@ -37,7 +37,7 @@ respect to both `input` and `updates`. `input` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. 
`indices` must be integer tensor, containing indices into `input`. -It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or `(P-K)`-dimensional slices @@ -45,9 +45,7 @@ indices into elements (if `K = P`) or `(P-K)`-dimensional slices `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` -[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]]. -``` +$$[d_0, ..., d_{Q-2}, input.shape[K], ..., input.shape[P-1]].$$ For example, say we want to add 4 scattered elements to a rank-1 tensor to 8 elements. In Python, that addition would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt index 333db017f56..99e5c4908bf 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdSub.pbtxt @@ -42,7 +42,7 @@ within a given variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` -[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -``` +$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ For example, say we want to subtract 4 scattered elements from a rank-1 tensor with 8 elements. 
In Python, that subtraction would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt index 33d98262d54..cb57c171b97 100644 --- a/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_ScatterNdUpdate.pbtxt @@ -42,7 +42,7 @@ variable according to `indices`. `ref` is a `Tensor` with rank `P` and `indices` is a `Tensor` of rank `Q`. `indices` must be integer tensor, containing indices into `ref`. -It must be shape `[d_0, ..., d_{Q-2}, K]` where `0 < K <= P`. +It must be shape \\([d_0, ..., d_{Q-2}, K]\\) where `0 < K <= P`. The innermost dimension of `indices` (with length `K`) corresponds to indices into elements (if `K = P`) or slices (if `K < P`) along the `K`th @@ -50,9 +50,7 @@ dimension of `ref`. `updates` is `Tensor` of rank `Q-1+P-K` with shape: -``` -[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]]. -``` +$$[d_0, ..., d_{Q-2}, ref.shape[K], ..., ref.shape[P-1]].$$ For example, say we want to update 4 scattered elements to a rank-1 tensor to 8 elements. 
In Python, that update would look like this: diff --git a/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt b/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt index 43884824c9e..b51b468c3da 100644 --- a/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_Softmax.pbtxt @@ -16,6 +16,6 @@ END description: <<END For each batch `i` and class `j` we have - softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j])) + $$softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt index 1698e2def07..06409d8db2f 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyAdagrad.pbtxt @@ -47,7 +47,7 @@ END summary: "Update relevant entries in \'*var\' and \'*accum\' according to the adagrad scheme." description: <<END That is for rows we have grad for, we update var and accum as follows: -accum += grad * grad -var -= lr * grad * (1 / sqrt(accum)) +$$accum += grad * grad$$ +$$var -= lr * grad * (1 / sqrt(accum))$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt index 2c6a36bf456..b3f2d3ea62b 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyCenteredRMSProp.pbtxt @@ -83,8 +83,8 @@ mean_square = decay * mean_square + (1-decay) * gradient ** 2 mean_grad = decay * mean_grad + (1-decay) * gradient Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) -ms <- rho * ms_{t-1} + (1-rho) * grad * grad -mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -var <- var - mom +$$ms <- rho * ms_{t-1} + (1-rho) * grad * grad$$ +$$mom <- momentum * mom_{t-1} 
+ lr * grad / sqrt(ms + epsilon)$$ +$$var <- var - mom$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt index 524b5c5a47d..9a6b6bca5f3 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyFtrl.pbtxt @@ -71,10 +71,10 @@ END summary: "Update relevant entries in \'*var\' according to the Ftrl-proximal scheme." description: <<END That is for rows we have grad for, we update var, accum and linear as follows: -accum_new = accum + grad * grad -linear += grad + (accum_new^(-lr_power) - accum^(-lr_power)) / lr * var -quadratic = 1.0 / (accum_new^(lr_power) * lr) + 2 * l2 -var = (sign(linear) * l1 - linear) / quadratic if |linear| > l1 else 0.0 -accum = accum_new +$$accum_{new} = accum + grad * grad$$ +$$linear += grad + (accum_{new}^{-lr_{power}} - accum^{-lr_{power}}) / lr * var$$ +$$quadratic = 1.0 / (accum_{new}^{lr_{power}} * lr) + 2 * l2$$ +$$var = (sign(linear) * l1 - linear) / quadratic\ if\ |linear| > l1\ else\ 0.0$$ +$$accum = accum_{new}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt index 8d9ac9ea3fa..17dbb488de0 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyMomentum.pbtxt @@ -64,7 +64,7 @@ Set use_nesterov = True if you want to use Nesterov momentum. 
That is for rows we have grad for, we update var and accum as follows: -accum = accum * momentum + grad -var -= lr * accum +$$accum = accum * momentum + grad$$ +$$var -= lr * accum$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt index 80541b91c7e..0b24f2ddd10 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -58,9 +58,9 @@ END summary: "Sparse update entries in \'*var\' and \'*accum\' according to FOBOS algorithm." description: <<END That is for rows we have grad for, we update var and accum as follows: -accum += grad * grad -prox_v = var -prox_v -= lr * grad * (1 / sqrt(accum)) -var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +$$accum += grad * grad$$ +$$prox_v = var$$ +$$prox_v -= lr * grad * (1 / sqrt(accum))$$ +$$var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt index 5200e5516df..9dc53860e52 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyProximalGradientDescent.pbtxt @@ -52,7 +52,7 @@ END summary: "Sparse update \'*var\' as FOBOS algorithm with fixed learning rate." 
description: <<END That is for rows we have grad for, we update var as follows: -prox_v = var - alpha * grad -var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0} +$$prox_v = var - alpha * grad$$ +$$var = sign(prox_v)/(1+alpha*l2) * max{|prox_v|-alpha*l1,0}$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt index a4dbd608b89..ee9f57fa9d0 100644 --- a/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SparseApplyRMSProp.pbtxt @@ -71,8 +71,8 @@ and mom will not update in iterations during which the grad is zero. mean_square = decay * mean_square + (1-decay) * gradient ** 2 Delta = learning_rate * gradient / sqrt(mean_square + epsilon) -ms <- rho * ms_{t-1} + (1-rho) * grad * grad -mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -var <- var - mom +$$ms <- rho * ms_{t-1} + (1-rho) * grad * grad$$ +$$mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon)$$ +$$var <- var - mom$$ END } diff --git a/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt new file mode 100644 index 00000000000..51af6adcf19 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SparseSliceGrad.pbtxt @@ -0,0 +1,40 @@ +op { + graph_op_name: "SparseSliceGrad" + in_arg { + name: "backprop_val_grad" + description: <<END +1-D. The gradient with respect to +the non-empty values of the sliced `SparseTensor`. +END + } + in_arg { + name: "input_indices" + description: <<END +2-D. The `indices` of the input `SparseTensor`. +END + } + in_arg { + name: "input_start" + description: <<END +1-D. Tensor representing the start of the slice. +END + } + in_arg { + name: "output_indices" + description: <<END +2-D. The `indices` of the sliced `SparseTensor`. +END + } + out_arg { + name: "val_grad" + description: <<END +1-D. 
The gradient with respect to the non-empty values of input `SparseTensor`. +END + } + summary: "The gradient operator for the SparseSlice op." + description: <<END +This op takes in the upstream gradient w.r.t. non-empty values of +the sliced `SparseTensor`, and outputs the gradients w.r.t. +the non-empty values of input `SparseTensor`. +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt index eb5d0d12472..9aeabd030d6 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentSum.pbtxt @@ -20,7 +20,7 @@ Read @{$math_ops#Segmentation$the section on segmentation} for an explanation of segments. Computes a tensor such that -`(output[i] = sum_{j...} data[j...]` where the sum is over tuples `j...` such +\\(output[i] = sum_{j...} data[j...]\\) where the sum is over tuples `j...` such that `segment_ids[j...] == i`. Unlike `SegmentSum`, `segment_ids` need not be sorted and need not cover all values in the full range of valid values. 
diff --git a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt deleted file mode 100644 index 083eeced81d..00000000000 --- a/tensorflow/core/api_def/python_api/api_def_BroadcastTo.pbtxt +++ /dev/null @@ -1,4 +0,0 @@ -op { - graph_op_name: "BroadcastTo" - visibility: HIDDEN -} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt new file mode 100644 index 00000000000..6ea8df46eca --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSliceGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSliceGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 466f6014710..07360d594bd 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -3941,6 +3941,7 @@ cc_library( ":sparse_reduce_op", ":sparse_reorder_op", ":sparse_reshape_op", + ":sparse_slice_grad_op", ":sparse_slice_op", ":sparse_softmax", ":sparse_sparse_binary_op_shared", @@ -4026,6 +4027,12 @@ tf_kernel_library( ], ) +tf_kernel_library( + name = "sparse_slice_grad_op", + prefix = "sparse_slice_grad_op", + deps = SPARSE_DEPS, +) + tf_kernel_library( name = "sparse_slice_op", prefix = "sparse_slice_op", diff --git a/tensorflow/core/kernels/conv_ops_test.cc b/tensorflow/core/kernels/conv_ops_test.cc index 8afe6a2cbdf..9acc725ba80 100644 --- a/tensorflow/core/kernels/conv_ops_test.cc +++ b/tensorflow/core/kernels/conv_ops_test.cc @@ -221,7 +221,7 @@ class FusedResizePadConvOpTest : public OpsTestBase { std::vector<Tensor> fused_tensors; TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors)); - test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5); + test::ExpectClose(unfused_tensors[0], fused_tensors[0]); } void CompareFusedPadOnlyAndSeparate(int input_width, int input_height, @@ -269,7 +269,7 @@ class FusedResizePadConvOpTest 
: public OpsTestBase { std::vector<Tensor> fused_tensors; TF_ASSERT_OK(session->Run({}, {"fused_conv"}, {}, &fused_tensors)); - test::ExpectTensorNear<float>(unfused_tensors[0], fused_tensors[0], 1e-5); + test::ExpectClose(unfused_tensors[0], fused_tensors[0]); } }; diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index 31d1b949ef0..d054f0d404a 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -704,14 +704,14 @@ class MklConcatOp : public OpKernel { if (input_tensors[k].NumElements() == 0) continue; - auto src_dims = TFShapeToMklDnnDims( - mkl_input_shapes[k].GetTfShape()); auto src_md = mkl_input_shapes[k].GetMklLayout(); srcs[k].SetUsrMem(src_md, &input_tensors[k]); - if (src_md.data.format != mkl_common_format) + if (src_md.data.format != mkl_common_format) { + memory::dims src_dims(src_md.data.dims, &src_md.data.dims[src_md.data.ndims]); src_md = memory::desc(src_dims, MklDnnType<T>(), mkl_common_format); + } srcs_pd.push_back(memory::primitive_desc(src_md, cpu_engine)); } diff --git a/tensorflow/core/kernels/sparse_slice_grad_op.cc b/tensorflow/core/kernels/sparse_slice_grad_op.cc new file mode 100644 index 00000000000..90a39ed818f --- /dev/null +++ b/tensorflow/core/kernels/sparse_slice_grad_op.cc @@ -0,0 +1,126 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_util.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/util/sparse/sparse_tensor.h" + +namespace tensorflow { + +template <typename T> +class SparseSliceGradOp : public OpKernel { + public: + explicit SparseSliceGradOp(OpKernelConstruction *ctx) : OpKernel(ctx) {} + + void Compute(OpKernelContext *ctx) override { + const Tensor *backprop_val_grad, *input_indices, *output_indices, *input_start; + OP_REQUIRES_OK(ctx, ctx->input("backprop_val_grad", &backprop_val_grad)); + OP_REQUIRES_OK(ctx, ctx->input("input_indices", &input_indices)); + OP_REQUIRES_OK(ctx, ctx->input("input_start", &input_start)); + OP_REQUIRES_OK(ctx, ctx->input("output_indices", &output_indices)); + + OP_REQUIRES(ctx, + TensorShapeUtils::IsMatrix(input_indices->shape()) && + TensorShapeUtils::IsMatrix(output_indices->shape()), + errors::InvalidArgument( + "Input and output indices should be matrices " + "but received shapes: ", + input_indices->shape().DebugString(), " and ", + output_indices->shape().DebugString())); + OP_REQUIRES( + ctx, TensorShapeUtils::IsVector(backprop_val_grad->shape()), + errors::InvalidArgument( + "Input backprop_val_grad should be a vector but received shape: ", + backprop_val_grad->shape().DebugString())); + OP_REQUIRES( + ctx, + input_indices->dim_size(1) == output_indices->dim_size(1), + errors::InvalidArgument("The input and output should have the same " + "ndims: got: ", input_indices->dim_size(1), " and ", + output_indices->dim_size(1))); + OP_REQUIRES( + ctx, output_indices->dim_size(0) <= input_indices->dim_size(0), + errors::InvalidArgument("# rows of output_indices should be not greater " + "than of input_indices, got ", + output_indices->dim_size(0), " and ", 
+ input_indices->dim_size(0))); + OP_REQUIRES( + ctx, backprop_val_grad->NumElements() == output_indices->dim_size(0), + errors::InvalidArgument("# elements of backprop_val_grad and # rows of " + "output_indices should match (#nnz of sum): got ", + backprop_val_grad->NumElements(), " and ", + output_indices->dim_size(0))); + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(input_start->shape()), + errors::InvalidArgument( + "The input_start should be a vector but received shape ", + input_start->shape().DebugString())); + + const int num_dims = input_indices->dim_size(1); + OP_REQUIRES(ctx, num_dims == input_start->NumElements(), + errors::InvalidArgument( + "Expected input_start to be a vector of length ", num_dims, + " but got length ", input_start->NumElements())); + + const int64 input_nnz = input_indices->dim_size(0); + + Tensor *val_grad; + OP_REQUIRES_OK(ctx, + ctx->allocate_output(0, TensorShape({input_nnz}), &val_grad)); + + T *val_grad_flat = val_grad->flat<T>().data(); + const T *backprop_val_grad_flat = backprop_val_grad->flat<T>().data(); + memset(val_grad_flat, 0, sizeof(T) * input_nnz); + + // Fill gradients for position where indices of input and output are same. + const auto input_indices_mat = input_indices->matrix<int64>(); + const auto output_indices_mat = output_indices->matrix<int64>(); + const auto input_start_flat = input_start->flat<int64>(); + int64 j = 0; + for (int64 i = 0; i < input_nnz && j < backprop_val_grad->NumElements(); + ++i) { + bool is_same = true; + for (int d = 0; d < num_dims; ++d) { + const int64 a = input_indices_mat(i, d); + const int64 b = output_indices_mat(j, d); + const int64 offset = input_start_flat(d); + if (a != b + offset) { + is_same = false; + break; + } + } + if (is_same) { + val_grad_flat[i] = backprop_val_grad_flat[j]; + ++j; + } + } + OP_REQUIRES( + ctx, backprop_val_grad->NumElements() == j, + errors::Internal("Elements of backprop_val_grad aren't all propagated. 
" + "Num elements:", backprop_val_grad->NumElements(), + ", used: ", j)); + } +}; + +#define REGISTER_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("SparseSliceGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \ + SparseSliceGradOp<type>) + +TF_CALL_NUMBER_TYPES(REGISTER_KERNELS); +#undef REGISTER_KERNELS +} // namespace tensorflow diff --git a/tensorflow/core/lib/db/sqlite_test.cc b/tensorflow/core/lib/db/sqlite_test.cc index 1e88323d017..15900559601 100644 --- a/tensorflow/core/lib/db/sqlite_test.cc +++ b/tensorflow/core/lib/db/sqlite_test.cc @@ -73,6 +73,21 @@ TEST_F(SqliteTest, InsertAndSelectDouble) { EXPECT_EQ(1, stmt.ColumnInt(1)); } +#ifdef DSQLITE_ENABLE_JSON1 +TEST_F(SqliteTest, Json1Extension) { + string s1 = "{\"key\": 42}"; + string s2 = "{\"key\": \"value\"}"; + auto stmt = db_->PrepareOrDie("INSERT INTO T (a, b) VALUES (?, ?)"); + stmt.BindText(1, s1); + stmt.BindText(2, s2); + TF_ASSERT_OK(stmt.StepAndReset()); + stmt = db_->PrepareOrDie("SELECT json_extract(a, '$.key'), json_extract(b, '$.key') FROM T"); + TF_ASSERT_OK(stmt.Step(&is_done_)); + EXPECT_EQ(42, stmt.ColumnInt(0)); + EXPECT_EQ("value", stmt.ColumnString(1)); +} +#endif //DSQLITE_ENABLE_JSON1 + TEST_F(SqliteTest, NulCharsInString) { string s; // XXX: Want to write {2, '\0'} but not sure why not. 
s.append(static_cast<size_t>(2), '\0'); diff --git a/tensorflow/core/ops/sparse_ops.cc b/tensorflow/core/ops/sparse_ops.cc index acc8c782efe..bc0cb2095da 100644 --- a/tensorflow/core/ops/sparse_ops.cc +++ b/tensorflow/core/ops/sparse_ops.cc @@ -302,6 +302,20 @@ REGISTER_OP("SparseSplit") return Status::OK(); }); +REGISTER_OP("SparseSliceGrad") + .Input("backprop_val_grad: T") + .Input("input_indices: int64") + .Input("input_start: int64") + .Input("output_indices: int64") + .Output("val_grad: T") + .Attr("T: numbertype") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle indices; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &indices)); + c->set_output(0, c->Vector(c->Dim(indices, 0))); + return Status::OK(); + }); + REGISTER_OP("SparseSlice") .Input("indices: int64") .Input("values: T") diff --git a/tensorflow/core/ops/sparse_ops_test.cc b/tensorflow/core/ops/sparse_ops_test.cc index 0df33204842..6a9b5ce4d31 100644 --- a/tensorflow/core/ops/sparse_ops_test.cc +++ b/tensorflow/core/ops/sparse_ops_test.cc @@ -52,6 +52,18 @@ TEST(SparseOpsTest, SparseAddGrad_ShapeFn) { INFER_OK(op, "?;[?,?];[?,?];?", "[d1_0];[d2_0]"); } +TEST(SparseOpsTest, SparseSliceGrad_ShapeFn) { + ShapeInferenceTestOp op("SparseSliceGrad"); + + // Rank checks. + INFER_ERROR("must be rank 2", op, "?;[1];?;?"); + + INFER_OK(op, "?;?;?;?", "[?]"); + + // input[1].dim(0) determine output. 
+ INFER_OK(op, "?;[?,?];?;?", "[d1_0]"); +} + TEST(SparseOpsTest, SparseReorder_ShapeFn) { ShapeInferenceTestOp op("SparseReorder"); diff --git a/tensorflow/docs_src/get_started/_index.yaml b/tensorflow/docs_src/get_started/_index.yaml index 277fc852fb5..40608048927 100644 --- a/tensorflow/docs_src/get_started/_index.yaml +++ b/tensorflow/docs_src/get_started/_index.yaml @@ -66,9 +66,7 @@ landing_page: } </style> <div class="devsite-landing-row-item-description"> - <a href="#"> - <h3 class="hide-from-toc">Learn and use ML</h3> - </a> + <h3 class="hide-from-toc">Learn and use ML</h3> <div class="devsite-landing-row-item-description-content"> <p> The high-level Keras API provides building blocks to create and @@ -117,9 +115,7 @@ landing_page: - items: - custom_html: > <div class="devsite-landing-row-item-description" style="border-right: 2px solid #eee;"> - <a href="https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/notebooks"> - <h3 class="hide-from-toc">Research and experimentation</h3> - </a> + <h3 class="hide-from-toc">Research and experimentation</h3> <div class="devsite-landing-row-item-description-content"> <p> Eager execution provides an imperative, define-by-run interface for advanced operations. Write custom layers, forward passes, and training loops with auto-differentiation. 
Start with @@ -170,9 +166,7 @@ landing_page: </div> - custom_html: > <div class="devsite-landing-row-item-description"> - <a href="#"> - <h3 class="hide-from-toc">ML at production scale</h3> - </a> + <h3 class="hide-from-toc">ML at production scale</h3> <div class="devsite-landing-row-item-description-content"> <p> Estimators can train large models on multiple machines in a diff --git a/tensorflow/docs_src/get_started/leftnav_files b/tensorflow/docs_src/get_started/leftnav_files index 5c400a67f0d..99d2b2c3e1f 100644 --- a/tensorflow/docs_src/get_started/leftnav_files +++ b/tensorflow/docs_src/get_started/leftnav_files @@ -1,7 +1,7 @@ ### Learn and use ML -basic_classification.md -basic_text_classification.md -basic_regression.md +basic_classification.md: Basic classification +basic_text_classification.md: Text classification +basic_regression.md: Regression overfit_and_underfit.md save_and_restore_models.md next_steps.md diff --git a/tensorflow/docs_src/get_started/next_steps.md b/tensorflow/docs_src/get_started/next_steps.md index 6318a39c6cd..01c9f7204a7 100644 --- a/tensorflow/docs_src/get_started/next_steps.md +++ b/tensorflow/docs_src/get_started/next_steps.md @@ -1,4 +1,4 @@ -# Next Steps +# Next steps ## Learn more about TensorFlow diff --git a/tensorflow/docs_src/guide/custom_estimators.md b/tensorflow/docs_src/guide/custom_estimators.md index fb20b35c128..a63e2bafb36 100644 --- a/tensorflow/docs_src/guide/custom_estimators.md +++ b/tensorflow/docs_src/guide/custom_estimators.md @@ -362,10 +362,10 @@ model's loss. This is the that will be optimized. We can calculate the loss by calling @{tf.losses.sparse_softmax_cross_entropy}. -The value returned by this function will be lowest, approximately 0, -probability of the correct class (at index `label`) is near 1.0. The loss value -returned is progressively larger as the probability of the correct class -decreases. 
+The value returned by this function will be approximately 0 at lowest, +when the probability of the correct class (at index `label`) is near 1.0. +The loss value returned is progressively larger as the probability of the +correct class decreases. This function returns the average over the whole batch. diff --git a/tensorflow/docs_src/guide/keras.md b/tensorflow/docs_src/guide/keras.md index 83172dab7fa..1d846df1044 100644 --- a/tensorflow/docs_src/guide/keras.md +++ b/tensorflow/docs_src/guide/keras.md @@ -35,7 +35,7 @@ from tensorflow import keras * The `tf.keras` version in the latest TensorFlow release might not be the same as the latest `keras` version from PyPI. Check `tf.keras.__version__`. * When [saving a model's weights](#weights_only), `tf.keras` defaults to the - [checkpoint format](../get_started/checkpoints.md). Pass `save_format='h5'` to + [checkpoint format](./checkpoints.md). Pass `save_format='h5'` to use HDF5. ## Build a simple model @@ -221,7 +221,7 @@ To *evaluate* the inference-mode loss and metrics for the data provided: ```python model.evaluate(x, y, batch_size=32) -model.evaluate(dataset, steps=30 +model.evaluate(dataset, steps=30) ``` And to *predict* the output of the last layer in inference for the data provided, @@ -442,7 +442,7 @@ model.load_weights('my_model') ``` By default, this saves the model's weights in the -[TensorFlow checkpoint](../get_started/checkpoints.md) file format. Weights can +[TensorFlow checkpoint](./checkpoints.md) file format. Weights can also be saved to the Keras HDF5 format (the default for the multi-backend implementation of Keras): @@ -581,15 +581,6 @@ model.compile(loss='binary_crossentropy', optimizer=optimizer) model.summary() ``` -Convert the Keras model to a `tf.estimator.Estimator` instance: - -```python -keras_estimator = keras.estimator.model_to_estimator( - keras_model=model, - config=config, - model_dir='/tmp/model_dir') -``` - Define an *input pipeline*. 
The `input_fn` returns a `tf.data.Dataset` object used to distribute the data across multiple devices—with each device processing a slice of the input batch. @@ -615,6 +606,15 @@ strategy = tf.contrib.distribute.MirroredStrategy() config = tf.estimator.RunConfig(train_distribute=strategy) ``` +Convert the Keras model to a `tf.estimator.Estimator` instance: + +```python +keras_estimator = keras.estimator.model_to_estimator( + keras_model=model, + config=config, + model_dir='/tmp/model_dir') +``` + Finally, train the `Estimator` instance by providing the `input_fn` and `steps` arguments: diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index e55520ceaa6..a641dc3a6f5 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -289,17 +289,27 @@ Note: If you're only interested in building the libraries for the TensorFlow C or Java APIs, see [Build the C or Java libraries](#BuildCorJava), you do not need to build the pip package in that case. 
-To build a pip package for TensorFlow with CPU-only support, -you would typically invoke the following command: +### CPU-only support + +To build a pip package for TensorFlow with CPU-only support: <pre> -$ <b>bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package</b> +$ bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package </pre> -To build a pip package for TensorFlow with GPU support, -invoke the following command: +To build a pip package for TensorFlow with CPU-only support for the Intel® MKL-DNN: -<pre>$ <b>bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package</b> </pre> +<pre> +$ bazel build --config=mkl --config=opt //tensorflow/tools/pip_package:build_pip_package +</pre> + +### GPU support + +To build a pip package for TensorFlow with GPU support: + +<pre> +$ bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package +</pre> **NOTE on gcc 5 or later:** the binary pip packages available on the TensorFlow website are built with gcc 4, which uses the older ABI. To diff --git a/tensorflow/docs_src/mobile/tflite/demo_android.md b/tensorflow/docs_src/mobile/tflite/demo_android.md index 1980fdeb66c..fdf0bcf3c11 100644 --- a/tensorflow/docs_src/mobile/tflite/demo_android.md +++ b/tensorflow/docs_src/mobile/tflite/demo_android.md @@ -44,23 +44,22 @@ app: Android Studio project. * Install all the Gradle extensions it requests. -To get a model, either: +Now you can build and run the demo app. -* Download the quantized [Mobilenet TensorFlow Lite model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip) - and unzip and copy `mobilenet_quant_v1_224.tflite` to the assets directory: - `tensorflow/contrib/lite/java/demo/app/src/main/assets/`. 
-* Or, download the floating point [Inception-v3 model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip) - and unzip and copy `inceptionv3_non_slim_2015.tflite` to the assets - directory. Change the chosen classifier in - [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java)<br> - from: `classifier = new ImageClassifierQuantizedMobileNet(getActivity());`<br> - to: `classifier = new ImageClassifierFloatInception(getActivity());`. - -Now you can build and run the demo app. +The build process downloads the quantized [Mobilenet TensorFlow Lite model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/mobilenet_v1_224_android_quant_2017_11_08.zip), and unzips it into the assets directory: `tensorflow/contrib/lite/java/demo/app/src/main/assets/`. Some additional details are available on the [TF Lite Android App page](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite/java/demo/README.md). +### Using other models + +To use a different model: +* Download the floating point [Inception-v3 model](https://storage.googleapis.com/download.tensorflow.org/models/tflite/inception_v3_slim_2016_android_2017_11_10.zip). +* Unzip and copy `inceptionv3_non_slim_2015.tflite` to the assets directory. +* Change the chosen classifier in [Camera2BasicFragment.java](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/Camera2BasicFragment.java)<br> + from: `classifier = new ImageClassifierQuantizedMobileNet(getActivity());`<br> + to: `classifier = new ImageClassifierFloatInception(getActivity());`. 
+ ## Build TensorFlow Lite and the demo app from source diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 212e3376377..791909f5fd5 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -470,51 +470,18 @@ as the loss metric. The following code calculates cross entropy when the model runs in either `TRAIN` or `EVAL` mode: ```python -onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10) -loss = tf.losses.softmax_cross_entropy( - onehot_labels=onehot_labels, logits=logits) +loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits) ``` Let's take a closer look at what's happening above. -Our `labels` tensor contains a list of predictions for our examples, e.g. `[1, -9, ...]`. In order to calculate cross-entropy, first we need to convert `labels` -to the corresponding -[one-hot encoding](https://www.quora.com/What-is-one-hot-encoding-and-when-is-it-used-in-data-science): +Our `labels` tensor contains a list of prediction indices for our examples, e.g. `[1, +9, ...]`. `logits` contains the linear outputs of our last layer. -```none -[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], - ...] -``` +`tf.losses.sparse_softmax_cross_entropy` calculates the softmax crossentropy +(aka: categorical crossentropy, negative log-likelihood) from these two inputs +in an efficient, numerically stable way. -We use the @{tf.one_hot} function -to perform this conversion. `tf.one_hot()` has two required arguments: - -* `indices`. The locations in the one-hot tensor that will have "on - values"—i.e., the locations of `1` values in the tensor shown above. -* `depth`. The depth of the one-hot tensor—i.e., the number of target classes. - Here, the depth is `10`. 
- -The following code creates the one-hot tensor for our labels, `onehot_labels`: - -```python -onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=10) -``` - -Because `labels` contains a series of values from 0–9, `indices` is just our -`labels` tensor, with values cast to integers. The `depth` is `10` because we -have 10 possible target classes, one for each digit. - -Next, we compute cross-entropy of `onehot_labels` and the softmax of the -predictions from our logits layer. `tf.losses.softmax_cross_entropy()` takes -`onehot_labels` and `logits` as arguments, performs softmax activation on -`logits`, calculates cross-entropy, and returns our `loss` as a scalar `Tensor`: - -```python -loss = tf.losses.softmax_cross_entropy( - onehot_labels=onehot_labels, logits=logits) -``` ### Configure the Training Op diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index b2dbdafc5f9..7f1f0970a6f 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -11210,7 +11210,7 @@ func SampleDistortedBoundingBoxAspectRatioRange(value []float32) SampleDistorted // SampleDistortedBoundingBoxAreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. // If not specified, defaults to <f:0.05 f:1 > func SampleDistortedBoundingBoxAreaRange(value []float32) SampleDistortedBoundingBoxAttr { return func(m optionalAttr) { @@ -17969,9 +17969,10 @@ func SparseFillEmptyRowsGrad(scope *Scope, reverse_index_map tf.Output, grad_val } // Computes scaled exponential linear: `scale * alpha * (exp(features) - 1)` -// // if < 0, `scale * features` otherwise. // +// Assumes weights to have zero mean and variance 1.0 / fan_in. 
+// // See [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) func Selu(scope *Scope, features tf.Output) (activations tf.Output) { if scope.Err() != nil { @@ -21655,7 +21656,7 @@ func ImageSummaryBadColor(value tf.Tensor) ImageSummaryAttr { // generated sequentially as '*tag*/image/0', '*tag*/image/1', etc. // // The `bad_color` argument is the color to use in the generated images for -// non-finite input values. It is a `unit8` 1-D tensor of length `channels`. +// non-finite input values. It is a `uint8` 1-D tensor of length `channels`. // Each element must be in the range `[0, 255]` (It represents the value of a // pixel in the output image). Non-finite values in the input tensor are // replaced by this tensor in the output image. The default value is the color @@ -24048,7 +24049,7 @@ func SampleDistortedBoundingBoxV2AspectRatioRange(value []float32) SampleDistort // SampleDistortedBoundingBoxV2AreaRange sets the optional area_range attribute to value. // // value: The cropped area of the image must contain a fraction of the -// supplied image within in this range. +// supplied image within this range. // If not specified, defaults to <f:0.05 f:1 > func SampleDistortedBoundingBoxV2AreaRange(value []float32) SampleDistortedBoundingBoxV2Attr { return func(m optionalAttr) { @@ -24744,8 +24745,7 @@ type DecodeProtoV2Attr func(optionalAttr) // If not specified, defaults to "local://" func DecodeProtoV2DescriptorSource(value string) DecodeProtoV2Attr { return func(m optionalAttr) { - m["descriptor_source"] = value - } + m["descriptor_source"] = value } } // DecodeProtoV2MessageFormat sets the optional message_format attribute to value. 
diff --git a/tensorflow/java/src/gen/cc/source_writer.cc b/tensorflow/java/src/gen/cc/source_writer.cc index 66401bdba72..8e5fba7e32f 100644 --- a/tensorflow/java/src/gen/cc/source_writer.cc +++ b/tensorflow/java/src/gen/cc/source_writer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include <string> #include <algorithm> #include <list> #include <string> diff --git a/tensorflow/java/src/main/java/org/tensorflow/Graph.java b/tensorflow/java/src/main/java/org/tensorflow/Graph.java index d4fd3db5f73..7d19696749b 100644 --- a/tensorflow/java/src/main/java/org/tensorflow/Graph.java +++ b/tensorflow/java/src/main/java/org/tensorflow/Graph.java @@ -143,6 +143,82 @@ public final class Graph implements AutoCloseable { } } + /** + * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s, + * i.e., {@code d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...} + * <p> + * {@code dx} are used as initial gradients (which represent the symbolic partial derivatives of some loss function + * {@code L} w.r.t. {@code y}). {@code dx} must be null or have size of {@code y}. + * <p> + * If {@code dx} is null, the implementation will use dx of {@link org.tensorflow.op.core.OnesLike OnesLike} for all + * shapes in {@code y}. + * + * @param y output of the function to derive + * @param x inputs of the function for which partial derivatives are computed + * @param dx if not null, the partial derivatives of some loss function {@code L} w.r.t. 
{@code y} + * @return the partial derivatives {@code dy} with the size of {@code x} + */ + public Output<?>[] addGradients(Output<?>[] y, Output<?>[] x, Output<?>[] dx) { + Output<?>[] dy = new Output<?>[x.length]; + final long[] yHandles = new long[y.length]; + final int[] yIndices = new int[y.length]; + final long[] xHandles = new long[x.length]; + final int[] xIndices = new int[x.length]; + long[] dxHandles = null; + int[] dxIndices = null; + + try (Reference ref = ref()) { + for (int i = 0; i < y.length; ++i) { + yHandles[i] = y[i].op().getUnsafeNativeHandle(); + yIndices[i] = y[i].index(); + } + for (int i = 0; i < x.length; ++i) { + xHandles[i] = x[i].op().getUnsafeNativeHandle(); + xIndices[i] = x[i].index(); + } + if (dx != null && dx.length > 0) { + dxHandles = new long[dx.length]; + dxIndices = new int[dx.length]; + + for (int i = 0; i < dx.length; ++i) { + dxHandles[i] = dx[i].op().getUnsafeNativeHandle(); + dxIndices[i] = dx[i].index(); + } + } + // Gradient outputs are returned in two continuous arrays concatenated into one. The first holds the native handles + // of the gradient operations while the second holds the index of their output + // e.g. given xHandles = [x0Handle, x1Handle, ...] and xIndices = [x0Index, x1Index, ..], we obtain + // dy = [dy0Handle, dy1Handle, ..., dy0Index, dy1Index, ...] 
+ long[] dyHandlesAndIndices = + addGradients(ref.nativeHandle(), yHandles, yIndices, xHandles, xIndices, dxHandles, dxIndices); + int ndy = dyHandlesAndIndices.length >> 1; + if (ndy != dy.length) { + throw new IllegalStateException(String.valueOf(ndy) + " gradients were added to the graph when " + dy.length + + " were expected"); + } + for (int i = 0, j = ndy; i < ndy; ++i, ++j) { + Operation op = new Operation(this, dyHandlesAndIndices[i]); + dy[i] = new Output<>(op, (int) dyHandlesAndIndices[j]); + } + } + return dy; + } + + /** + * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s, + * i.e., {@code dy/dx_1, dy/dx_2...} + * <p> + * This is a simplified version of {@link #addGradients(Output[], Output[], Output[])} where {@code y} is + * a single output and {@code dx} is null. + * + * @param y output of the function to derive + * @param x inputs of the function for which partial derivatives are computed + * @return the partial derivatives {@code dy} with the size of {@code x} + */ + public Output<?>[] addGradients(Output<?> y, Output<?>[] x) { + return addGradients(new Output<?>[]{y}, x, null); + } + private final Object nativeHandleLock = new Object(); private long nativeHandle; private int refcount = 0; @@ -254,6 +330,9 @@ public final class Graph implements AutoCloseable { private static native byte[] toGraphDef(long handle); + private static native long[] addGradients(long handle, long[] inputHandles, int[] inputIndices, + long[] outputHandles, int[] outputIndices, long[] gradInputHandles, int[] gradInputIndices); + static { TensorFlow.init(); } diff --git a/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java new file mode 100644 index 00000000000..f4671c8af94 --- /dev/null +++ b/tensorflow/java/src/main/java/org/tensorflow/op/core/Gradients.java @@ -0,0 +1,153 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.op.core; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.tensorflow.Operand; +import org.tensorflow.Output; +import org.tensorflow.op.Op; +import org.tensorflow.op.Operands; +import org.tensorflow.op.Scope; +import org.tensorflow.op.annotation.Operator; + +/** + * Adds operations to compute the partial derivatives of sum of {@code y}s w.r.t {@code x}s, + * i.e., {@code d(y_1 + y_2 + ...)/dx_1, d(y_1 + y_2 + ...)/dx_2...} + * <p> + * If {@code Options.dx()} values are set, they are used as the initial symbolic partial derivatives of some loss + * function {@code L} w.r.t. {@code y}. {@code Options.dx()} must have the size of {@code y}. + * <p> + * If {@code Options.dx()} is not set, the implementation will use dx of {@code OnesLike} for all + * shapes in {@code y}. + * <p> + * The partial derivatives are returned in output {@code dy}, with the size of {@code x}. 
+ * <p> + * Example of usage: + * <pre>{@code + * Gradients gradients = Gradients.create(scope, Arrays.asList(loss), Arrays.asList(w, b)); + * + * Constant<Float> alpha = ops.constant(1.0f, Float.class); + * ApplyGradientDescent.create(scope, w, alpha, gradients.<Float>dy(0)); + * ApplyGradientDescent.create(scope, b, alpha, gradients.<Float>dy(1)); + * }</pre> + */ +@Operator +public class Gradients implements Op, Iterable<Operand<?>> { + + /** + * Optional attributes for {@link Gradients} + */ + public static class Options { + + /** + * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y} + * @return this option builder + */ + public Options dx(Iterable<Operand<?>> dx) { + this.dx = dx; + return this; + } + + private Iterable<Operand<?>> dx; + + private Options() { + } + } + + /** + * Adds gradients computation ops to the graph according to scope. + * + * @param scope current graph scope + * @param y outputs of the function to derive + * @param x inputs of the function for which partial derivatives are computed + * @param options carries optional attributes values + * @return a new instance of {@code Gradients} + */ + public static Gradients create(Scope scope, Iterable<Operand<?>> y, Iterable<Operand<?>> x, Options... options) { + Output<?>[] dx = null; + if (options != null) { + for (Options opts : options) { + if (opts.dx != null) { + dx = Operands.asOutputs(opts.dx); + } + } + } + Output<?>[] gradOutputs = scope.graph().addGradients(Operands.asOutputs(y), Operands.asOutputs(x), dx); + return new Gradients(Arrays.asList(gradOutputs)); + } + + /** + * Adds gradients computation ops to the graph according to scope. + * + * This is a simplified version of {@link #create(Scope, Iterable, Iterable, Options...)} where {@code y} is + * a single output. 
+ * + * @param scope current graph scope + * @param y output of the function to derive + * @param x inputs of the function for which partial derivatives are computed + * @param options carries optional attributes values + * @return a new instance of {@code Gradients} + */ + @SuppressWarnings({"unchecked", "rawtypes"}) + public static Gradients create(Scope scope, Operand<?> y, Iterable<Operand<?>> x, Options... options) { + return create(scope, (Iterable) Arrays.asList(y), x, options); + } + + /** + * @param dx partial derivatives of some loss function {@code L} w.r.t. {@code y} + * @return builder to add more options to this operation + */ + public Options dx(Iterable<Operand<?>> dx) { + return new Options().dx(dx); + } + + @Override + @SuppressWarnings({"rawtypes", "unchecked"}) + public Iterator<Operand<?>> iterator() { + return (Iterator) dy.iterator(); + } + + /** + * Partial derivatives of {@code y}s w.r.t. {@code x}s, with the size of {@code x} + */ + public List<Output<?>> dy() { + return dy; + } + + /** + * Returns a symbolic handle to one of the gradient operation output + * <p> + * Warning: Does not check that the type of the tensor matches T. It is recommended to call + * this method with an explicit type parameter rather than letting it be inferred, e.g. {@code + * gradients.<Integer>dy(0)} + * + * @param <T> The expected element type of the tensors produced by this output. 
+ * @param index The index of the output among the gradients added by this operation + */ + @SuppressWarnings("unchecked") + public <T> Output<T> dy(int index) { + return (Output<T>) dy.get(index); + } + + private List<Output<?>> dy; + + private Gradients(List<Output<?>> dy) { + this.dy = dy; + } +} diff --git a/tensorflow/java/src/main/native/graph_jni.cc b/tensorflow/java/src/main/native/graph_jni.cc index 0fef1552758..dac6a345e91 100644 --- a/tensorflow/java/src/main/native/graph_jni.cc +++ b/tensorflow/java/src/main/native/graph_jni.cc @@ -16,7 +16,9 @@ limitations under the License. #include "tensorflow/java/src/main/native/graph_jni.h" #include <limits> +#include <memory> #include "tensorflow/c/c_api.h" +#include "tensorflow/java/src/main/native/utils_jni.h" #include "tensorflow/java/src/main/native/exception_jni.h" namespace { @@ -130,3 +132,55 @@ Java_org_tensorflow_Graph_toGraphDef(JNIEnv* env, jclass clazz, jlong handle) { TF_DeleteBuffer(buf); return ret; } + +JNIEXPORT jlongArray JNICALL +Java_org_tensorflow_Graph_addGradients(JNIEnv* env, jclass clazz, jlong handle, + jlongArray y_handles, jintArray y_indices, + jlongArray x_handles, jintArray x_indices, + jlongArray dx_handles, jintArray dx_indices) { + + TF_Graph* g = requireHandle(env, handle); + if (g == nullptr) return nullptr; + + const jint ny = env->GetArrayLength(y_handles); + const jint nx = env->GetArrayLength(x_handles); + + std::unique_ptr<TF_Output[]> y(new TF_Output[ny]); + std::unique_ptr<TF_Output[]> x(new TF_Output[nx]); + std::unique_ptr<TF_Output[]> dx(nullptr); + std::unique_ptr<TF_Output[]> dy(new TF_Output[nx]); + + resolveOutputs(env, "y", y_handles, y_indices, y.get(), ny); + resolveOutputs(env, "x", x_handles, x_indices, x.get(), nx); + if (dx_handles != nullptr) { + if (env->GetArrayLength(dx_handles) != ny) { + throwException(env, kIllegalArgumentException, + "expected %d, got %d dx handles", ny, + env->GetArrayLength(dx_handles)); + } + dx.reset(new TF_Output[ny]); + 
resolveOutputs(env, "dx", dx_handles, dx_indices, dx.get(), ny); + } + if (env->ExceptionCheck()) return nullptr; + + TF_Status* status = TF_NewStatus(); + TF_AddGradients(g, y.get(), ny, x.get(), nx, dx.get(), status, dy.get()); + + if (!throwExceptionIfNotOK(env, status)) { + TF_DeleteStatus(status); + return nullptr; + } + TF_DeleteStatus(status); + + // returned array contains both op handles and output indices, in pair + jlongArray dy_handles_and_indices = env->NewLongArray(nx << 1); + jlong* dy_elems = env->GetLongArrayElements(dy_handles_and_indices, nullptr); + for (int i = 0, j = nx; i < nx; ++i, ++j) { + TF_Output dy_output = dy.get()[i]; + dy_elems[i] = reinterpret_cast<jlong>(dy_output.oper); + dy_elems[j] = static_cast<jlong>(dy_output.index); + } + env->ReleaseLongArrayElements(dy_handles_and_indices, dy_elems, 0); + + return dy_handles_and_indices; +} diff --git a/tensorflow/java/src/main/native/graph_jni.h b/tensorflow/java/src/main/native/graph_jni.h index dd2e038332f..4f87e8d5a79 100644 --- a/tensorflow/java/src/main/native/graph_jni.h +++ b/tensorflow/java/src/main/native/graph_jni.h @@ -73,6 +73,15 @@ JNIEXPORT jbyteArray JNICALL Java_org_tensorflow_Graph_toGraphDef(JNIEnv *, jclass, jlong); +/* + * Class: org_tensorflow_Graph + * Method: addGradients + * Signature: (J[J[I[J[I[J[I)[J + */ +JNIEXPORT jlongArray JNICALL Java_org_tensorflow_Graph_addGradients(JNIEnv *, + jclass, jlong, jlongArray, jintArray, jlongArray, jintArray, jlongArray, + jintArray); + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/java/src/main/native/session_jni.cc b/tensorflow/java/src/main/native/session_jni.cc index 2cd542d3c9b..cb54daf1379 100644 --- a/tensorflow/java/src/main/native/session_jni.cc +++ b/tensorflow/java/src/main/native/session_jni.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include <memory> #include "tensorflow/c/c_api.h" +#include "tensorflow/java/src/main/native/utils_jni.h" #include "tensorflow/java/src/main/native/exception_jni.h" #include "tensorflow/java/src/main/native/session_jni.h" @@ -55,37 +56,6 @@ void resolveHandles(JNIEnv* env, const char* type, jlongArray src_array, env->ReleaseLongArrayElements(src_array, src_start, JNI_ABORT); } -void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op, - jintArray src_index, TF_Output* dst, jint n) { - if (env->ExceptionCheck()) return; - jint len = env->GetArrayLength(src_op); - if (len != n) { - throwException(env, kIllegalArgumentException, - "expected %d, got %d %s Operations", n, len, type); - return; - } - len = env->GetArrayLength(src_index); - if (len != n) { - throwException(env, kIllegalArgumentException, - "expected %d, got %d %s Operation output indices", n, len, - type); - return; - } - jlong* op_handles = env->GetLongArrayElements(src_op, nullptr); - jint* indices = env->GetIntArrayElements(src_index, nullptr); - for (int i = 0; i < n; ++i) { - if (op_handles[i] == 0) { - throwException(env, kNullPointerException, "invalid %s (#%d of %d)", type, - i, n); - break; - } - dst[i] = TF_Output{reinterpret_cast<TF_Operation*>(op_handles[i]), - static_cast<int>(indices[i])}; - } - env->ReleaseIntArrayElements(src_index, indices, JNI_ABORT); - env->ReleaseLongArrayElements(src_op, op_handles, JNI_ABORT); -} - void TF_MaybeDeleteBuffer(TF_Buffer* buf) { if (buf == nullptr) return; TF_DeleteBuffer(buf); diff --git a/tensorflow/java/src/main/native/utils_jni.cc b/tensorflow/java/src/main/native/utils_jni.cc new file mode 100644 index 00000000000..069ac05a1c3 --- /dev/null +++ b/tensorflow/java/src/main/native/utils_jni.cc @@ -0,0 +1,53 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/java/src/main/native/utils_jni.h" + +#include "tensorflow/java/src/main/native/exception_jni.h" + +void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op, + jintArray src_index, TF_Output* dst, jint n) { + if (env->ExceptionCheck()) return; + jint len = env->GetArrayLength(src_op); + if (len != n) { + throwException(env, kIllegalArgumentException, + "expected %d, got %d %s Operations", n, len, type); + return; + } + len = env->GetArrayLength(src_index); + if (len != n) { + throwException(env, kIllegalArgumentException, + "expected %d, got %d %s Operation output indices", n, len, + type); + return; + } + jlong* op_handles = env->GetLongArrayElements(src_op, nullptr); + jint* indices = env->GetIntArrayElements(src_index, nullptr); + for (int i = 0; i < n; ++i) { + if (op_handles[i] == 0) { + throwException(env, kNullPointerException, "invalid %s (#%d of %d)", type, + i, n); + break; + } + dst[i] = TF_Output{reinterpret_cast<TF_Operation*>(op_handles[i]), + static_cast<int>(indices[i])}; + } + env->ReleaseIntArrayElements(src_index, indices, JNI_ABORT); + env->ReleaseLongArrayElements(src_op, op_handles, JNI_ABORT); +} + + + + diff --git a/tensorflow/java/src/main/native/utils_jni.h b/tensorflow/java/src/main/native/utils_jni.h new file mode 100644 index 00000000000..352298e7de1 --- /dev/null +++ b/tensorflow/java/src/main/native/utils_jni.h @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_JAVA_UTILS_JNI_H_ +#define TENSORFLOW_JAVA_UTILS_JNI_H_ + +#include <jni.h> + +#include "tensorflow/c/c_api.h" + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +void resolveOutputs(JNIEnv* env, const char* type, jlongArray src_op, + jintArray src_index, TF_Output* dst, jint n); + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus +#endif /* TENSORFLOW_JAVA_UTILS_JNI_H_ */ diff --git a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java index c540299bdcf..c2e52c22c6d 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/GraphTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertTrue; import java.util.HashSet; import java.util.Iterator; + import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -129,4 +130,106 @@ public class GraphTest { // expected exception. 
} } + + @Test + public void addGradientsToGraph() { + try (Graph g = new Graph(); + Session s = new Session(g)) { + + Output<Float> x1 = TestUtil.placeholder(g, "x1", Float.class); + Output<Float> x2 = TestUtil.placeholder(g, "x2", Float.class); + Output<Float> y0 = TestUtil.square(g, "y0", x1); + Output<Float> y1 = TestUtil.square(g, "y1", y0); + Output<Float> y2 = TestUtil.addN(g, y0, x2); + + Output<?>[] grads0 = g.addGradients(y1, toArray(x1)); + assertNotNull(grads0); + assertEquals(1, grads0.length); + assertEquals(DataType.FLOAT, grads0[0].dataType()); + + Output<?>[] grads1 = g.addGradients(y2, toArray(x1, x2)); + assertNotNull(grads1); + assertEquals(2, grads1.length); + assertEquals(DataType.FLOAT, grads1[0].dataType()); + assertEquals(DataType.FLOAT, grads1[1].dataType()); + + try (Tensor<Float> c1 = Tensors.create(3.0f); + Tensor<Float> c2 = Tensors.create(2.0f); + TestUtil.AutoCloseableList<Tensor<?>> outputs = new TestUtil.AutoCloseableList<>( + s.runner() + .feed(x1, c1) + .feed(x2, c2) + .fetch(grads0[0]) + .fetch(grads1[0]) + .fetch(grads1[1]) + .run())) { + + assertEquals(3, outputs.size()); + assertEquals(108.0f, outputs.get(0).floatValue(), 0.0f); + assertEquals(6.0f, outputs.get(1).floatValue(), 0.0f); + assertEquals(1.0f, outputs.get(2).floatValue(), 0.0f); + } + } + } + + @Test + public void addGradientSumsToGraph() { + try (Graph g = new Graph(); + Session s = new Session(g)) { + + Output<Float> x = TestUtil.placeholder(g, "x", Float.class); + Output<Float> y0 = TestUtil.square(g, "y0", x); + Output<Float> y1 = TestUtil.square(g, "y1", y0); + + Output<?>[] grad = g.addGradients(toArray(y0, y1), toArray(x), null); + assertNotNull(grad); + assertEquals(1, grad.length); + assertEquals(DataType.FLOAT, grad[0].dataType()); + + try (Tensor<Float> c = Tensors.create(3.0f); + Tensor<?> output = s.runner() + .feed(x, c) + .fetch(grad[0]) + .run() + .get(0)) { + + assertEquals(114.0f, output.floatValue(), 0.0f); + } + } + } + + @Test + public void 
addGradientsWithInitialValuesToGraph() { + try (Graph g = new Graph(); + Session s = new Session(g)) { + + Output<Float> x = TestUtil.placeholder(g, "x", Float.class); + Output<Float> y0 = TestUtil.square(g, "y0", x); + Output<Float> y1 = TestUtil.square(g, "y1", y0); + + Output<?>[] grad0 = g.addGradients(y1, toArray(y0)); + assertNotNull(grad0); + assertEquals(1, grad0.length); + assertEquals(DataType.FLOAT, grad0[0].dataType()); + + Output<?>[] grad1 = g.addGradients(toArray(y0), toArray(x), toArray(grad0[0])); + assertNotNull(grad1); + assertEquals(1, grad1.length); + assertEquals(DataType.FLOAT, grad1[0].dataType()); + + try (Tensor<Float> c = Tensors.create(3.0f); + Tensor<?> output = s.runner() + .feed(x, c) + .fetch(grad1[0]) + .run() + .get(0)) { + + assertEquals(108.0f, output.floatValue(), 0.0f); + } + } + } + + private static Output<?>[] toArray(Output<?>... outputs) { + return outputs; + } } diff --git a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java index e8cc76c2a64..7d5980bcded 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java +++ b/tensorflow/java/src/test/java/org/tensorflow/SessionTest.java @@ -20,8 +20,6 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; -import java.util.ArrayList; -import java.util.Collection; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -36,8 +34,8 @@ public class SessionTest { Session s = new Session(g)) { TestUtil.transpose_A_times_X(g, new int[][] {{2}, {3}}); try (Tensor<Integer> x = Tensors.create(new int[][] {{5}, {7}}); - AutoCloseableList<Tensor<?>> outputs = - new AutoCloseableList<Tensor<?>>(s.runner().feed("X", x).fetch("Y").run())) { + TestUtil.AutoCloseableList<Tensor<?>> outputs = + new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().feed("X", x).fetch("Y").run())) { assertEquals(1, 
outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -53,8 +51,8 @@ public class SessionTest { Output<Integer> feed = g.operation("X").output(0); Output<Integer> fetch = g.operation("Y").output(0); try (Tensor<Integer> x = Tensors.create(new int[][] {{5}, {7}}); - AutoCloseableList<Tensor<?>> outputs = - new AutoCloseableList<Tensor<?>>(s.runner().feed(feed, x).fetch(fetch).run())) { + TestUtil.AutoCloseableList<Tensor<?>> outputs = + new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().feed(feed, x).fetch(fetch).run())) { assertEquals(1, outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -112,7 +110,7 @@ public class SessionTest { .setOptions(fullTraceRunOptions()) .runAndFetchMetadata(); // Sanity check on outputs. - AutoCloseableList<Tensor<?>> outputs = new AutoCloseableList<Tensor<?>>(result.outputs); + TestUtil.AutoCloseableList<Tensor<?>> outputs = new TestUtil.AutoCloseableList<Tensor<?>>(result.outputs); assertEquals(1, outputs.size()); final int[][] expected = {{31}}; assertArrayEquals(expected, outputs.get(0).copyTo(new int[1][1])); @@ -135,8 +133,8 @@ public class SessionTest { Session s = new Session(g)) { TestUtil.constant(g, "c1", 2718); TestUtil.constant(g, "c2", 31415); - AutoCloseableList<Tensor<?>> outputs = - new AutoCloseableList<Tensor<?>>(s.runner().fetch("c2").fetch("c1").run()); + TestUtil.AutoCloseableList<Tensor<?>> outputs = + new TestUtil.AutoCloseableList<Tensor<?>>(s.runner().fetch("c2").fetch("c1").run()); assertEquals(2, outputs.size()); assertEquals(31415, outputs.get(0).intValue()); assertEquals(2718, outputs.get(1).intValue()); @@ -164,28 +162,6 @@ public class SessionTest { Session s = new Session(g, singleThreadConfigProto())) {} } - private static final class AutoCloseableList<E extends AutoCloseable> extends ArrayList<E> - implements AutoCloseable { - AutoCloseableList(Collection<? 
extends E> c) { - super(c); - } - - @Override - public void close() { - Exception toThrow = null; - for (AutoCloseable c : this) { - try { - c.close(); - } catch (Exception e) { - toThrow = e; - } - } - if (toThrow != null) { - throw new RuntimeException(toThrow); - } - } - } - private static byte[] fullTraceRunOptions() { // Ideally this would use the generated Java sources for protocol buffers // and end up with something like the snippet below. However, generating diff --git a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java index c973b5a3d8b..4e848864167 100644 --- a/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java +++ b/tensorflow/java/src/test/java/org/tensorflow/TestUtil.java @@ -16,9 +16,34 @@ limitations under the License. package org.tensorflow; import java.lang.reflect.Array; +import java.util.ArrayList; +import java.util.Collection; /** Static utility functions. */ public class TestUtil { + + public static final class AutoCloseableList<E extends AutoCloseable> extends ArrayList<E> + implements AutoCloseable { + AutoCloseableList(Collection<? extends E> c) { + super(c); + } + + @Override + public void close() { + Exception toThrow = null; + for (AutoCloseable c : this) { + try { + c.close(); + } catch (Exception e) { + toThrow = e; + } + } + if (toThrow != null) { + throw new RuntimeException(toThrow); + } + } + } + public static <T> Output<T> constant(Graph g, String name, Object value) { try (Tensor<?> t = Tensor.create(value)) { return g.opBuilder("Const", name) @@ -36,7 +61,7 @@ public class TestUtil { .<T>output(0); } - public static Output<?> addN(Graph g, Output<?>... inputs) { + public static <T> Output<T> addN(Graph g, Output<?>... 
inputs) { return g.opBuilder("AddN", "AddN").addInputList(inputs).build().output(0); } @@ -58,6 +83,13 @@ public class TestUtil { .setAttr("num_split", numSplit) .build(); } + + public static <T> Output<T> square(Graph g, String name, Output<T> value) { + return g.opBuilder("Square", name) + .addInput(value) + .build() + .<T>output(0); + } public static void transpose_A_times_X(Graph g, int[][] a) { Output<Integer> aa = constant(g, "A", a); diff --git a/tensorflow/python/estimator/model_fn.py b/tensorflow/python/estimator/model_fn.py index 009ac9d8fd0..a9fd8f8e1a4 100644 --- a/tensorflow/python/estimator/model_fn.py +++ b/tensorflow/python/estimator/model_fn.py @@ -99,7 +99,7 @@ class EstimatorSpec( ignored in eval and infer modes. Example: ```python - def my_model_fn(mode, features, labels): + def my_model_fn(features, labels, mode): predictions = ... loss = ... train_op = ... @@ -114,7 +114,7 @@ class EstimatorSpec( given mode. Example: ```python - def my_model_fn(mode, features, labels): + def my_model_fn(features, labels, mode): if (mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL): loss = ... diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 89afd1d25b7..cf0b1e36fb3 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -3239,8 +3239,9 @@ class Graph(object): # the name will still appear in _names_in_use even though the name hasn't # been used. This is ok, just leave _names_in_use as-is in this case. # TODO(skyewm): make the C API guarantee no name conflicts. 
- if ret.name not in self._names_in_use: - self._names_in_use[ret.name] = 1 + name_key = ret.name.lower() + if name_key not in self._names_in_use: + self._names_in_use[name_key] = 1 self._create_op_helper(ret, compute_device=compute_device) return ret @@ -3949,20 +3950,27 @@ class Graph(object): """ if self._name_stack: name = self._name_stack + "/" + name - i = self._names_in_use.get(name, 0) - # Increment the number for "name". + + # For the sake of checking for names in use, we treat names as case + # insensitive (e.g. foo = Foo). + name_key = name.lower() + i = self._names_in_use.get(name_key, 0) + # Increment the number for "name_key". if mark_as_used: - self._names_in_use[name] = i + 1 + self._names_in_use[name_key] = i + 1 if i > 0: - base_name = name - # Make sure the composed name is not already used. - while name in self._names_in_use: - name = "%s_%d" % (base_name, i) + base_name_key = name_key + # Make sure the composed name key is not already used. + while name_key in self._names_in_use: + name_key = "%s_%d" % (base_name_key, i) i += 1 - # Mark the composed name as used in case someone wants + # Mark the composed name_key as used in case someone wants # to call unique_name("name_1"). if mark_as_used: - self._names_in_use[name] = 1 + self._names_in_use[name_key] = 1 + + # Return the new name with the original capitalization of the given name. 
+ name = "%s_%d" % (name, i-1) return name def get_name_scope(self): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index c72406e92b0..150100d771b 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -965,6 +965,15 @@ class NameStackTest(test_util.TensorFlowTestCase): self.assertEqual("foo_1", g.unique_name("foo")) self.assertEqual("foo_3", g.unique_name("foo")) + def testUniqueNameCaseInsensitivity(self): + g = ops.Graph() + self.assertEqual("foo", g.unique_name("foo")) + self.assertEqual("Foo_1", g.unique_name("Foo")) + with g.name_scope("bar"): + self.assertEqual("bar/foo", g.unique_name("foo")) + with g.name_scope("Bar"): + self.assertEqual("Bar_1/foo", g.unique_name("foo")) + def testInvalidNameRaisesError(self): g = ops.Graph() with g.name_scope(""): # Should not raise diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 2c9f391d01d..7d07c77c797 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -1390,7 +1390,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 3 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def testLoopWithVecAnd4D(self): @@ -1414,7 +1414,7 @@ class LayoutOptimizerTest(test.TestCase): expected_num_transposes = 2 self.assertEqual(expected_num_transposes, num_transposes) self._assert_trans_nhwc_to_nchw('map/while/Conv2D-0', nodes) - self._assert_trans_nchw_to_nhwc('map/while/Add-0-2', nodes) + self._assert_trans_nchw_to_nhwc('map/while/Add_1-0-2', nodes) self.assertAllClose(output_val_ref, output_val, atol=1e-3) def 
testBinaryOpSecondPort(self): diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 5796c874f96..8a6614c8371 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -893,6 +893,7 @@ tf_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", "//tensorflow/python:framework", + "//tensorflow/python:sparse_grad", "//tensorflow/python:sparse_ops", ], ) diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 795aa67248f..927ca012ae6 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -364,14 +364,52 @@ class UniformUnitScalingInitializationTest(test.TestCase): class VarianceScalingInitializationTest(test.TestCase): + def testTruncatedNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer( + distribution='truncated_normal') + + with self.test_session(use_gpu=True), \ + test.mock.patch.object( + random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \ + as mock_truncated_normal: + x = init(shape).eval() + self.assertTrue(mock_truncated_normal.called) + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + def testNormalDistribution(self): shape = [100, 100] expect_mean = 0. expect_var = 1. 
/ shape[0] init = init_ops.variance_scaling_initializer(distribution='normal') - with self.test_session(use_gpu=True): + with self.test_session(use_gpu=True), \ + test.mock.patch.object( + random_ops, 'truncated_normal', wraps=random_ops.truncated_normal) \ + as mock_truncated_normal: x = init(shape).eval() + self.assertTrue(mock_truncated_normal.called) + + self.assertNear(np.mean(x), expect_mean, err=1e-2) + self.assertNear(np.var(x), expect_var, err=1e-2) + + def testUntruncatedNormalDistribution(self): + shape = [100, 100] + expect_mean = 0. + expect_var = 1. / shape[0] + init = init_ops.variance_scaling_initializer( + distribution='untruncated_normal') + + with self.test_session(use_gpu=True), \ + test.mock.patch.object( + random_ops, 'random_normal', wraps=random_ops.random_normal) \ + as mock_random_normal: + x = init(shape).eval() + self.assertTrue(mock_random_normal.called) self.assertNear(np.mean(x), expect_mean, err=1e-2) self.assertNear(np.var(x), expect_var, err=1e-2) diff --git a/tensorflow/python/kernel_tests/shape_ops_test.py b/tensorflow/python/kernel_tests/shape_ops_test.py index 7368251ab69..34e34d9d1b2 100644 --- a/tensorflow/python/kernel_tests/shape_ops_test.py +++ b/tensorflow/python/kernel_tests/shape_ops_test.py @@ -642,6 +642,29 @@ class TileTest(test.TestCase): err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4]) self.assertLess(err, 1e-3) + def testGradientWithSparseGradWithRank1(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], + dtype=dtypes.float32) + outputs = array_ops.gather(array_ops.tile(inputs, [3]), + [1, 5, 9, 3, 7, 2, 2, 2]) + with self.test_session(): + error = gradient_checker.compute_gradient_error( + inputs, inputs.get_shape().as_list(), + outputs, outputs.get_shape().as_list()) + self.assertLess(error, 1e-4) + + def testGradientWithSparseGradWithRank3(self): + inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0], + dtype=dtypes.float32) + inputs = array_ops.reshape(inputs, [-1, 1, 1]) + 
outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]), + [1, 5, 9, 3, 7, 2, 2, 2]) + with self.test_session(): + error = gradient_checker.compute_gradient_error( + inputs, inputs.get_shape().as_list(), + outputs, outputs.get_shape().as_list()) + self.assertLess(error, 1e-4) + def testShapeFunctionEdgeCases(self): # Unknown multiples shape. inp = constant_op.constant(0.0, shape=[4, 4, 4, 4]) diff --git a/tensorflow/python/kernel_tests/sparse_slice_op_test.py b/tensorflow/python/kernel_tests/sparse_slice_op_test.py index da116601f83..97f30daf4a9 100644 --- a/tensorflow/python/kernel_tests/sparse_slice_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_slice_op_test.py @@ -21,13 +21,15 @@ from __future__ import print_function import numpy as np from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import sparse_ops +import tensorflow.python.ops.sparse_grad # pylint: disable=unused-import from tensorflow.python.platform import test class SparseSliceOpTest(test.TestCase): - def _SparseTensor_4x6(self): + def _SparseTensor_4x6(self, val_dtype=np.int64): # [0 | |2 | |4 |5 ] # [ |11| |13|14| ] # [20| | |23| |25] @@ -37,7 +39,7 @@ class SparseSliceOpTest(test.TestCase): [2, 3], [2, 5], [3, 0], [3, 2], [3, 3], [3, 5]]).astype( np.int64) val = np.array([0, 2, 4, 5, 11, 13, 14, 20, 23, 25, 30, 32, 33, 35]).astype( - np.int64) + val_dtype) shape = np.array([4, 6]).astype(np.int64) return sparse_tensor.SparseTensor(ind, val, shape) @@ -244,6 +246,22 @@ class SparseSliceOpTest(test.TestCase): self.assertAllEqual(sparse_tensor5.values.eval(), [5, 25, 35]) self.assertAllEqual(sparse_tensor5.dense_shape.eval(), [4, 1]) + def testGradients(self): + sp_input = self._SparseTensor_4x6(val_dtype=np.float32) + start_and_size = [([0, 0], [4, 2]), + ([0, 2], [5, 2]), + ([0, 4], [5, 3])] + + with self.test_session(use_gpu=False): + for start, size in start_and_size: + sp_output = 
sparse_ops.sparse_slice(sp_input, start, size) + nnz_in = len(sp_input.values.eval()) + nnz_out = len(sp_output.values.eval()) + + err = gradient_checker.compute_gradient_error( + [sp_input.values], [(nnz_in,)], sp_output.values, (nnz_out,)) + self.assertLess(err, 1e-3) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 3678bd4c1f6..fe459a96b98 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -568,7 +568,6 @@ ops.NotDifferentiable("Size") @ops.RegisterGradient("Tile") def _TileGrad(op, grad): """Sum reduces grad along the tiled dimensions.""" - assert isinstance(grad, ops.Tensor) input_shape = array_ops.shape(op.inputs[0]) # We interleave multiples and input_shape to get split_shape, # reshape grad to split_shape, and reduce along all even @@ -581,6 +580,13 @@ def _TileGrad(op, grad): split_shape = array_ops.reshape( array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1]) axes = math_ops.range(0, array_ops.size(split_shape), 2) + # Sum reduces grad along the first dimension for IndexedSlices + if isinstance(grad, ops.IndexedSlices): + grad = math_ops.unsorted_segment_sum( + grad.values, + math_ops.mod(grad.indices, input_shape[0]), + input_shape[0]) + split_shape = array_ops.concat([[1], split_shape[1:]], axis=0) input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes) # Fix shape inference if not context.executing_eagerly(): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index c8442b42d5c..fc37805c799 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3135,6 +3135,7 @@ def while_loop(cond, happen is that the thread updating `x` can never get ahead of the counter thread because the thread incrementing `x` depends on the value of the counter. 
+ ```python import tensorflow as tf diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index c41e952167d..5bfc5ce2a7a 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -43,7 +43,8 @@ from tensorflow.python.ops import linalg_ops_impl from tensorflow.python.ops import gen_linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops -from tensorflow.python.util.deprecation import deprecated +from tensorflow.python.util.deprecation import ( + deprecated, deprecated_arg_values) from tensorflow.python.util.tf_export import tf_export @@ -409,8 +410,10 @@ class UniformUnitScaling(Initializer): class VarianceScaling(Initializer): """Initializer capable of adapting its scale to the shape of weights tensors. - With `distribution="normal"`, samples are drawn from a truncated normal - distribution centered on zero, with `stddev = sqrt(scale / n)` + With `distribution="truncated_normal" or "untruncated_normal"`, + samples are drawn from a truncated/untruncated normal + distribution with a mean of zero and a standard deviation (after truncation, + if used) `stddev = sqrt(scale / n)` where n is: - number of input units in the weight tensor, if mode = "fan_in" - number of output units, if mode = "fan_out" @@ -433,10 +436,14 @@ class VarianceScaling(Initializer): "distribution" arguments. 
""" + @deprecated_arg_values( + None, + "`normal` is a deprecated alias for `truncated_normal`", + distribution="normal") def __init__(self, scale=1.0, mode="fan_in", - distribution="normal", + distribution="truncated_normal", seed=None, dtype=dtypes.float32): if scale <= 0.: @@ -444,7 +451,8 @@ class VarianceScaling(Initializer): if mode not in {"fan_in", "fan_out", "fan_avg"}: raise ValueError("Invalid `mode` argument:", mode) distribution = distribution.lower() - if distribution not in {"normal", "uniform"}: + if distribution not in {"normal", "uniform", + "truncated_normal", "untruncated_normal"}: raise ValueError("Invalid `distribution` argument:", distribution) self.scale = scale self.mode = mode @@ -466,11 +474,15 @@ class VarianceScaling(Initializer): scale /= max(1., fan_out) else: scale /= max(1., (fan_in + fan_out) / 2.) - if self.distribution == "normal": + if self.distribution == "normal" or self.distribution == "truncated_normal": # constant taken from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) stddev = math.sqrt(scale) / .87962566103423978 return random_ops.truncated_normal( shape, 0.0, stddev, dtype, seed=self.seed) + elif self.distribution == "untruncated_normal": + stddev = math.sqrt(scale) + return random_ops.random_normal( + shape, 0.0, stddev, dtype, seed=self.seed) else: limit = math.sqrt(3.0 * scale) return random_ops.random_uniform( diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 9ba91772f5f..66633c8b12f 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -878,7 +878,8 @@ def sparse_softmax_cross_entropy( exception when this op is run on CPU, and return `NaN` for corresponding loss and gradient rows on GPU. logits: Unscaled log probabilities of shape - `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32` or + `float64`. 
weights: Coefficients for the loss. This must be scalar or broadcastable to `labels` (i.e. same rank and each dimension is either 1 or the same). scope: the scope for the operations performed in computing the loss. diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 5a3b669c288..41d54a6c2f9 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -2009,7 +2009,8 @@ def sparse_softmax_cross_entropy_with_logits( exception when this op is run on CPU, and return `NaN` for corresponding loss and gradient rows on GPU. logits: Unscaled log probabilities of shape - `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float32` or `float64`. + `[d_0, d_1, ..., d_{r-1}, num_classes]` and dtype `float16`, `float32`, or + `float64`. name: A name for the operation (optional). Returns: diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py index 97353d6c747..1223b290ff6 100644 --- a/tensorflow/python/ops/sparse_grad.py +++ b/tensorflow/python/ops/sparse_grad.py @@ -116,6 +116,35 @@ def _SparseReduceSumGrad(op, out_grad): None, None) +@ops.RegisterGradient("SparseSlice") +def _SparseSliceGrad(op, *grads): + """The backward operator for the SparseSlice op. + + This op takes in the upstream gradient w.r.t. non-empty values of + the sliced `SparseTensor`, and outputs the gradients w.r.t. + the non-empty values of input `SparseTensor`. + + Args: + op: the SparseSlice op + *grads: the incoming gradients, one element per output of `op` + + Returns: + Gradient for each of the 5 input tensors of SparseSlice: + (indices, values, shape, start, size) + The gradients for the indices, shape, start and the size are None. 
+ """ + backprop_val_grad = grads[1] + input_indices = op.inputs[0] + input_start = op.inputs[3] + output_indices = op.outputs[0] + + val_grad = gen_sparse_ops.sparse_slice_grad( + backprop_val_grad, input_indices, input_start, output_indices) + val_grad.set_shape(op.inputs[1].get_shape()) + # (indices, values, shape, start, size) + return (None, val_grad, None, None, None) + + @ops.RegisterGradient("SparseTensorDenseMatMul") def _SparseTensorDenseMatMulGrad(op, grad): """Gradients for the dense tensor in the SparseTensorDenseMatMul op. diff --git a/tensorflow/stream_executor/BUILD b/tensorflow/stream_executor/BUILD index 21295abed1d..e742f8e8d51 100644 --- a/tensorflow/stream_executor/BUILD +++ b/tensorflow/stream_executor/BUILD @@ -2,6 +2,7 @@ licenses(["restricted"]) load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static") +load("//tensorflow:tensorflow.bzl", "cc_header_only_library") STREAM_EXECUTOR_HEADERS = glob([ "*.h", @@ -51,6 +52,14 @@ cc_library( ] + if_static([":stream_executor_impl"]), ) +cc_header_only_library( + name = "stream_executor_headers_lib", + visibility = ["//visibility:public"], + deps = [ + ":stream_executor", + ], +) + cc_library( name = "cuda_platform", srcs = if_cuda_is_configured( diff --git a/tensorflow/tools/api/generator/create_python_api.py b/tensorflow/tools/api/generator/create_python_api.py index 671b7e387e4..48d7dcd09eb 100644 --- a/tensorflow/tools/api/generator/create_python_api.py +++ b/tensorflow/tools/api/generator/create_python_api.py @@ -180,7 +180,7 @@ def get_api_init_text(package, api_name): for module in list(sys.modules.values()): # Only look at tensorflow modules. if (not module or not hasattr(module, '__name__') or - package not in module.__name__): + module.__name__ is None or package not in module.__name__): continue # Do not generate __init__.py files for contrib modules for now. if '.contrib.' 
in module.__name__ or module.__name__.endswith('.contrib'): diff --git a/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt b/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt index a6b6e5eceb6..86340913e25 100644 --- a/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.initializers.variance_scaling.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "<type \'object\'>" member_method { name: "__init__" - argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], " + argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt index 32a6f6ee888..03f4064b9ef 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.initializers.-variance-scaling.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "<type \'object\'>" member_method { name: "__init__" - argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], " + argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], " } member_method { name: "from_config" diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.pbtxt index 5470164a5bb..9ec20f09557 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -816,6 +816,10 @@ tf_module { name: "broadcast_static_shape" argspec: "args=[\'shape_x\', \'shape_y\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "broadcast_to" + argspec: "args=[\'input\', \'shape\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "case" argspec: "args=[\'pred_fn_pairs\', \'default\', \'exclusive\', \'strict\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'False\', \'False\', \'case\'], " diff --git a/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt b/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt index a58398d645e..09d7bc03b4f 100644 --- a/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.variance_scaling_initializer.pbtxt @@ -5,7 +5,7 @@ tf_class { is_instance: "<type \'object\'>" member_method { name: "__init__" - argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'normal\', \'None\', \"<dtype: \'float32\'>\"], " + argspec: "args=[\'self\', \'scale\', \'mode\', \'distribution\', \'seed\', \'dtype\'], varargs=None, keywords=None, defaults=[\'1.0\', \'fan_in\', \'truncated_normal\', \'None\', \"<dtype: \'float32\'>\"], " } member_method { name: "from_config" diff --git a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le index f496ac59b68..e879c34bbda 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le +++ b/tensorflow/tools/ci_build/Dockerfile.cpu.ppc64le @@ -8,6 +8,7 @@ RUN /install/install_bootstrap_deb_packages.sh RUN add-apt-repository -y ppa:openjdk-r/ppa RUN /install/install_deb_packages.sh RUN 
apt-get update && apt-get install -y libopenblas-dev +RUN /install/install_hdf5_ppc64le.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel_from_source.sh RUN /install/install_proto3.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le index 3eddc565509..89671387472 100644 --- a/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le +++ b/tensorflow/tools/ci_build/Dockerfile.gpu.ppc64le @@ -14,6 +14,7 @@ RUN /install/install_bootstrap_deb_packages.sh RUN add-apt-repository -y ppa:openjdk-r/ppa RUN /install/install_deb_packages.sh RUN apt-get update && apt-get install -y libopenblas-dev +RUN /install/install_hdf5_ppc64le.sh RUN /install/install_pip_packages.sh RUN /install/install_bazel_from_source.sh RUN /install/install_golang_ppc64le.sh diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu index 3bc52b9ed61..7e5860aeec1 100644 --- a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu @@ -1,4 +1,4 @@ -FROM launcher.gcr.io/google/rbe-debian8:r327695 +FROM launcher.gcr.io/google/rbe-ubuntu16-04:r327695 LABEL maintainer="Yu Yi <yiyu@google.com>" # Copy install scripts @@ -9,6 +9,6 @@ ENV CC /usr/local/bin/clang ENV CXX /usr/local/bin/clang++ ENV AR /usr/bin/ar -# Run pip install script for RBE Debian8 container. +# Run pip install script for RBE Ubuntu 16-04 container. 
RUN /install/install_pip_packages_remote.sh RUN /install/install_pip_packages.sh diff --git a/tensorflow/tools/ci_build/ci_parameterized_build.sh b/tensorflow/tools/ci_build/ci_parameterized_build.sh index b56b9308b3a..08e2c3edd2d 100755 --- a/tensorflow/tools/ci_build/ci_parameterized_build.sh +++ b/tensorflow/tools/ci_build/ci_parameterized_build.sh @@ -59,6 +59,9 @@ # TF_BUILD_BAZEL_CLEAN: # Will perform "bazel clean", if and only if this variable # is set to any non-empty and non-0 value +# TF_BAZEL_BUILD_ONLY: +# If it is set to any non-empty value that is not "0", Bazel +# will only build specified targets # TF_GPU_COUNT: # Run this many parallel tests for serial builds. # For now, only can be edited for PIP builds. @@ -410,6 +413,11 @@ fi # this flag, and it only affects a few tests. EXTRA_ARGS="${EXTRA_ARGS} --distinct_host_configuration=false" +if [[ ! -z "${TF_BAZEL_BUILD_ONLY}" ]] && + [[ "${TF_BAZEL_BUILD_ONLY}" != "0" ]];then + BAZEL_CMD=${BAZEL_BUILD_ONLY_CMD} +fi + # Process PIP install-test option if [[ ${TF_BUILD_IS_PIP} == "no_pip" ]] || [[ ${TF_BUILD_IS_PIP} == "both" ]]; then diff --git a/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh b/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh new file mode 100755 index 00000000000..4989d986b8e --- /dev/null +++ b/tensorflow/tools/ci_build/install/install_hdf5_ppc64le.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + + +#This is required because pypi doesn't have a pre-built h5py binary for ppc64le +#It has to be compiled from source during the install +apt-get update +apt-get install -y libhdf5-dev + +#h5py is not expecting the shared libraries to have _serial in the name. +ln -s /usr/lib/powerpc64le-linux-gnu/libhdf5_serial.so /usr/lib/powerpc64le-linux-gnu/libhdf5.so +ln -s /usr/lib/powerpc64le-linux-gnu/libhdf5_serial_hl.so /usr/lib/powerpc64le-linux-gnu/libhdf5_hl.so + +#pip is not installed yet, so use easy_install +#CPATH is the location of hdf5.h +CPATH=/usr/include/hdf5/serial/ easy_install -U h5py +CPATH=/usr/include/hdf5/serial/ easy_install3 -U h5py diff --git a/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh new file mode 100755 index 00000000000..ad22ebe4eb3 --- /dev/null +++ b/tensorflow/tools/ci_build/linux/mkl/build-dev-container.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# Build a whl and container with Intel(R) MKL support +# Usage: build-dev-container.sh + +# Helper function to traverse directories up until given file is found. +function upsearch () { + test / == "$PWD" && return || \ + test -e "$1" && echo "$PWD" && return || \ + cd .. && upsearch "$1" +} + +# Set up WORKSPACE. +WORKSPACE="${WORKSPACE:-$(upsearch WORKSPACE)}" + +TF_DOCKER_BUILD_DEVEL_BRANCH=${TF_DOCKER_BUILD_DEVEL_BRANCH:-master} +TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME:-intel-mkl/tensorflow} +TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION:-nightly} + +echo "TF_DOCKER_BUILD_DEVEL_BRANCH=${TF_DOCKER_BUILD_DEVEL_BRANCH}" +echo "TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME}" +echo "TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION}" + +# build the python 2 container and whl +TF_DOCKER_BUILD_TYPE="MKL" \ + TF_DOCKER_BUILD_IS_DEVEL="YES" \ + TF_DOCKER_BUILD_DEVEL_BRANCH="${TF_DOCKER_BUILD_DEVEL_BRANCH}" \ + TF_DOCKER_BUILD_IMAGE_NAME="${TF_DOCKER_BUILD_IMAGE_NAME}" \ + TF_DOCKER_BUILD_VERSION="${TF_DOCKER_BUILD_VERSION}" \ + ${WORKSPACE}/tensorflow/tools/docker/parameterized_docker_build.sh + +# build the python 3 container and whl +TF_DOCKER_BUILD_TYPE="MKL" \ + TF_DOCKER_BUILD_IS_DEVEL="YES" \ + TF_DOCKER_BUILD_DEVEL_BRANCH="${TF_DOCKER_BUILD_DEVEL_BRANCH}" \ + TF_DOCKER_BUILD_IMAGE_NAME="${TF_DOCKER_BUILD_IMAGE_NAME}" \ + TF_DOCKER_BUILD_VERSION="${TF_DOCKER_BUILD_VERSION}" \ + TF_DOCKER_BUILD_PYTHON_VERSION="PYTHON3" \ + ${WORKSPACE}/tensorflow/tools/docker/parameterized_docker_build.sh + diff --git a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh index b8bce57c87a..3d27e84b81c 100755 --- a/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh +++ b/tensorflow/tools/ci_build/pi/build_raspberry_pi.sh @@ -65,6 +65,10 @@ OPENBLAS_SRC_PATH=/tmp/openblas_src/ sudo rm -rf ${OPENBLAS_SRC_PATH} git clone 
https://github.com/xianyi/OpenBLAS ${OPENBLAS_SRC_PATH} cd ${OPENBLAS_SRC_PATH} +# The commit after this introduced Fortran compile issues. In theory they should +# be solvable using NOFORTRAN=1 on the make command, but my initial tries didn't +# work, so pinning to the last known good version. +git checkout 5a6a2bed9aff0ba8a18651d5514d029c8cae336a # If this path is changed, you'll also need to update # cxx_builtin_include_directory in third_party/toolchains/cpus/arm/CROSSTOOL.tpl OPENBLAS_INSTALL_PATH=/tmp/openblas_install/ diff --git a/tensorflow/tools/ci_build/update_version.py b/tensorflow/tools/ci_build/update_version.py index 00bfcfd49bd..642dde36a7c 100755 --- a/tensorflow/tools/ci_build/update_version.py +++ b/tensorflow/tools/ci_build/update_version.py @@ -37,7 +37,7 @@ SETUP_PY = "%s/tools/pip_package/setup.py" % TF_SRC_DIR README_MD = "./README.md" DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel" % TF_SRC_DIR GPU_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-gpu" % TF_SRC_DIR -CPU_MKL_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-cpu-mkl" % TF_SRC_DIR +CPU_MKL_DEVEL_DOCKERFILE = "%s/tools/docker/Dockerfile.devel-mkl" % TF_SRC_DIR RELEVANT_FILES = [TF_SRC_DIR, VERSION_H, SETUP_PY, diff --git a/tensorflow/tools/docker/Dockerfile.devel-mkl b/tensorflow/tools/docker/Dockerfile.devel-mkl new file mode 100755 index 00000000000..6dca0e393fa --- /dev/null +++ b/tensorflow/tools/docker/Dockerfile.devel-mkl @@ -0,0 +1,128 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>" + +# These parameters can be overridden by parameterized_docker_build.sh +ARG TF_BUILD_VERSION=r1.9 +ARG PYTHON="python" +ARG PYTHON3_DEV="" +ARG WHL_DIR="/tmp/pip" +ARG PIP="pip" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + git \ + libcurl3-dev \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libpng12-dev \ + libzmq3-dev \ + pkg-config \ + python-dev \ + ${PYTHON3_DEV} \ + rsync \ + 
software-properties-common \ + unzip \ + zip \ + zlib1g-dev \ + openjdk-8-jdk \ + openjdk-8-jre-headless \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \ + ${PYTHON} get-pip.py && \ + rm get-pip.py + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + ipykernel \ + jupyter \ + matplotlib \ + mock \ + numpy \ + scipy \ + sklearn \ + pandas \ + && \ + ${PYTHON} -m ipykernel.kernelspec + +RUN if [ "${PYTHON}" = "python3" ]; then \ + ln -s -f /usr/bin/python3 /usr/bin/python; \ + fi + +# Set up our notebook config. +COPY jupyter_notebook_config.py /root/.jupyter/ + +# Jupyter has issues with being run directly: +# https://github.com/ipython/ipython/issues/7062 +# We just add a little wrapper script. +COPY run_jupyter.sh / + +# Set up Bazel. + +# Running bazel inside a `docker build` command causes trouble, cf: +# https://github.com/bazelbuild/bazel/issues/134 +# The easiest solution is to set up a bazelrc file forcing --batch. +RUN echo "startup --batch" >>/etc/bazel.bazelrc +# Similarly, we need to work around sandboxing issues: +# https://github.com/bazelbuild/bazel/issues/418 +RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ + >>/etc/bazel.bazelrc +# Install the pinned bazel release (see BAZEL_VERSION). 
+ENV BAZEL_VERSION 0.11.0 +WORKDIR / +RUN mkdir /bazel && \ + cd /bazel && \ + curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -O https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + curl -H "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36" -fSsL -o /bazel/LICENSE.txt https://raw.githubusercontent.com/bazelbuild/bazel/master/LICENSE && \ + chmod +x bazel-*.sh && \ + ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + cd / && \ + rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh + +# Download and build TensorFlow. +WORKDIR /tensorflow + +# Download and build TensorFlow. +# Enable checking out both tags and branches +RUN export TAG_PREFIX="v" && \ + echo ${TF_BUILD_VERSION} | grep -q ^${TAG_PREFIX}; \ + if [ $? -eq 0 ]; then \ + git clone --depth=1 https://github.com/tensorflow/tensorflow.git . && \ + git fetch --tags && \ + git checkout ${TF_BUILD_VERSION}; \ + else \ + git clone --depth=1 --branch=${TF_BUILD_VERSION} https://github.com/tensorflow/tensorflow.git . ; \ + fi + +RUN yes "" | ${PYTHON} configure.py + +ENV CI_BUILD_PYTHON ${PYTHON} + +# Set bazel build parameters in .bazelrc in parameterized_docker_build.sh +# Use --copt=-march values to get optimized builds appropriate for the hardware +# platform of your choice. 
+# For ivy-bridge or sandy-bridge +# --copt=-march="avx" \ +# For haswell, broadwell, or skylake +# --copt=-march="avx2" \ +COPY .bazelrc /root/.bazelrc + +RUN tensorflow/tools/ci_build/builds/configured CPU \ + bazel --bazelrc=/root/.bazelrc build -c opt \ + tensorflow/tools/pip_package:build_pip_package && \ + bazel-bin/tensorflow/tools/pip_package/build_pip_package "${WHL_DIR}" && \ + ${PIP} --no-cache-dir install --upgrade "${WHL_DIR}"/tensorflow-*.whl && \ + rm -rf /root/.cache +# Clean up Bazel cache when done. + +# TensorBoard +EXPOSE 6006 +# IPython +EXPOSE 8888 + +WORKDIR /root diff --git a/tensorflow/tools/docker/Dockerfile.mkl b/tensorflow/tools/docker/Dockerfile.mkl new file mode 100755 index 00000000000..139395d4910 --- /dev/null +++ b/tensorflow/tools/docker/Dockerfile.mkl @@ -0,0 +1,75 @@ +FROM ubuntu:16.04 + +LABEL maintainer="Clayne Robison <clayne.b.robison@intel.com>" + +# This parameter MUST be set by parameterized_docker_build.sh +ARG TF_WHL_URL + +# Optional parameters +ARG TF_BUILD_VERSION=r1.9 +ARG PYTHON="python" +ARG PYTHON_DEV="python-dev" +ARG PIP="pip" + +# Pick up some TF dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + libfreetype6-dev \ + libhdf5-serial-dev \ + libpng12-dev \ + libzmq3-dev \ + pkg-config \ + python \ + ${PYTHON_DEV} \ + rsync \ + software-properties-common \ + unzip \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN curl -O https://bootstrap.pypa.io/get-pip.py && \ + python get-pip.py && \ + rm get-pip.py + +RUN ${PIP} --no-cache-dir install \ + Pillow \ + h5py \ + ipykernel \ + jupyter \ + matplotlib \ + numpy \ + pandas \ + scipy \ + sklearn \ + && \ + python -m ipykernel.kernelspec + +COPY ${TF_WHL_URL} / +RUN ${PIP} install --no-cache-dir --force-reinstall /${TF_WHL_URL} && \ + rm -rf /${TF_WHL_URL} + +RUN if [ "${PYTHON}" = "python3" ]; then \ + ln -s -f /usr/bin/python3 /usr/bin/python; \ + fi + +# Set up our notebook config. 
+COPY jupyter_notebook_config.py /root/.jupyter/ + +# Copy sample notebooks. +COPY notebooks /notebooks + +# Jupyter has issues with being run directly: +# https://github.com/ipython/ipython/issues/7062 +# We just add a little wrapper script. +COPY run_jupyter.sh / + +# TensorBoard +EXPOSE 6006 +# IPython +EXPOSE 8888 + +WORKDIR "/notebooks" + +CMD ["/run_jupyter.sh", "--allow-root"] diff --git a/tensorflow/tools/docker/parameterized_docker_build.sh b/tensorflow/tools/docker/parameterized_docker_build.sh index 05de25f2cb1..4681c5fd611 100755 --- a/tensorflow/tools/docker/parameterized_docker_build.sh +++ b/tensorflow/tools/docker/parameterized_docker_build.sh @@ -19,8 +19,8 @@ # parameterized_docker_build.sh # # The script obeys the following environment variables: -# TF_DOCKER_BUILD_TYPE: (CPU | GPU) -# CPU or GPU image +# TF_DOCKER_BUILD_TYPE: (CPU | GPU | MKL) +# CPU, GPU, or MKL image # # TF_DOCKER_BUILD_IS_DEVEL: (NO | YES) # Is this developer image @@ -87,6 +87,15 @@ # TF_DOCKER_BUILD_OPTIONS # (Optional) # Specifies the desired build options. Defaults to OPT. +# +# TF_DOCKER_BUILD_ARGS +# (Optional) +# A list (array) of docker build args. Will be passed to docker build +# command as list of --build-arg parameters. 
+# +# TF_BAZEL_BUILD_OPTIONS +# (Optional) +# Bazel compiler flags to be passed to the bazelrc file # Script directory SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -116,6 +125,8 @@ echo " TF_DOCKER_BUILD_IMAGE_NAME=${TF_DOCKER_BUILD_IMAGE_NAME}" echo " TF_DOCKER_BUILD_VERSION=${TF_DOCKER_BUILD_VERSION}" echo " TF_DOCKER_BUILD_PORT=${TF_DOCKER_BUILD_PORT}" echo " TF_DOCKER_BUILD_PUSH_CMD=${TF_DOCKER_BUILD_PUSH_CMD}" +echo " TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]:-()}" +echo " TF_BAZEL_BUILD_OPTIONS=${TF_BAZEL_BUILD_OPTIONS}" CONTAINER_PORT=${TF_DOCKER_BUILD_PORT:-8888} @@ -149,6 +160,15 @@ fi if [[ ${TF_DOCKER_BUILD_TYPE} == "cpu" ]]; then DOCKER_BINARY="docker" +elif [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + DOCKER_BINARY="docker" + FINAL_TAG="${FINAL_TAG}-mkl" + if [[ ${ORIG_DOCKERFILE} == *"."* ]]; then + # There is already a dot in the tag, use "-" + ORIG_DOCKERFILE="${ORIG_DOCKERFILE}-mkl" + else + ORIG_DOCKERFILE="${ORIG_DOCKERFILE}.mkl" + fi elif [[ ${TF_DOCKER_BUILD_TYPE} == "gpu" ]]; then DOCKER_BINARY="nvidia-docker" @@ -203,6 +223,10 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then export TF_BUILD_OPTIONS=${TF_DOCKER_BUILD_OPTIONS} export TF_BUILD_IS_PIP="PIP" + if [[ "${TF_DOCKER_BUILD_TYPE}" == "mkl" ]]; then + die "FAIL: Non-development MKL builds require a pre-built pip whl." 
+ fi + if [[ "${TF_DOCKER_BUILD_TYPE}" == "gpu" ]]; then export TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS=\ "${TF_BUILD_APPEND_CI_DOCKER_EXTRA_PARAMS} -e TF_CUDA_COMPUTE_CAPABILITIES=3.0,3.5,5.2" @@ -255,25 +279,39 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then # Use string replacement to put the correct file name into the Dockerfile PIP_WHL=$(basename "${PIP_WHL}") - # Modify the non-devel Dockerfile to point to the correct pip whl file - # location - sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\ + if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg TF_WHL_URL=${PIP_WHL}" ) + cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}" + else + # Modify the non-devel Dockerfile to point to the correct pip whl file + # location + sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\ "/# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/c"\ "COPY ${PIP_WHL} /\n"\ "RUN pip --no-cache-dir install /${PIP_WHL}" "${ORIG_DOCKERFILE}" \ - > "${DOCKERFILE}" + > "${DOCKERFILE}" + fi echo "Using local pip wheel from: ${TF_DOCKER_BUILD_CENTRAL_PIP}" echo - else echo "Downloading pip wheel from: ${TF_DOCKER_BUILD_CENTRAL_PIP}" - echo - - # Modify the non-devel Dockerfile to point to the correct pip whl URL. - sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\ + if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + pushd "${TMP_DIR}/" + curl -O ${TF_DOCKER_BUILD_CENTRAL_PIP} + popd + PIP_WHL_PATH=`find ${TMP_DIR} -name "*.whl"` + PIP_WHL=$(basename "${PIP_WHL_PATH}") + echo "PIP_WHL= ${PIP_WHL}" + echo + TF_DOCKER_BUILD_ARGS+=("--build-arg TF_WHL_URL=${PIP_WHL}") + cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}" + else + # Modify the non-devel Dockerfile to point to the correct pip whl URL. 
+ sed -e "/# --- DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/,"\ "/# --- ~ DO NOT EDIT OR DELETE BETWEEN THE LINES --- #/c"\ "RUN pip --no-cache-dir install ${TF_DOCKER_BUILD_CENTRAL_PIP}" "${ORIG_DOCKERFILE}" \ - > "${DOCKERFILE}" + > "${DOCKERFILE}" + fi fi echo "Modified Dockerfile at: ${DOCKERFILE}" @@ -281,36 +319,66 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then # Modify python/pip version if necessary. if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then - if sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \ - sed -i -e 's/python-dev/python3-dev/g' "${DOCKERFILE}" && \ - sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \ - sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" - then - echo "Modified Dockerfile for python version "\ -"${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" + if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}") + TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON_DEV=python3-dev") + TF_DOCKER_BUILD_ARGS+=("--build-arg PIP=pip3") + cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}" else - die "FAILED to modify ${DOCKERFILE} for python3" + if sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \ + sed -i -e 's/python-dev/python3-dev/g' "${DOCKERFILE}" && \ + sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \ + sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" + then + echo "Modified Dockerfile for python version "\ + "${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" + else + die "FAILED to modify ${DOCKERFILE} for python3" + fi fi fi -else +else # TF_DOCKER_BUILD_IS_DEVEL == 'yes' DOCKERFILE="${TMP_DIR}/Dockerfile" - # Modify the devel Dockerfile to specify the git branch - sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \ - "${ORIG_DOCKERFILE}" 
> "${DOCKERFILE}" + # Set up Dockerfile ARGS for mkl build + if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + if [[ -z "${TF_BAZEL_BUILD_OPTIONS// }" ]]; then + TF_BAZEL_BUILD_OPTIONS=("--config=mkl --copt=-mavx --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0") + else + TF_BAZEL_BUILD_OPTIONS="${TF_BAZEL_BUILD_OPTIONS}" + fi + TF_DOCKER_BUILD_ARGS+=("--build-arg TF_BUILD_VERSION=${TF_DOCKER_BUILD_DEVEL_BRANCH}") + echo "TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]}" + + # Pass the build options to bazel using the user-specific .bazelrc file + echo "build ${TF_BAZEL_BUILD_OPTIONS}" >> ${TMP_DIR}/.bazelrc + cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}" + else + # Modify the devel Dockerfile to specify the git branch + sed "s/^RUN git clone --branch=.* --depth=1/RUN git clone --branch=${TF_DOCKER_BUILD_DEVEL_BRANCH} --depth=1/" \ + "${ORIG_DOCKERFILE}" > "${DOCKERFILE}" + fi # Modify python/pip version if necessary. if [[ "${TF_DOCKER_BUILD_PYTHON_VERSION}" == "python3" ]]; then - if sed -i -e 's/python-dev/python-dev python3-dev/g' "${DOCKERFILE}" && \ - sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \ - sed -i -e 's^/tmp/pip^/tmp/pip3^g' "${DOCKERFILE}" && \ - sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \ - sed -i -e 's/ENV CI_BUILD_PYTHON python/ENV CI_BUILD_PYTHON python3/g' "${DOCKERFILE}" && \ - sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" - then - echo "Modified Dockerfile further for python version ${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" + if [[ ${TF_DOCKER_BUILD_TYPE} == "mkl" ]]; then + TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON=${TF_DOCKER_BUILD_PYTHON_VERSION}") + TF_DOCKER_BUILD_ARGS+=("--build-arg PYTHON3_DEV=python3-dev") + TF_DOCKER_BUILD_ARGS+=("--build-arg WHL_DIR=/tmp/pip3") + TF_DOCKER_BUILD_ARGS+=("--build-arg PIP=pip3") + cp "${ORIG_DOCKERFILE}" "${DOCKERFILE}" else - die "FAILED to modify ${DOCKERFILE} for python3" + if sed -i -e 's/python-dev/python-dev 
python3-dev/g' "${DOCKERFILE}" && \ + sed -i -e 's/python /python3 /g' "${DOCKERFILE}" && \ + sed -i -e 's^/tmp/pip^/tmp/pip3^g' "${DOCKERFILE}" && \ + sed -i -e 's/pip /pip3 /g' "${DOCKERFILE}" && \ + sed -i -e 's/ENV CI_BUILD_PYTHON python/ENV CI_BUILD_PYTHON python3/g' "${DOCKERFILE}" && \ + sed -i -e 's^# RUN ln -s -f /usr/bin/python3 /usr/bin/python#^RUN ln -s -f /usr/bin/python3 /usr/bin/python^' "${DOCKERFILE}" + then + echo "Modified Dockerfile further for python version ${TF_DOCKER_BUILD_PYTHON_VERSION} at: ${DOCKERFILE}" + else + die "FAILED to modify ${DOCKERFILE} for python3" + fi fi fi fi @@ -319,8 +387,11 @@ fi # Intermediate image name with tag IMG="${USER}/tensorflow:${FINAL_TAG}" echo "Building docker image with image name and tag: ${IMG}" +echo "TF_DOCKER_BUILD_ARGS=${TF_DOCKER_BUILD_ARGS[@]}" +CMD="${DOCKER_BINARY} build ${TF_DOCKER_BUILD_ARGS[@]} --no-cache --pull -t ${IMG} -f ${DOCKERFILE} ${TMP_DIR}" +echo "CMD=${CMD}" +${CMD} -"${DOCKER_BINARY}" build --no-cache --pull -t "${IMG}" -f "${DOCKERFILE}" "${TMP_DIR}" if [[ $? == "0" ]]; then echo "${DOCKER_BINARY} build of ${IMG} succeeded" else @@ -340,7 +411,7 @@ fi DOCKER_RUN_LOG="${TMP_DIR}/docker_run.log" echo "" echo "Running docker container from image ${IMG}..." 
-echo " (Log file is at: ${DOCKER_RUN_LOG}" +echo " Log file is at: ${DOCKER_RUN_LOG}" echo "" if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then @@ -386,7 +457,6 @@ if [[ "${TF_DOCKER_BUILD_IS_DEVEL}" == "no" ]]; then # Stop the running docker container sleep 1 "${DOCKER_BINARY}" stop --time=0 ${CONTAINER_ID} - fi diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 55cd4f37c68..c630ca04b88 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -53,7 +53,7 @@ REQUIRED_PACKAGES = [ 'gast >= 0.2.0', 'numpy >= 1.13.3', 'six >= 1.10.0', - 'protobuf >= 3.4.0', + 'protobuf >= 3.6.0', 'setuptools <= 39.1.0', 'tensorboard >= 1.8.0, < 1.9.0', 'termcolor >= 1.1.0', @@ -170,8 +170,9 @@ class InstallHeaders(Command): # symlink within the directory hierarchy. # NOTE(keveman): Figure out how to customize bdist_wheel package so # we can do the symlink. - if 'external/eigen_archive/' in install_dir: - extra_dir = install_dir.replace('external/eigen_archive', '') + if 'tensorflow/include/external/eigen_archive/' in install_dir: + extra_dir = install_dir.replace( + 'tensorflow/include/external/eigen_archive', '') if not os.path.exists(extra_dir): self.mkpath(extra_dir) self.copy_file(header, extra_dir) @@ -204,13 +205,12 @@ def find_files(pattern, root): yield os.path.join(dirpath, filename) -matches = ['../' + x for x in find_files('*', 'external') if '.py' not in x] - so_lib_paths = [ i for i in os.listdir('.') if os.path.isdir(i) and fnmatch.fnmatch(i, '_solib_*') ] +matches = [] for path in so_lib_paths: matches.extend( ['../' + x for x in find_files('*', path) if '.py' not in x] @@ -225,7 +225,7 @@ headers = (list(find_files('*.h', 'tensorflow/core')) + list(find_files('*.h', 'tensorflow/stream_executor')) + list(find_files('*.h', 'google/protobuf_archive/src')) + list(find_files('*', 'third_party/eigen3')) + - list(find_files('*', 'external/eigen_archive'))) + list(find_files('*', 
'tensorflow/include/external/eigen_archive'))) setup( name=project_name, diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 4015c0d5a43..5372a585aa4 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -559,11 +559,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "kafka", urls = [ - "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz", - "https://github.com/edenhill/librdkafka/archive/v0.11.1.tar.gz", + "https://mirror.bazel.build/github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz", + "https://github.com/edenhill/librdkafka/archive/v0.11.4.tar.gz", ], - sha256 = "dd035d57c8f19b0b612dd6eefe6e5eebad76f506e302cccb7c2066f25a83585e", - strip_prefix = "librdkafka-0.11.1", + sha256 = "9d8f1eb7b0e29e9ab1168347c939cb7ae5dff00a39cef99e7ef033fd8f92737c", + strip_prefix = "librdkafka-0.11.4", build_file = clean_dep("//third_party:kafka/BUILD"), patch_file = clean_dep("//third_party/kafka:config.patch"), ) diff --git a/third_party/eigen.BUILD b/third_party/eigen.BUILD index e54c1a4501d..759f8a9be92 100644 --- a/third_party/eigen.BUILD +++ b/third_party/eigen.BUILD @@ -69,3 +69,9 @@ cc_library( includes = ["."], visibility = ["//visibility:public"], ) + +filegroup( + name = "eigen_header_files", + srcs = EIGEN_MPL2_HEADER_FILES, + visibility = ["//visibility:public"], +) diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index f661093bc9f..203991b50f5 100644 --- a/third_party/eigen3/BUILD +++ b/third_party/eigen3/BUILD @@ -17,21 +17,23 @@ load("//tensorflow:tensorflow.bzl", "if_mkl") # INTEL_MKL end load("//tensorflow:tensorflow.bzl", "if_mkl") +EIGEN3_THIRD_PARTY_HEADERS = [ + "Eigen/Core", + "Eigen/LU", + "Eigen/Cholesky", + "Eigen/Eigenvalues", + "Eigen/QR", + "Eigen/SVD", + "unsupported/Eigen/MatrixFunctions", + "unsupported/Eigen/SpecialFunctions", + "unsupported/Eigen/CXX11/ThreadPool", + "unsupported/Eigen/CXX11/Tensor", + 
"unsupported/Eigen/CXX11/FixedPoint", +] + glob(["unsupported/Eigen/CXX11/src/FixedPoint/*.h"]) + cc_library( name = "eigen3", - hdrs = glob(["unsupported/Eigen/CXX11/src/FixedPoint/*.h"]) + [ - "Eigen/Core", - "Eigen/LU", - "Eigen/Cholesky", - "Eigen/Eigenvalues", - "Eigen/QR", - "Eigen/SVD", - "unsupported/Eigen/MatrixFunctions", - "unsupported/Eigen/SpecialFunctions", - "unsupported/Eigen/CXX11/ThreadPool", - "unsupported/Eigen/CXX11/Tensor", - "unsupported/Eigen/CXX11/FixedPoint", - ], + hdrs = EIGEN3_THIRD_PARTY_HEADERS, includes = if_mkl(["./mkl_include"]), visibility = ["//visibility:public"], deps = [ @@ -48,3 +50,35 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +filegroup( + name = "eigen_third_party_header_files", + srcs = EIGEN3_THIRD_PARTY_HEADERS, + visibility = ["//visibility:public"], +) + +genrule( + name = "install_eigen_headers", + srcs = [ + "@eigen_archive//:eigen_header_files", + ":eigen_third_party_header_files", + ], + outs = ["include"], + cmd = """ + mkdir $@ + for f in $(locations @eigen_archive//:eigen_header_files) ; do + d="$${f%/*}" + d="$${d#*external/eigen_archive/}" + + mkdir -p "$@/$${d}" + cp "$${f}" "$@/$${d}/" + done + + for f in $(locations :eigen_third_party_header_files) ; do + d="$${f%/*}" + + mkdir -p "$@/$${d}" + cp "$${f}" "$@/$${d}/" + done + """, +) diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index a839ca717e6..75792b0d873 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -60,6 +60,8 @@ cc_library( "src/rdkafka_event.h", "src/rdkafka_feature.c", "src/rdkafka_feature.h", + "src/rdkafka_header.c", + "src/rdkafka_header.h", "src/rdkafka_int.h", "src/rdkafka_interceptor.c", "src/rdkafka_interceptor.h", @@ -93,7 +95,6 @@ cc_library( "src/rdkafka_sasl_int.h", "src/rdkafka_sasl_plain.c", "src/rdkafka_subscription.c", - "src/rdkafka_subscription.h", "src/rdkafka_timer.c", "src/rdkafka_timer.h", "src/rdkafka_topic.c", @@ -105,6 +106,8 @@ cc_library( "src/rdlist.h", 
"src/rdlog.c", "src/rdlog.h", + "src/rdmurmur2.c", + "src/rdmurmur2.h", "src/rdports.c", "src/rdports.h", "src/rdposix.h", diff --git a/third_party/repo.bzl b/third_party/repo.bzl index cb67d3e9617..9cee1fcc4b5 100644 --- a/third_party/repo.bzl +++ b/third_party/repo.bzl @@ -16,7 +16,6 @@ _SINGLE_URL_WHITELIST = depset([ "arm_compiler", - "ortools_archive", ]) def _is_windows(ctx): diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD index 6da79535892..2876f305f1f 100644 --- a/third_party/sqlite.BUILD +++ b/third_party/sqlite.BUILD @@ -5,6 +5,7 @@ licenses(["unencumbered"]) # Public Domain SQLITE_COPTS = [ "-Os", + "-DSQLITE_ENABLE_JSON1", "-DHAVE_DECL_STRERROR_R=1", "-DHAVE_STDINT_H=1", "-DHAVE_INTTYPES_H=1", diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD new file mode 100644 index 00000000000..fc3183a7543 --- /dev/null +++ b/third_party/toolchains/BUILD @@ -0,0 +1,22 @@ +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +# Platform for use with remote execution with +# custom container based off RBE Ubuntu16_04 +# http://gcr.io/cloud-marketplace/google/rbe-ubuntu16-04 +# Built with //tensorflow/tools/ci_build/Dockerfile.rbe.cpu +platform( + name = "rbe_ubuntu16_04-tf", + constraint_values = [ + "@bazel_tools//platforms:x86_64", + "@bazel_tools//platforms:linux", + "@bazel_tools//tools/cpp:clang", + "@bazel_toolchains//constraints:xenial", + ], + remote_execution_properties = """ + properties: { + name: "container-image" + value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:800a7b68cabef15419695c188ed33ed70adf678c2371b97b236f3ae26c38274d" + }""", +)