Merge changes from github.
PiperOrigin-RevId: 180301735

parent 2e2715baa8
commit 20765b3e1a
@@ -67,4 +67,4 @@ If the Project Stewards receive a report alleging a violation of the Code of Conduct

 ## Attribution

-This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at http://contributor-covenant.org/version/1/4, and includes some aspects of the Geek Feminism Code of Conduct and the Drupal Code of Conduct.
+This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at https://contributor-covenant.org/version/1/4, and includes some aspects of the Geek Feminism Code of Conduct and the Drupal Code of Conduct.
@@ -8,8 +8,8 @@ We'd love to accept your patches! Before we can take them, we have to jump a couple of legal hurdles.

 Please fill out either the individual or corporate Contributor License Agreement (CLA).

-* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](http://code.google.com/legal/individual-cla-v1.0.html).
+* If you are an individual writing original source code and you're sure you own the intellectual property, then you'll need to sign an [individual CLA](https://code.google.com/legal/individual-cla-v1.0.html).
-* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](http://code.google.com/legal/corporate-cla-v1.0.html).
+* If you work for a company that wants to allow you to contribute your work, then you'll need to sign a [corporate CLA](https://code.google.com/legal/corporate-cla-v1.0.html).

 Follow either of the two links above to access the appropriate CLA and instructions for how to sign and return it. Once we receive it, we'll be able to accept your pull requests.
@@ -117,7 +117,7 @@ pylint --rcfile=/tmp/pylintrc myfile.py
 * [Google Java Style Guide](https://google.github.io/styleguide/javaguide.html)
 * [Google JavaScript Style Guide](https://google.github.io/styleguide/jsguide.html)
 * [Google Shell Style Guide](https://google.github.io/styleguide/shell.xml)
-* [Google Objective-C Style Guide](http://google.github.io/styleguide/objcguide.html)
+* [Google Objective-C Style Guide](https://google.github.io/styleguide/objcguide.html)

 #### Running sanity check

@@ -49,8 +49,8 @@ packages on Linux, Mac, and Windows.
 * Linux CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=cpu-slave/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=cpu-slave/))
 * Linux GPU: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/42/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp27-none-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=gpu-linux/)) / [Python 3.4](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp34-cp34m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=gpu-linux/)) / [Python 3.5](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly_gpu-1.head-cp35-cp35m-linux_x86_64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-linux/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3.5,label=gpu-linux/))
 * Mac CPU-only: [Python 2](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py2-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=mac-slave/)) / [Python 3](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/lastSuccessfulBuild/artifact/pip_test/whl/tf_nightly-1.head-py3-none-any.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-mac/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON3,label=mac-slave/))
-* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/))
+* Windows CPU-only: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows,PY=36/))
-* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](http://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/))
+* Windows GPU: [Python 3.5 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp35-cp35m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=35/)) / [Python 3.6 64-bit](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/lastSuccessfulBuild/artifact/cmake_build/tf_python/dist/tf_nightly_gpu-1.head-cp36-cp36m-win_amd64.whl) ([build history](https://ci.tensorflow.org/view/tf-nightly/job/tf-nightly-windows/M=windows-gpu,PY=36/))
 * Android: [demo APK](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/tensorflow_demo.apk), [native libs](https://ci.tensorflow.org/view/Nightly/job/nightly-android/lastSuccessfulBuild/artifact/out/native/)
 ([build history](https://ci.tensorflow.org/view/Nightly/job/nightly-android/))

@@ -446,11 +446,13 @@ filegroup(
         "//tensorflow/contrib/data/python/kernel_tests:all_files",
         "//tensorflow/contrib/data/python/ops:all_files",
         "//tensorflow/contrib/decision_trees/proto:all_files",
+        "//tensorflow/contrib/deprecated:all_files",
         "//tensorflow/contrib/distributions:all_files",
         "//tensorflow/contrib/eager/proto:all_files",
         "//tensorflow/contrib/eager/python:all_files",
         "//tensorflow/contrib/estimator:all_files",
         "//tensorflow/contrib/factorization:all_files",
+        "//tensorflow/contrib/factorization/examples:all_files",
         "//tensorflow/contrib/factorization/kernels:all_files",
         "//tensorflow/contrib/ffmpeg:all_files",
         "//tensorflow/contrib/ffmpeg/default:all_files",
@@ -461,6 +463,7 @@ filegroup(
         "//tensorflow/contrib/graph_editor:all_files",
         "//tensorflow/contrib/grid_rnn:all_files",
         "//tensorflow/contrib/hooks:all_files",
+        "//tensorflow/contrib/hvx/clock_cycle_profiling:all_files",
         "//tensorflow/contrib/hvx/hvx_ops_support_checker:all_files",
         "//tensorflow/contrib/image:all_files",
         "//tensorflow/contrib/input_pipeline:all_files",
@@ -478,6 +481,7 @@ filegroup(
         "//tensorflow/contrib/layers/kernels:all_files",
         "//tensorflow/contrib/learn:all_files",
         "//tensorflow/contrib/learn/python/learn/datasets:all_files",
+        "//tensorflow/contrib/legacy_seq2seq:all_files",
         "//tensorflow/contrib/libsvm:all_files",
         "//tensorflow/contrib/linalg:all_files",
         "//tensorflow/contrib/linear_optimizer:all_files",
@@ -503,15 +507,19 @@ filegroup(
         "//tensorflow/contrib/lookup:all_files",
         "//tensorflow/contrib/losses:all_files",
         "//tensorflow/contrib/makefile:all_files",
+        "//tensorflow/contrib/memory_stats:all_files",
         "//tensorflow/contrib/meta_graph_transform:all_files",
         "//tensorflow/contrib/metrics:all_files",
         "//tensorflow/contrib/model_pruning:all_files",
-        "//tensorflow/contrib/mpi_collectives:all_files",
+        "//tensorflow/contrib/model_pruning/examples/cifar10:all_files",
+        "//tensorflow/contrib/nccl:all_files",
         "//tensorflow/contrib/ndlstm:all_files",
         "//tensorflow/contrib/nearest_neighbor:all_files",
         "//tensorflow/contrib/nn:all_files",
         "//tensorflow/contrib/opt:all_files",
+        "//tensorflow/contrib/periodic_resample:all_files",
         "//tensorflow/contrib/predictor:all_files",
+        "//tensorflow/contrib/quantization:all_files",
         "//tensorflow/contrib/quantize:all_files",
         "//tensorflow/contrib/receptive_field:all_files",
         "//tensorflow/contrib/reduce_slice_ops:all_files",
@@ -580,6 +588,7 @@ filegroup(
         "//tensorflow/core/profiler/internal/advisor:all_files",
         "//tensorflow/core/util/ctc:all_files",
         "//tensorflow/core/util/tensor_bundle:all_files",
+        "//tensorflow/examples/adding_an_op:all_files",
         "//tensorflow/examples/android:all_files",
         "//tensorflow/examples/benchmark:all_files",
         "//tensorflow/examples/get_started/regression:all_files",
@@ -587,10 +596,13 @@ filegroup(
         "//tensorflow/examples/image_retraining:all_files",
         "//tensorflow/examples/label_image:all_files",
         "//tensorflow/examples/learn:all_files",
+        "//tensorflow/examples/multibox_detector:all_files",
         "//tensorflow/examples/saved_model:all_files",
         "//tensorflow/examples/speech_commands:all_files",
         "//tensorflow/examples/tutorials/estimators:all_files",
+        "//tensorflow/examples/tutorials/layers:all_files",
         "//tensorflow/examples/tutorials/mnist:all_files",
+        "//tensorflow/examples/tutorials/monitors:all_files",
         "//tensorflow/examples/tutorials/word2vec:all_files",
         "//tensorflow/examples/wav_to_spectrogram:all_files",
         "//tensorflow/go:all_files",
@@ -613,6 +625,7 @@ filegroup(
         "//tensorflow/python/kernel_tests/random:all_files",
         "//tensorflow/python/ops/distributions:all_files",
         "//tensorflow/python/ops/linalg:all_files",
+        "//tensorflow/python/ops/losses:all_files",
         "//tensorflow/python/profiler:all_files",
         "//tensorflow/python/profiler/internal:all_files",
         "//tensorflow/python/saved_model:all_files",
@@ -623,6 +636,7 @@ filegroup(
         "//tensorflow/tools/api/tests:all_files",
         "//tensorflow/tools/benchmark:all_files",
         "//tensorflow/tools/build_info:all_files",
+        "//tensorflow/tools/ci_build/gpu_build:all_files",
         "//tensorflow/tools/common:all_files",
         "//tensorflow/tools/compatibility:all_files",
         "//tensorflow/tools/dist_test/server:all_files",
@@ -630,17 +644,17 @@ filegroup(
         "//tensorflow/tools/docker/notebooks:all_files",
         "//tensorflow/tools/docs:all_files",
         "//tensorflow/tools/git:all_files",
+        "//tensorflow/tools/graph_transforms:all_files",
         "//tensorflow/tools/mlpbtxt:all_files",
         "//tensorflow/tools/proto_text:all_files",
         "//tensorflow/tools/quantization:all_files",
         "//tensorflow/tools/test:all_files",
         "//tensorflow/user_ops:all_files",
         "//third_party/hadoop:all_files",
-        "//third_party/mpi:all_files",
         "//third_party/sycl:all_files",
         "//third_party/sycl/sycl:all_files",
     ],
-    visibility = [":__subpackages__"],
+    visibility = ["//visibility:public"],
 )

 load(
@@ -1462,7 +1462,11 @@ TEST_F(CApiFunctionTest, AppendHash) {
                /*append_hash=*/true);
   tensorflow::FunctionDef fdef;
   ASSERT_TRUE(GetFunctionDef(func_, &fdef));
+#if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+  ASSERT_EQ(string("func_name_base_ZpgUD4x8oqk"), fdef.signature().name());
+#else
   ASSERT_EQ(string("func_name_base_qaJ8jA8UmGY"), fdef.signature().name());
+#endif
 }

 TEST_F(CApiFunctionTest, GetOpDef) {
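The guarded assertion is needed because the appended fingerprint is computed from serialized bytes whose in-memory order depends on the host, so the expected name differs between little- and big-endian machines. As a minimal illustrative sketch (not from the patch), the same question the test's compile-time `__BYTE_ORDER__` macro answers can be probed at runtime:

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// Runtime equivalent of the compile-time __BYTE_ORDER__ check used in the
// test: write a known 32-bit pattern and inspect its first byte in memory.
bool IsBigEndian() {
  const std::uint32_t probe = 0x01020304;
  unsigned char bytes[sizeof(probe)];
  std::memcpy(bytes, &probe, sizeof(probe));
  return bytes[0] == 0x01;  // big-endian stores the most significant byte first
}

int main() {
  std::cout << (IsBigEndian() ? "big" : "little") << "-endian host\n";
}
```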
@@ -473,6 +473,41 @@ Status AddNGrad(const Scope& scope, const Operation& op,
 }
 REGISTER_GRADIENT_OP("AddN", AddNGrad);

+Status PowGrad(const Scope& scope, const Operation& op,
+               const std::vector<Output>& grad_inputs,
+               std::vector<Output>* grad_outputs) {
+  auto x = ConjugateHelper(scope, op.input(0));
+  auto y = ConjugateHelper(scope, op.input(1));
+  auto z = ConjugateHelper(scope, op.output(0));
+  auto grad = grad_inputs[0];
+  // grad * y * pow(x, y - 1)
+  auto one = Cast(scope, Const(scope, 1.0), y.type());
+  auto gx_1 = Mul(scope,
+                  Mul(scope, grad, y),
+                  Pow(scope, x, Sub(scope, y, one)));
+  // Avoid false singularity at x = 0
+  DataType x_dtype = x.type();
+  auto zero = Cast(scope, Const(scope, 0.0), x_dtype);
+  if (x_dtype == DT_COMPLEX64 || x_dtype == DT_COMPLEX128) {
+    // real(x) < 0 is fine for the complex case
+    auto log_x = Where3(scope,
+                        NotEqual(scope, x, zero),
+                        Log(scope, x),
+                        ZerosLike(scope, x));
+    auto gy_1 = Mul(scope, Mul(scope, grad, z), log_x);
+    return BinaryGradCommon(scope, op, grad_outputs, gx_1, gy_1);
+  } else {
+    // There's no sensible real value to return if x < 0, so return 0
+    auto log_x = Where3(scope,
+                        Greater(scope, x, zero),
+                        Log(scope, x),
+                        ZerosLike(scope, x));
+    auto gy_1 = Mul(scope, Mul(scope, grad, z), log_x);
+    return BinaryGradCommon(scope, op, grad_outputs, gx_1, gy_1);
+  }
+}
+REGISTER_GRADIENT_OP("Pow", PowGrad);
+
 // MaximumMinimumGradCommon adds shared ops to calculate gradients for
 // the binary Maximum and Minimum ops.
 Status MaximumMinimumGradCommon(const Scope& scope, const Operation& op,
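For reference, the two partial derivatives PowGrad materializes (stated in its comments as `grad * y * pow(x, y - 1)` and `grad * z * log(x)`, with z = x^y) are the standard identities:

```latex
\frac{\partial}{\partial x}\, x^{y} = y\, x^{y-1},
\qquad
\frac{\partial}{\partial y}\, x^{y} = x^{y} \ln x = z \ln x .
```

The `Where3` guard replaces ln x with 0 wherever it would be undefined (x = 0, or x < 0 in the real-valued case), which is what the code means by avoiding a false singularity.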
@@ -812,6 +847,183 @@ Status MinOrMaxGrad(const Scope& scope, const Operation& op,
 REGISTER_GRADIENT_OP("Min", MinOrMaxGrad);
 REGISTER_GRADIENT_OP("Max", MinOrMaxGrad);

+Status ProdGrad(const Scope& scope, const Operation& op,
+                const std::vector<Output>& grad_inputs,
+                std::vector<Output>* grad_outputs) {
+  auto zero = Const(scope, 0);
+  auto one = Const(scope, 1);
+
+  // The gradient can be expressed by dividing the product by each entry of
+  // the input tensor. If our input is
+  // [
+  //  [3, 4],
+  //  [5, 6],
+  //  [7, 8]
+  // ]
+  // and we do a Prod operation on the axis 0, we will obtain [[105, 192]].
+  // The gradient will have the same shape as the input
+  //     [
+  //       [105/3, 192/4],
+  // dz *  [105/5, 192/6],
+  //       [105/7, 192/8]
+  //     ]
+  // If the input contains a zero, the division is impossible but
+  // if we take the calculation that gave the first gradient
+  // (3 * 5 * 7)/3 is equal to 5 * 7
+  // the trick will be to cumprod the elements on the axis without
+  // the element at the current position (3 in the example above).
+  // We will take as example:
+  // [
+  //   [
+  //     [3.0, 4.0],
+  //     [5.0, 6.0],
+  //     [7.0, 8.0]
+  //   ],
+  //   [
+  //     [3.0, 5.0],
+  //     [0.0, 6.0],
+  //     [5.0, 6.0]
+  //   ]
+  // ]
+
+  // [2, 3, 2]
+  auto input_shape = Shape(scope, op.input(0));
+
+  // The Reshape with -1 flattens the reduction indices.
+  // [1]
+  auto reduction_indices = Reshape(scope, op.input(1), {-1});
+
+  // [2, 1, 2]
+  auto output_shape_kept_dims =
+      ReducedShapeHelper(scope, input_shape, reduction_indices);
+
+  // [1, 3, 1]
+  auto tile_scaling = SafeDivHelper(scope, input_shape, output_shape_kept_dims);
+
+  // [[[105, 192]], [[0, 180]]]
+  auto grad = Reshape(scope, grad_inputs[0], output_shape_kept_dims);
+
+  // [[[105, 192], [105, 192], [105, 192]], [[0, 180], [0, 180], [0, 180]]]
+  auto grad_tiled = Tile(scope, grad, tile_scaling);
+
+  Scope cpu_scope = scope.WithDevice("/cpu:0");
+
+  // [3]
+  auto rank = Rank(cpu_scope, op.input(0));
+
+  // Normalize any negative indices in the reduction_axes to positive values.
+  auto reduction_indices_pos = Mod(cpu_scope, Add(cpu_scope, reduction_indices, rank), rank);
+
+  // [1]
+  auto reduced = Cast(cpu_scope, reduction_indices_pos, DataType::DT_INT32);
+
+  // [0, 1, 2]
+  auto idx = Range(cpu_scope, zero, rank, one);
+
+  // [0, 2]
+  auto other = SetDiff1D(cpu_scope, idx, reduced).out;
+
+  // [1, 0, 2]
+  auto perm =
+      Concat(cpu_scope, std::initializer_list<Input>{reduced, other}, 0);
+
+  // 3 => [3]
+  auto reduced_num = Prod(cpu_scope, Gather(scope, input_shape, reduced), 0);
+
+  // 2 * 2 => [2]
+  auto other_num = Prod(cpu_scope, Gather(scope, input_shape, other), 0);
+
+  // [
+  //   [
+  //     [ 3., 4.],
+  //     [ 3., 5.]
+  //   ],
+  //   [
+  //     [ 5., 6.],
+  //     [ 0., 6.]
+  //   ],
+  //   [
+  //     [ 7., 8.],
+  //     [ 5., 6.]
+  //   ]
+  // ]
+  auto permuted = Transpose(scope, op.input(0), perm);
+
+  // [3, 2, 2]
+  auto permuted_shape = Shape(scope, permuted);
+
+  // [
+  //   [ 3., 4., 3., 5.],
+  //   [ 5., 6., 0., 6.],
+  //   [ 7., 8., 5., 6.]
+  // ]
+  auto reshaped = Reshape(
+      scope, permuted,
+      Stack(scope, std::initializer_list<Input>{reduced_num, other_num}));
+
+  // [
+  //   [ 1., 1., 1., 1.],
+  //   [ 3., 4., 3., 5.],
+  //   [ 15., 24., 0., 30.]
+  // ]
+  auto left = Cumprod(scope, reshaped, zero, Cumprod::Exclusive(true));
+
+  // [
+  //   [ 35., 48., 0., 36.],
+  //   [ 7., 8., 5., 6.],
+  //   [ 1., 1., 1., 1.]
+  // ]
+  auto right =
+      Cumprod(scope, reshaped, zero, Cumprod::Exclusive(true).Reverse(true));
+
+  // left * right =
+  // [
+  //   [ 35., 48., 0., 36.],
+  //   [ 21., 32., 15., 30.],
+  //   [ 15., 24., 0., 30.]
+  // ]
+  // y =
+  // [
+  //   [
+  //     [ 35., 48.],
+  //     [ 0., 36.]
+  //   ],
+  //   [
+  //     [ 21., 32.],
+  //     [ 15., 30.]
+  //   ],
+  //   [
+  //     [ 15., 24.],
+  //     [ 0., 30.]
+  //   ]
+  // ]
+  auto y = Reshape(scope, Mul(scope, left, right), permuted_shape);
+
+  // out =
+  // [
+  //   [
+  //     [ 35., 48.],
+  //     [ 21., 32.],
+  //     [ 15., 24.]
+  //   ],
+  //   [
+  //     [ 0., 36.],
+  //     [ 15., 30.],
+  //     [ 0., 30.]
+  //   ]
+  // ]
+  auto out =
+      Mul(scope, grad_tiled, Transpose(scope, y, InvertPermutation(scope, perm)));
+
+  grad_outputs->push_back(Reshape(scope, out, input_shape));
+
+  // stop propagation along reduction_indices
+  grad_outputs->push_back(NoGradient());
+  return scope.status();
+}
+REGISTER_GRADIENT_OP("Prod", ProdGrad);
+
 // MatMulGrad helper function used to compute two MatMul operations
 // based on input matrix transposition combinations.
 Status MatMulGradHelper(const Scope& scope, const bool is_batch,
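The heart of ProdGrad is the product-except-self trick: multiplying an exclusive cumulative product taken from the left by one taken from the right gives, at each position, the product of all the other entries, with no division and therefore no trouble with zeros. A minimal standalone C++ sketch of that identity on a 1-D array (plain vectors, not TF ops):

```cpp
#include <iostream>
#include <vector>

// For each i, computes the product of v[j] over all j != i using two
// exclusive cumulative products -- the same identity ProdGrad applies along
// the reduced axis. It stays well-defined even when v contains zeros.
std::vector<double> ProductExceptSelf(const std::vector<double>& v) {
  const int n = static_cast<int>(v.size());
  std::vector<double> left(n, 1.0), right(n, 1.0), out(n);
  for (int i = 1; i < n; ++i) left[i] = left[i - 1] * v[i - 1];
  for (int i = n - 2; i >= 0; --i) right[i] = right[i + 1] * v[i + 1];
  for (int i = 0; i < n; ++i) out[i] = left[i] * right[i];
  return out;
}

int main() {
  // First column of the comment's example: 3, 5, 7 -> 35, 21, 15 (= 105/v[i]).
  for (double g : ProductExceptSelf({3, 5, 7})) std::cout << g << " ";
  std::cout << "\n";
  // With a zero present, the division form would break, but this does not:
  // 3, 0, 7 -> 0, 21, 0.
  for (double g : ProductExceptSelf({3, 0, 7})) std::cout << g << " ";
  std::cout << "\n";
}
```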
@@ -843,6 +843,14 @@ TEST_F(NaryGradTest, SquaredDifference) {
   RunTest({x1, x2}, {x1_shape, x2_shape}, {y}, {x1_shape});
 }

+TEST_F(NaryGradTest, Pow) {
+  TensorShape shape({3});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
+  // fix exponent to avoid overflow
+  auto y = Pow(scope_, x, Const(scope_, {1.f, 2.f, 3.f}));
+  RunTest({x}, {shape}, {y}, {shape});
+}
+
 TEST_F(NaryGradTest, Maximum) {
   TensorShape shape({3, 2});
   auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
@@ -865,6 +873,15 @@ TEST_F(NaryGradTest, Minimum) {
   RunTest(x, x_init_value, y, shape);
 }

+TEST_F(NaryGradTest, Prod) {
+  TensorShape x_shape({2, 3, 2});
+  auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape));
+  auto y = Prod(scope_, x, {1});
+  // y's shape is the result of reducing x along axis 1
+  TensorShape y_shape({2, 1, 2});
+  RunTest({x}, {x_shape}, {y}, {y_shape});
+}
+
 TEST_F(NaryGradTest, Select) {
   TensorShape shape({3, 4});
   auto x1 = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(shape));
@@ -148,7 +148,11 @@ cc_library(

 cc_library(
     name = "simple_orc_jit",
-    srcs = ["simple_orc_jit.cc"],
+    srcs = [
+        "simple_orc_jit.cc",
+        "windows_compatibility.cc",
+        "windows_compatibility.h",
+    ],
     hdrs = ["simple_orc_jit.h"],
     deps = [
         ":compiler_functor",
@@ -33,13 +33,10 @@ void ExternalConstantPool::Insert(string name, const Literal& literal,
   CHECK(entries_.find(name) == entries_.end());

   int64 literal_size = ShapeUtil::ByteSizeOf(literal.shape());
-  void* raw_pointer;
-  CHECK_EQ(
-      posix_memalign(&raw_pointer, std::max<size_t>(alignment, sizeof(void*)),
-                     literal_size),
-      0)
-      << "failed to allocate " << literal_size << " bytes with alignment of "
-      << alignment;
+  void* raw_pointer = tensorflow::port::AlignedMalloc(
+      literal_size, std::max<size_t>(alignment, sizeof(void*)));
+  CHECK(raw_pointer != nullptr) << "failed to allocate " << literal_size
+                                << " bytes with alignment of " << alignment;

   std::memcpy(raw_pointer, literal.InternalData(), literal_size);
   entries_.emplace(std::move(name), static_cast<uint8*>(raw_pointer));
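The motivation for dropping the raw `posix_memalign` call is portability: it does not exist on Windows/MSVC, so the change routes allocation through TF's `tensorflow::port::AlignedMalloc` wrapper. A rough standalone sketch of the same pattern using C++17's `std::aligned_alloc` (itself also absent on MSVC, which is exactly why a wrapper is needed); note that unlike `posix_memalign`, `std::aligned_alloc` requires the size to be a multiple of the alignment:

```cpp
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iostream>

int main() {
  const std::size_t literal_size = 100;
  const std::size_t alignment = std::max<std::size_t>(64, sizeof(void*));
  // std::aligned_alloc requires size to be a multiple of alignment, so pad.
  const std::size_t padded =
      (literal_size + alignment - 1) / alignment * alignment;
  void* raw_pointer = std::aligned_alloc(alignment, padded);
  if (raw_pointer == nullptr) {
    std::cerr << "failed to allocate " << padded << " bytes with alignment of "
              << alignment << "\n";
    return 1;
  }
  std::memset(raw_pointer, 0, literal_size);  // stand-in for the memcpy above
  std::free(raw_pointer);  // aligned_alloc memory is released with free()
  return 0;
}
```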
@@ -20,6 +20,7 @@ limitations under the License.

 #include "tensorflow/compiler/xla/literal_util.h"
 #include "tensorflow/core/lib/gtl/flatmap.h"
+#include "tensorflow/core/platform/mem.h"

 namespace xla {
 namespace cpu {
@@ -49,10 +50,10 @@ class ExternalConstantPool {
   const uint8* Find(const string& name);

  private:
-  // We need to `free()` pointers allocated into `entries_` since we allocate
-  // them with `posix_memalign`.
+  // We need to `AlignedFree` pointers allocated into `entries_` since we
+  // allocate them with `AlignedMalloc`.
   struct FreeDeleter {
-    void operator()(void* ptr) { free(ptr); }
+    void operator()(void* ptr) { tensorflow::port::AlignedFree(ptr); }
   };

   tensorflow::gtl::FlatMap<string, std::unique_ptr<uint8, FreeDeleter>>
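The FreeDeleter change keeps allocation and deallocation paired: memory obtained from `AlignedMalloc` must be returned through `AlignedFree`. A small self-contained sketch of the same ownership pattern, with `std::aligned_alloc`/`std::free` standing in for TF's wrappers:

```cpp
#include <cstdlib>
#include <memory>

// Custom deleter so unique_ptr releases the buffer with the matching
// deallocator, mirroring the FreeDeleter in ExternalConstantPool.
struct AlignedFreeDeleter {
  void operator()(void* ptr) const { std::free(ptr); }
};

int main() {
  std::unique_ptr<unsigned char, AlignedFreeDeleter> buf(
      static_cast<unsigned char*>(std::aligned_alloc(64, 128)));
  if (!buf) return 1;
  buf.get()[0] = 42;  // use the buffer; freed automatically at scope exit
  return 0;
}
```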
@@ -15,7 +15,6 @@ limitations under the License.

 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"

-#include <dlfcn.h>
 #include <stdint.h>
 #include <algorithm>
 #include <list>
@@ -38,6 +37,7 @@ limitations under the License.
 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
+#include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/core/platform/logging.h"

tensorflow/compiler/xla/service/cpu/windows_compatibility.cc (new file, 32 lines)
@@ -0,0 +1,32 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
+
+#ifdef _MSC_VER
+
+#include <math.h>
+
+void sincos(double x, double *sinv, double *cosv) {
+  *sinv = sin(x);
+  *cosv = cos(x);
+}
+
+void sincosf(float x, float *sinv, float *cosv) {
+  *sinv = sinf(x);
+  *cosv = cosf(x);
+}
+
+#endif  // _MSC_VER
tensorflow/compiler/xla/service/cpu/windows_compatibility.h (new file, 31 lines)
@@ -0,0 +1,31 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
+
+#ifdef _MSC_VER
+
+extern "C" {
+
+// MSVC does not have sincos[f].
+void sincos(double x, double *sinv, double *cosv);
+void sincosf(float x, float *sinv, float *cosv);
+
+}
+
+#endif  // _MSC_VER
+
+#endif  // TENSORFLOW_COMPILER_XLA_SERVICE_CPU_WINDOWS_COMPATIBILITY_H_
@@ -196,17 +196,7 @@ class StatusAdaptorForMacros {
 #define TF_STATUS_MACROS_CONCAT_NAME(x, y) TF_STATUS_MACROS_CONCAT_IMPL(x, y)
 #define TF_STATUS_MACROS_CONCAT_IMPL(x, y) x##y

-#define TF_ASSIGN_OR_RETURN(...) \
-  TF_STATUS_MACRO_GET_VARIADIC_IMPL(__VA_ARGS__, TF_ASSIGN_OR_RETURN_IMPL_3, \
-                                    TF_ASSIGN_OR_RETURN_IMPL_2) \
-  (__VA_ARGS__)
-
-#define TF_STATUS_MACRO_GET_VARIADIC_IMPL(_1, _2, _3, NAME, ...) NAME
-
-#define TF_ASSIGN_OR_RETURN_IMPL_2(lhs, rexpr) \
-  TF_ASSIGN_OR_RETURN_IMPL_3(lhs, rexpr)
-
-#define TF_ASSIGN_OR_RETURN_IMPL_3(lhs, rexpr) \
+#define TF_ASSIGN_OR_RETURN(lhs, rexpr) \
   TF_ASSIGN_OR_RETURN_IMPL( \
       TF_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, rexpr)

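For readers unfamiliar with the macro: `TF_ASSIGN_OR_RETURN(lhs, rexpr)` evaluates a StatusOr-returning expression into a uniquely named temporary (via `__COUNTER__`), returns early on error, and otherwise moves the value into `lhs`. A hand-expanded sketch under a toy StatusOr stand-in (the real class and the exact error-return semantics live in the TF/XLA codebase; everything below is hypothetical illustration):

```cpp
#include <iostream>
#include <string>

// Toy stand-in for StatusOr<T>, only to show the shape of the expansion.
template <typename T>
struct MyStatusOr {
  bool ok;
  T value;
};

MyStatusOr<int> ParsePort(const std::string& s) {
  if (s.empty()) return {false, 0};
  return {true, std::stoi(s)};
}

bool Connect(const std::string& spec, int* out) {
  // Roughly what TF_ASSIGN_OR_RETURN(int port, ParsePort(spec)); expands to:
  auto _status_or_value42 = ParsePort(spec);  // unique name via __COUNTER__
  if (!_status_or_value42.ok) return false;   // early return on error
  int port = _status_or_value42.value;
  *out = port;
  return true;
}

int main() {
  int port = 0;
  std::cout << (Connect("8080", &port) ? port : -1) << "\n";
}
```

The deleted variadic-dispatch machinery existed only to accept an optional third argument; collapsing the macro back to exactly two parameters removes that indirection.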
@@ -36,7 +36,7 @@ namespace {

 class ClientTest : public ClientLibraryTestBase {};

-TEST_F(ClientTest, ExecuteWithLayout) {
+XLA_TEST_F(ClientTest, ExecuteWithLayout) {
   ComputationBuilder b(client_, TestName());

   std::vector<std::vector<int64>> layouts = {{0, 1}, {1, 0}};
@@ -68,7 +68,7 @@ TEST_F(ClientTest, ExecuteWithLayout) {
   }
 }

-TEST_F(ClientTest, ExecuteWithTupleLayout) {
+XLA_TEST_F(ClientTest, ExecuteWithTupleLayout) {
   ComputationBuilder b(client_, TestName());

   b.Tuple({b.ConstantR2<int32>({{1, 2}, {3, 4}}),
@@ -107,7 +107,8 @@ TEST_F(ClientTest, ExecuteWithTupleLayout) {
                                /*minor_to_major=*/{1, 0})));
 }

-TEST_F(ClientTest, DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) {
+XLA_TEST_F(ClientTest,
+           DISABLED_ON_CPU_PARALLEL(DISABLED_ON_GPU(ExecuteParallel))) {
   Computation add_with_one_arg, mul_with_two_args, dot_with_one_arg;
   Shape shape = ShapeUtil::MakeShape(S32, {2, 2});

@@ -101,7 +101,7 @@ py_library(
         "//tensorflow/contrib/training:training_py",
         "//tensorflow/contrib/util:util_py",
         "//tensorflow/python:util",
-    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_ops_py"]),
+    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]),
 )

 cc_library(
@@ -122,7 +122,7 @@ cc_library(
         "//tensorflow/contrib/tensor_forest:stats_ops_kernels",
         "//tensorflow/contrib/tensor_forest:tensor_forest_kernels",
         "//tensorflow/contrib/text:all_kernels",
-    ],
+    ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]),
 )

 cc_library(
@@ -43,7 +43,7 @@ def custom_gradient(fx, gx, x, axis=(),
   h(x) = x * stop_gradient(g(x)) + stop_gradient(f(x) - x * g(x))
   ```

-  is such that `h(x) = stop(f(x))` and `grad[h(x), x] = stop_gradient(g(x)).`
+  is such that `h(x) = stop_gradient(f(x))` and `grad[h(x), x] = stop_gradient(g(x)).`

   In addition to scalar-domain/scalar-range functions, this function also
   supports tensor-domain/scalar-range functions. However, in the latter case it
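The docstring fix matters because the identity only reads correctly with `stop_gradient` on both terms. Writing s(.) for stop_gradient (treated as a constant under differentiation), the construction gives exactly the advertised value and gradient:

```latex
h(x) = x\,g(x) + \bigl(f(x) - x\,g(x)\bigr) = f(x),
\qquad
h'(x) = \frac{d}{dx}\Bigl[x \cdot s\bigl(g(x)\bigr)\Bigr] + 0 = g(x),
```

since both s(...) factors are held constant when differentiating.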
@@ -372,7 +372,6 @@ if (tensorflow_BUILD_CC_TESTS)
       "${tensorflow_source_dir}/tensorflow/core/distributed_runtime/tensor_coding_test.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/remote_fused_graph_rewriter_transform_test.cc"
       "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/graph_transferer_test.cc"
-      "${tensorflow_source_dir}/tensorflow/core/kernels/hexagon/quantized_matmul_op_for_hexagon_test.cc"
   )

   if (NOT tensorflow_ENABLE_GPU)
@@ -22,6 +22,11 @@ Usage: python ./rnn_ptb.py --data-path=<path_to_dataset>
 Penn Treebank (PTB) dataset from:
 http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
 """

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 import argparse
 import os
 import sys
@@ -21,3 +21,14 @@ tf_py_test(
     ],
     tags = ["notsan"],
 )
+
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+)
@@ -233,6 +233,23 @@ py_library(
     ],
 )

+cuda_py_test(
+    name = "single_image_random_dot_stereograms_ops_test",
+    size = "medium",
+    srcs = ["python/kernel_tests/single_image_random_dot_stereograms_ops_test.py"],
+    additional_deps = [
+        ":distort_image_py",
+        ":image_py",
+        ":single_image_random_dot_stereograms_py",
+        "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
+        "//tensorflow/python:framework_for_generated_wrappers",
+        "//tensorflow/python:framework_test_lib",
+        "//tensorflow/python:math_ops",
+        "//tensorflow/python:platform_test",
+    ],
+)
+
 filegroup(
     name = "all_files",
     srcs = glob(
@@ -19,6 +19,10 @@ limitations under the License.

 namespace tensorflow {

+using shape_inference::DimensionHandle;
+using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
+
 REGISTER_OP("SingleImageRandomDotStereograms")
     .Attr("T: {double,float,int64,int32}")
     .Input("depth_values: T")
@@ -37,6 +41,26 @@ REGISTER_OP("SingleImageRandomDotStereograms")
           "output_image_shape: shape = { dim {size:1024} dim {size: 768} dim "
           "{size: 1}}")
     .Attr("output_data_window: shape = { dim {size:1022} dim {size: 757}}")
+    .SetShapeFn([](InferenceContext* c) {
+      // Validate that the output_image_shape attr is correct.
+      // NOTE: The output_image_shape is [X, Y, C]
+      // while the output data is [Y, X, C] (or [H, W, C]).
+      // As a result, by default the output_image_shape has the value
+      // of [1024, 768, 1] but the output data will be [768, 1024, 1].
+      PartialTensorShape shape;
+      TF_RETURN_IF_ERROR(c->GetAttr("output_image_shape", &shape));
+      ShapeHandle output_image_shape;
+      TF_RETURN_IF_ERROR(
+          c->MakeShapeFromPartialTensorShape(shape, &output_image_shape));
+      DimensionHandle x_dim = c->Dim(output_image_shape, 0);
+      DimensionHandle y_dim = c->Dim(output_image_shape, 1);
+
+      int colors;
+      TF_RETURN_IF_ERROR(c->GetAttr("number_colors", &colors));
+
+      c->set_output(0, c->MakeShape(
+          {y_dim, x_dim, colors > 256 ? c->MakeDim(3) : c->MakeDim(1)}));
+      return Status::OK();
+    })
     .Doc(R"doc(
 Outputs a single image random dot stereogram for export via encode_PNG/JPG OP.

(new file: python/kernel_tests/single_image_random_dot_stereograms_ops_test.py, per the BUILD entry above)
@@ -0,0 +1,87 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for python single_image_random_dot_stereograms_ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from six.moves import xrange  # pylint: disable=redefined-builtin
+
+from tensorflow.contrib.image.python.ops.single_image_random_dot_stereograms \
+    import single_image_random_dot_stereograms
+from tensorflow.python.client import session
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import test_util
+from tensorflow.python.platform import googletest
+
+
+class SingleImageRandomDotStereogramsTest(test_util.TensorFlowTestCase):
+
+  def test_shape_function_default(self):
+    """
+    NOTE: The output_image_shape is [X, Y, C]
+    while the output data is [Y, X, C] (or [H, W, C]).
+    As a result, by default the output_image_shape has the value
+    of [1024, 768, 1], but the output data will be [768, 1024, 1].
+    """
+    x_np = [[1, 2, 3, 3, 2, 1],
+            [1, 2, 3, 4, 5, 2],
+            [1, 2, 3, 4, 5, 3],
+            [1, 2, 3, 4, 5, 4],
+            [6, 5, 4, 4, 5, 5]]
+    x_tf = constant_op.constant(x_np)
+    # By default [1024, 768, 1] => [768, 1024, 1].
+    sirds_1 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=256,
+        normalize=True)
+    shape_1 = sirds_1.get_shape().as_list()
+    self.assertEqual(shape_1, [768, 1024, 1])
+    with self.test_session():
+      r_tf_1 = sirds_1.eval()
+      self.assertAllEqual(shape_1, r_tf_1.shape)
+
+    # If color > 256 then [1024, 768, 3] => [768, 1024, 3].
+    sirds_2 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=512,
+        normalize=True)
+    shape_2 = sirds_2.get_shape().as_list()
+    self.assertEqual(shape_2, [768, 1024, 3])
+    with self.test_session():
+      r_tf_2 = sirds_2.eval()
+      self.assertAllEqual(shape_2, r_tf_2.shape)
+
+    # If explicitly set output_image_shape to [1200, 800, 1],
+    # then the output data should be [800, 1200, 1].
+    sirds_3 = single_image_random_dot_stereograms(
+        x_tf,
+        convergence_dots_size=8,
+        number_colors=256,
+        normalize=True,
+        output_image_shape=[1200, 800, 1])
+    shape_3 = sirds_3.get_shape().as_list()
+    self.assertEqual(shape_3, [800, 1200, 1])
+    with self.test_session():
+      r_tf_3 = sirds_3.eval()
+      self.assertAllEqual(shape_3, r_tf_3.shape)
+
+
+if __name__ == '__main__':
+  googletest.main()
(new file, 18 lines)
@@ -0,0 +1,18 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
@@ -83,9 +83,11 @@ py_test(
     srcs_version = "PY2AND3",
     deps = [
         ":kernel_methods",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
        "//tensorflow/python:errors",
         "//tensorflow/python:framework_for_generated_wrappers",
+        "//third_party/py/numpy",
     ],
 )

@@ -73,13 +73,13 @@ def sparse_multiclass_hinge_loss(
       labels)) as scope:

     # Check logits Tensor has valid rank.
-    logits_shape = logits.get_shape()
-    logits_rank = logits_shape.ndims
+    logits_rank = logits.get_shape().ndims
     if logits_rank != 2:
       raise ValueError(
           'logits should have rank 2 ([batch_size, num_classes]). Given rank is'
           ' {}'.format(logits_rank))
-    batch_size, num_classes = logits_shape[0].value, logits_shape[1].value
+    logits_shape = array_ops.shape(logits)
+    batch_size, num_classes = logits_shape[0], logits_shape[1]
     logits = math_ops.to_float(logits)

     # Check labels have valid type.
@@ -18,10 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import numpy as np
+
 from tensorflow.contrib.kernel_methods.python import losses
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
+from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test


@@ -114,6 +117,26 @@ class SparseMulticlassHingeLossTest(test.TestCase):
       loss = losses.sparse_multiclass_hinge_loss(labels, logits)
       self.assertAlmostEqual(loss.eval(), 0.0, 3)

+  def testUnknownShape(self):
+    """Result should match `testZeroLossInt32Labels`."""
+    logits_np = np.array([[1.2, -1.4, -1.0],
+                          [1.4, 1.8, 4.0],
+                          [0.5, 1.8, -1.0]])
+    labels_np = np.array([0, 2, 1], dtype=np.int32)
+
+    logits_shapes = [[3, 3],  # batch_size, num_classes
+                     [None, 3],
+                     [3, None],
+                     [None, None]]
+
+    for batch_size, num_classes in logits_shapes:
+      with self.test_session():
+        logits = array_ops.placeholder(
+            dtypes.float32, shape=(batch_size, num_classes))
+        labels = array_ops.placeholder(dtypes.int32, shape=(batch_size,))
+        loss = losses.sparse_multiclass_hinge_loss(labels, logits)
+        result = loss.eval(feed_dict={logits: logits_np, labels: labels_np})
+        self.assertAlmostEqual(result, 0.0, 3)
+
   def testCorrectPredictionsSomeClassesInsideMargin(self):
     """Loss is > 0 even if true class logits are higher than other classes."""
     with self.test_session():
@@ -1747,6 +1747,12 @@ class BatchNormTest(test.TestCase):
     expected_var *= correction_factor
     return expected_var, correction_factor

+  def testBatchNormCenterFalse(self):
+    a = array_ops.placeholder(dtype=dtypes.float32, shape=(10, 10, 10, 10))
+    # Test that center=False builds a valid graph.
+    _layers.batch_norm(a, center=False, data_format='NCHW',
+                       zero_debias_moving_mean=True)
+
   def testUnknownShape(self):
     with ops.Graph().as_default() as g, self.test_session(g):
       inputs = array_ops.placeholder(dtype=dtypes.float32)
@@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -19,6 +19,10 @@
 @@MaxBytesInUse
 """

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesInUse
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import BytesLimit
 from tensorflow.contrib.memory_stats.python.ops.memory_stats_ops import MaxBytesInUse
@@ -76,9 +76,10 @@ class MemoryStatsOpsTest(test_util.TensorFlowTestCase):
     with ops.control_dependencies([a]):
       bytes_in_use_op = memory_stats_ops.BytesInUse()
       with ops.control_dependencies([bytes_in_use_op]):
-        b = math_ops.add(a, a)
+        b = random_ops.random_uniform(matrix_shape, dtype=dtype)
+        c = math_ops.matmul(a, b)

-    _, bytes_in_use, max_bytes_in_use = sess.run([b, bytes_in_use_op,
+    _, bytes_in_use, max_bytes_in_use = sess.run([c, bytes_in_use_op,
                                                   max_bytes_in_use_op])

     # intermediate result allocates 1 matrix, max usage is at least 2
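For readers unfamiliar with these ops, a hedged usage sketch (TF 1.x contrib API; a GPU device is assumed, and the exact byte counts are device- and timing-dependent):

```python
import tensorflow as tf
from tensorflow.contrib.memory_stats import BytesInUse, MaxBytesInUse

with tf.device('/gpu:0'):
  a = tf.random_uniform((1024, 1024))
  b = tf.matmul(a, a)
  # Query allocator state on the same device as the computation.
  in_use = BytesInUse()
  peak = MaxBytesInUse()

with tf.Session() as sess:
  _, current_bytes, peak_bytes = sess.run([b, in_use, peak])
  print('current:', current_bytes, 'peak:', peak_bytes)
```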
@@ -66,6 +66,7 @@ See the @{$python/contrib.metrics} guide.
 @@set_intersection
 @@set_size
 @@set_union
+@@cohen_kappa
 @@count
 @@precision_recall_at_equal_thresholds
 @@recall_at_precision
@@ -82,6 +83,7 @@ from tensorflow.contrib.metrics.python.ops.confusion_matrix_ops import confusion
 from tensorflow.contrib.metrics.python.ops.histogram_ops import auc_using_histogram
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metric_map
 from tensorflow.contrib.metrics.python.ops.metric_ops import aggregate_metrics
+from tensorflow.contrib.metrics.python.ops.metric_ops import cohen_kappa
 from tensorflow.contrib.metrics.python.ops.metric_ops import count
 from tensorflow.contrib.metrics.python.ops.metric_ops import precision_recall_at_equal_thresholds
 from tensorflow.contrib.metrics.python.ops.metric_ops import recall_at_precision
@@ -24,10 +24,12 @@ from __future__ import print_function

 import collections as collections_lib

+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import confusion_matrix
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics
@@ -3297,9 +3299,131 @@ def count(values,
   return count_, update_op


+def cohen_kappa(labels, predictions_idx, num_classes, weights=None,
+                metrics_collections=None, updates_collections=None, name=None):
+  """Calculates Cohen's kappa.
+
+  [Cohen's kappa](https://en.wikipedia.org/wiki/Cohen's_kappa) is a statistic
+  that measures inter-annotator agreement.
+
+  The `cohen_kappa` function calculates the confusion matrix, and creates three
+  local variables to compute the Cohen's kappa: `po`, `pe_row`, and `pe_col`,
+  which refer to the diagonal part, row totals, and column totals of the
+  confusion matrix, respectively. This value is ultimately returned as `kappa`,
+  an idempotent operation that is calculated by
+
+      pe = (pe_row * pe_col) / N
+      k = (sum(po) - sum(pe)) / (N - sum(pe))
+
+  For estimation of the metric over a stream of data, the function creates an
+  `update_op` operation that updates these variables and returns the
+  `kappa`. `update_op` weights each prediction by the corresponding value in
+  `weights`.
+
+  Class labels are expected to start at 0. E.g., if `num_classes`
+  was three, then the possible labels would be [0, 1, 2].
+
+  If `weights` is `None`, weights default to 1. Use weights of 0 to mask values.
+
+  NOTE: Equivalent to `sklearn.metrics.cohen_kappa_score`, but that method
+  does not support a weight matrix yet.
+
+  Args:
+    labels: 1-D `Tensor` of real labels for the classification task. Must be
+      one of the following types: int16, int32, int64.
+    predictions_idx: 1-D `Tensor` of predicted class indices for a given
+      classification. Must have the same type as `labels`.
+    num_classes: The possible number of labels.
+    weights: Optional `Tensor` whose shape matches `predictions`.
+    metrics_collections: An optional list of collections that `kappa` should
+      be added to.
+    updates_collections: An optional list of collections that `update_op`
+      should be added to.
+    name: An optional variable_scope name.
+
+  Returns:
+    kappa: Scalar float `Tensor` representing the current Cohen's kappa.
+    update_op: `Operation` that increments `po`, `pe_row` and `pe_col`
+      variables appropriately and whose value matches `kappa`.
+
+  Raises:
+    ValueError: If `num_classes` is less than 2, or `predictions` and `labels`
+      have mismatched shapes, or if `weights` is not `None` and its shape
+      doesn't match `predictions`, or if either `metrics_collections` or
+      `updates_collections` are not a list or tuple.
+    RuntimeError: If eager execution is enabled.
+  """
+  if context.in_eager_mode():
+    raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported '
+                       'when eager execution is enabled.')
+  if num_classes < 2:
+    raise ValueError('`num_classes` must be >= 2. '
+                     'Found: {}'.format(num_classes))
+  with variable_scope.variable_scope(name, 'cohen_kappa',
+                                     (labels, predictions_idx, weights)):
+    # Convert 2-dim (num, 1) to 1-dim (num,)
+    labels.get_shape().with_rank_at_most(2)
+    if labels.get_shape().ndims == 2:
+      labels = array_ops.squeeze(labels, axis=[-1])
+    predictions_idx, labels, weights = (
+        metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
+            predictions=predictions_idx, labels=labels, weights=weights))
+    predictions_idx.get_shape().assert_is_compatible_with(labels.get_shape())
+
+    stat_dtype = (dtypes.int64
+                  if weights is None or weights.dtype.is_integer
+                  else dtypes.float32)
+    po = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='po')
+    pe_row = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='pe_row')
+    pe_col = metrics_impl.metric_variable(
+        (num_classes,), stat_dtype, name='pe_col')
+
+    # Table of the counts of agreement:
+    counts_in_table = confusion_matrix.confusion_matrix(
+        labels, predictions_idx,
+        num_classes=num_classes, weights=weights,
+        dtype=stat_dtype, name="counts_in_table")
+
+    po_t = array_ops.diag_part(counts_in_table)
+    pe_row_t = math_ops.reduce_sum(counts_in_table, axis=0)
+    pe_col_t = math_ops.reduce_sum(counts_in_table, axis=1)
+    update_po = state_ops.assign_add(po, po_t)
+    update_pe_row = state_ops.assign_add(pe_row, pe_row_t)
+    update_pe_col = state_ops.assign_add(pe_col, pe_col_t)
+
+    def _calculate_k(po, pe_row, pe_col, name):
+      po_sum = math_ops.reduce_sum(po)
+      total = math_ops.reduce_sum(pe_row)
+      pe_sum = math_ops.reduce_sum(
+          metrics_impl._safe_div(  # pylint: disable=protected-access
+              pe_row * pe_col, total, None))
+      po_sum, pe_sum, total = (math_ops.to_double(po_sum),
+                               math_ops.to_double(pe_sum),
+                               math_ops.to_double(total))
+      # kappa = (po - pe) / (N - pe)
+      k = metrics_impl._safe_scalar_div(  # pylint: disable=protected-access
+          po_sum - pe_sum, total - pe_sum, name=name)
+      return k
+
+    kappa = _calculate_k(po, pe_row, pe_col, name='value')
+    update_op = _calculate_k(update_po, update_pe_row, update_pe_col,
+                             name='update_op')
+
+    if metrics_collections:
+      ops.add_to_collections(metrics_collections, kappa)
+
+    if updates_collections:
+      ops.add_to_collections(updates_collections, update_op)
+
+    return kappa, update_op
+
+
 __all__ = [
     'aggregate_metric_map',
     'aggregate_metrics',
+    'cohen_kappa',
     'count',
     'precision_recall_at_equal_thresholds',
     'recall_at_precision',
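To make the docstring's formula concrete, here is a small self-contained NumPy check of the kappa computation, using the same 3x3 confusion matrix as the `testBasic` case below (illustrative only, not part of the commit):

```python
import numpy as np

# Confusion matrix from the testBasic case below.
cm = np.array([[9, 3, 1],
               [4, 8, 2],
               [2, 1, 6]], dtype=np.float64)

n = cm.sum()             # total observations, N = 36
po = np.diag(cm)         # observed agreement per class: [9, 8, 6]
pe_row = cm.sum(axis=0)  # column totals: [15, 12, 9]
pe_col = cm.sum(axis=1)  # row totals: [13, 14, 9]

pe = pe_row * pe_col / n  # chance agreement per class: [5.42, 4.67, 2.25]
kappa = (po.sum() - pe.sum()) / (n - pe.sum())
print(round(kappa, 2))    # -> 0.45, matching the hand-computed value
```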
@@ -6660,5 +6660,213 @@ class CountTest(test.TestCase):
     self.assertAlmostEqual(4.1, result.eval(), 5)


+class CohenKappaTest(test.TestCase):
+
+  def _confusion_matrix_to_samples(self, confusion_matrix):
+    x, y = confusion_matrix.shape
+    pairs = []
+    for label in range(x):
+      for feature in range(y):
+        pairs += [label, feature] * confusion_matrix[label, feature]
+    pairs = np.array(pairs).reshape((-1, 2))
+    return pairs[:, 0], pairs[:, 1]
+
+  def setUp(self):
+    np.random.seed(1)
+    ops.reset_default_graph()
+
+  def testVars(self):
+    metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2)
+    _assert_metric_variables(self, (
+        'cohen_kappa/po:0',
+        'cohen_kappa/pe_row:0',
+        'cohen_kappa/pe_col:0',))
+
+  def testMetricsCollection(self):
+    my_collection_name = '__metrics__'
+    kappa, _ = metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2,
+        metrics_collections=[my_collection_name])
+    self.assertListEqual(ops.get_collection(my_collection_name), [kappa])
+
+  def testUpdatesCollection(self):
+    my_collection_name = '__updates__'
+    _, update_op = metrics.cohen_kappa(
+        predictions_idx=array_ops.ones((10, 1)),
+        labels=array_ops.ones((10, 1)),
+        num_classes=2,
+        updates_collections=[my_collection_name])
+    self.assertListEqual(ops.get_collection(my_collection_name), [update_op])
+
+  def testValueTensorIsIdempotent(self):
+    predictions = random_ops.random_uniform(
+        (10, 1), maxval=3, dtype=dtypes_lib.int64, seed=1)
+    labels = random_ops.random_uniform(
+        (10, 1), maxval=3, dtype=dtypes_lib.int64, seed=2)
+    kappa, update_op = metrics.cohen_kappa(labels, predictions, 3)
+
+    with self.test_session() as sess:
+      sess.run(variables.local_variables_initializer())
+
+      # Run several updates.
+      for _ in range(10):
+        sess.run(update_op)
+
+      # Then verify idempotency.
+      initial_kappa = kappa.eval()
+      for _ in range(10):
+        self.assertAlmostEqual(initial_kappa, kappa.eval(), 5)
+
+  def testBasic(self):
+    confusion_matrix = np.array([
+        [9, 3, 1],
+        [4, 8, 2],
+        [2, 1, 6]])
+    # overall total = 36
+    # po = [9, 8, 6], sum(po) = 23
+    # pe_row = [15, 12, 9], pe_col = [13, 14, 9], so pe = [5.42, 4.67, 2.25]
+    # finally, kappa = (sum(po) - sum(pe)) / (N - sum(pe))
+    #                = (23 - 12.34) / (36 - 12.34)
+    #                = 0.45
+    # see: http://psych.unl.edu/psycrs/handcomp/hckappa.PDF
+    expect = 0.45
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+
+    dtypes = [dtypes_lib.int16, dtypes_lib.int32, dtypes_lib.int64]
+    shapes = [(len(labels),),  # 1-dim
+              (len(labels), 1)]  # 2-dim
+    weights = [None, np.ones_like(labels)]
+
+    for dtype in dtypes:
+      for shape in shapes:
+        for weight in weights:
+          with self.test_session() as sess:
+            predictions_tensor = constant_op.constant(
+                np.reshape(predictions, shape), dtype=dtype)
+            labels_tensor = constant_op.constant(
+                np.reshape(labels, shape), dtype=dtype)
+            kappa, update_op = metrics.cohen_kappa(
+                labels_tensor, predictions_tensor, 3, weights=weight)
+
+            sess.run(variables.local_variables_initializer())
+            self.assertAlmostEqual(expect, sess.run(update_op), 2)
+            self.assertAlmostEqual(expect, kappa.eval(), 2)
+
+  def testAllCorrect(self):
+    inputs = np.arange(0, 100) % 4
+    # confusion matrix
+    # [[25, 0, 0],
+    #  [0, 25, 0],
+    #  [0, 0, 25]]
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(inputs, inputs)
+    expect = 1.0
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(inputs, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(inputs)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testAllIncorrect(self):
+    labels = np.arange(0, 100) % 4
+    predictions = (labels + 1) % 4
+    # confusion matrix
+    # [[0, 25, 0],
+    #  [0, 0, 25],
+    #  [25, 0, 0]]
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(labels, predictions)
+    expect = -0.333333333333
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(predictions, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(labels)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testWeighted(self):
+    confusion_matrix = np.array([
+        [9, 3, 1],
+        [4, 8, 2],
+        [2, 1, 6]])
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+    num_samples = np.sum(confusion_matrix, dtype=np.int32)
+    weights = (np.arange(0, num_samples) % 5) / 5.0
+    # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(
+    #                          labels, predictions, sample_weight=weights)
+    expect = 0.453466583385
+
+    with self.test_session() as sess:
+      predictions = constant_op.constant(predictions, dtype=dtypes_lib.float32)
+      labels = constant_op.constant(labels)
+      kappa, update_op = metrics.cohen_kappa(labels, predictions, 4,
+                                             weights=weights)
+
+      sess.run(variables.local_variables_initializer())
+      self.assertAlmostEqual(expect, sess.run(update_op), 5)
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testWithMultipleUpdates(self):
+    confusion_matrix = np.array([
+        [90, 30, 10, 20],
+        [40, 80, 20, 30],
+        [20, 10, 60, 35],
+        [15, 25, 30, 25]])
+    labels, predictions = self._confusion_matrix_to_samples(confusion_matrix)
+    num_samples = np.sum(confusion_matrix, dtype=np.int32)
+    weights = (np.arange(0, num_samples) % 5) / 5.0
+    num_classes = confusion_matrix.shape[0]
+
+    batch_size = num_samples // 10
+    predictions_t = array_ops.placeholder(dtypes_lib.float32,
+                                          shape=(batch_size,))
+    labels_t = array_ops.placeholder(dtypes_lib.int32,
+                                     shape=(batch_size,))
+    weights_t = array_ops.placeholder(dtypes_lib.float32,
+                                      shape=(batch_size,))
+    kappa, update_op = metrics.cohen_kappa(
+        labels_t, predictions_t, num_classes, weights=weights_t)
+    with self.test_session() as sess:
+      sess.run(variables.local_variables_initializer())
+
+      for idx in range(0, num_samples, batch_size):
+        batch_start, batch_end = idx, idx + batch_size
+        sess.run(update_op,
+                 feed_dict={labels_t: labels[batch_start:batch_end],
+                            predictions_t: predictions[batch_start:batch_end],
+                            weights_t: weights[batch_start:batch_end]})
+      # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(
+      #                          labels_np, predictions_np, sample_weight=weights_np)
+      expect = 0.289965397924
+      self.assertAlmostEqual(expect, kappa.eval(), 5)
+
+  def testInvalidNumClasses(self):
+    predictions = array_ops.placeholder(dtypes_lib.float32, shape=(4, 1))
+    labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 1))
+    with self.assertRaisesRegexp(ValueError, 'num_classes'):
+      metrics.cohen_kappa(labels, predictions, 1)
+
+  def testInvalidDimension(self):
+    predictions = array_ops.placeholder(dtypes_lib.float32, shape=(4, 1))
+    invalid_labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 2))
+    with self.assertRaises(ValueError):
+      metrics.cohen_kappa(invalid_labels, predictions, 3)
+
+    invalid_predictions = array_ops.placeholder(dtypes_lib.float32,
+                                                shape=(4, 2))
+    labels = array_ops.placeholder(dtypes_lib.int32, shape=(4, 1))
+    with self.assertRaises(ValueError):
+      metrics.cohen_kappa(labels, invalid_predictions, 3)
+
+
 if __name__ == '__main__':
   test.main()
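The multiple-updates test above is the streaming pattern in a nutshell. As a standalone hedged sketch (TF 1.x; data and batch sizes are illustrative), accumulating kappa over mini-batches looks like:

```python
import numpy as np
import tensorflow as tf

labels_np = np.random.randint(0, 3, size=100)
preds_np = np.random.randint(0, 3, size=100)

labels_t = tf.placeholder(tf.int32, shape=(10,))
preds_t = tf.placeholder(tf.int32, shape=(10,))
kappa, update_op = tf.contrib.metrics.cohen_kappa(labels_t, preds_t, 3)

with tf.Session() as sess:
  sess.run(tf.local_variables_initializer())
  # Each update folds one batch into the po/pe_row/pe_col accumulators.
  for i in range(0, 100, 10):
    sess.run(update_op, feed_dict={labels_t: labels_np[i:i + 10],
                                   preds_t: preds_np[i:i + 10]})
  print(sess.run(kappa))  # kappa over all batches seen so far
```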
@@ -6,20 +6,9 @@ package(default_visibility = [

 licenses(["notice"])  # Apache 2.0

-filegroup(
-    name = "all_files",
-    srcs = glob(
-        ["**/*"],
-        exclude = [
-            "**/METADATA",
-            "**/OWNERS",
-        ],
-    ),
-    visibility = ["//tensorflow:__subpackages__"],
-)
-
 load(
     "//tensorflow/core:platform/default/build_config.bzl",
+    "tf_additional_mpi_lib_defines",
     "tf_proto_library_cc",
 )

@@ -33,26 +22,98 @@ tf_proto_library_cc(
     ],
 )

-load("//tensorflow:tensorflow.bzl", "tf_custom_op_library")
-load("//tensorflow:tensorflow.bzl", "tf_py_test")
+cc_library(
+    name = "mpi_defines",
+    defines = tf_additional_mpi_lib_defines(),
+)
+
+load(
+    "//tensorflow:tensorflow.bzl",
+    "tf_custom_op_py_library",
+    "tf_custom_op_library",
+    "tf_gen_op_wrapper_py",
+    "tf_gen_op_libs",
+    "tf_kernel_library",
+    "tf_py_test",
+)

 tf_custom_op_library(
-    name = "mpi_collectives.so",
+    name = "python/ops/_mpi_ops.so",
     srcs = [
-        "mpi_ops.cc",
-        "ring.cc",
-        "ring.h",
+        "kernels/mpi_ops.cc",
+        "kernels/ring.cc",
+        "kernels/ring.h",
+        "ops/mpi_ops.cc",
     ],
     gpu_srcs = [
-        "ring.cu.cc",
-        "ring.h",
+        "kernels/ring.cu.cc",
+        "kernels/ring.h",
     ],
     deps = [
+        ":mpi_defines",
         ":mpi_message_proto_cc",
         "//third_party/mpi",
     ],
 )

+tf_kernel_library(
+    name = "mpi_ops_kernels",
+    srcs = [
+        "kernels/mpi_ops.cc",
+        "kernels/ring.cc",
+    ],
+    hdrs = [
+        "kernels/ring.h",
+    ],
+    gpu_srcs = [
+        "kernels/ring.cu.cc",
+    ],
+    deps = [
+        ":mpi_defines",
+        "//tensorflow/core:core_cpu",
+        "//tensorflow/core:framework",
+        "//tensorflow/core:gpu_headers_lib",
+        "//tensorflow/core:lib",
+        "//tensorflow/core:proto_text",
+        "//tensorflow/core:stream_executor",
+    ],
+    # TODO: Include? alwayslink = 1,
+)
+
+tf_gen_op_libs(
+    op_lib_names = ["mpi_ops"],
+)
+
+tf_gen_op_wrapper_py(
+    name = "mpi_ops",
+    deps = [":mpi_ops_op_lib"],
+)
+
+tf_custom_op_py_library(
+    name = "mpi_collectives_py",
+    srcs = [
+        "__init__.py",
+        "python/ops/mpi_ops.py",
+    ],
+    dso = [
+        ":python/ops/_mpi_ops.so",
+    ],
+    kernels = [
+        ":mpi_ops_kernels",
+        ":mpi_ops_op_lib",
+    ],
+    srcs_version = "PY2AND3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":mpi_ops",
+        "//tensorflow/contrib/util:util_py",
+        "//tensorflow/python:device",
+        "//tensorflow/python:framework_ops",
+        "//tensorflow/python:platform",
+        "//tensorflow/python:util",
+    ],
+)
+
 tf_py_test(
     name = "mpi_ops_test",
     srcs = ["mpi_ops_test.py"],
@@ -61,20 +122,19 @@ tf_py_test(
         "//tensorflow/python:platform",
     ],
     data = [
-        ":mpi_collectives.so",
+        ":python/ops/_mpi_ops.so",
     ],
     tags = ["manual"],
 )

-py_library(
-    name = "mpi_ops_py",
-    srcs = [
-        "__init__.py",
-        "mpi_ops.py",
-    ],
-    data = [
-        ":mpi_collectives.so",
-    ],
-    srcs_version = "PY2AND3",
-    visibility = ["//visibility:public"],
-)
+filegroup(
+    name = "all_files",
+    srcs = glob(
+        ["**/*"],
+        exclude = [
+            "**/METADATA",
+            "**/OWNERS",
+        ],
+    ),
+    visibility = ["//tensorflow:__subpackages__"],
+)
@@ -37,7 +37,7 @@ for detecting the running MPI configuration.
 Example:

 ```python
-from tensorflow.contrib import mpi
+import tensorflow.contrib.mpi_collectives as mpi

 # Use `mpi.Session` instead of `tf.Session`
 with mpi.Session() as session:
@@ -48,8 +48,10 @@ with mpi.Session() as session:
     print("MPI Size:", session.run(mpi.size()))
 ```

-@@rank
+@@init
 @@size
+@@rank
+@@local_rank

 ### Ring Allreduce and Allgather
@@ -123,12 +125,12 @@ from __future__ import print_function

 import tensorflow as tf

-from tensorflow.contrib.mpi_collectives.mpi_ops import size
-from tensorflow.contrib.mpi_collectives.mpi_ops import rank
-from tensorflow.contrib.mpi_collectives.mpi_ops import local_rank
-from tensorflow.contrib.mpi_collectives.mpi_ops import allgather
-from tensorflow.contrib.mpi_collectives.mpi_ops import _allreduce
-from tensorflow.contrib.mpi_collectives.mpi_ops import init
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import init
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import size
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import rank
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import local_rank
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import allgather
+from tensorflow.contrib.mpi_collectives.python.ops.mpi_ops import _allreduce


 def allreduce(tensor, average=True):
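Since this hunk ends at the `allreduce` wrapper's signature, a hedged usage sketch of the renamed module (assuming the ops built above are available and the script is launched under an MPI launcher such as `mpirun -np 4`):

```python
import tensorflow as tf
import tensorflow.contrib.mpi_collectives as mpi

with mpi.Session() as session:
  rank = session.run(mpi.rank())
  # Every process contributes its own rank; with average=True each
  # process should see the mean of 0..size-1 after the allreduce.
  value = tf.constant(float(rank))
  averaged = mpi.allreduce(value, average=True)
  print("rank", rank, "average:", session.run(averaged))
```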
1132 tensorflow/contrib/mpi_collectives/kernels/mpi_ops.cc Normal file
File diff suppressed because it is too large
80 tensorflow/contrib/mpi_collectives/kernels/ring.cc Normal file
@@ -0,0 +1,80 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifdef TENSORFLOW_USE_MPI

#define EIGEN_USE_THREADS

#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"

namespace tensorflow {
namespace contrib {
namespace mpi_collectives {

using CPUDevice = Eigen::ThreadPoolDevice;

extern template MPI_Datatype MPIType<float>();
extern template MPI_Datatype MPIType<int>();
extern template MPI_Datatype MPIType<long long>();
extern template DataType TensorFlowDataType<float>();
extern template DataType TensorFlowDataType<int>();
extern template DataType TensorFlowDataType<long long>();

// Generate all necessary specializations for RingAllreduce.
template Status RingAllreduce<CPUDevice, int>(OpKernelContext*, const Tensor*,
                                              Tensor*, Tensor*);
template Status RingAllreduce<CPUDevice, long long>(OpKernelContext*,
                                                    const Tensor*, Tensor*,
                                                    Tensor*);
template Status RingAllreduce<CPUDevice, float>(OpKernelContext*, const Tensor*,
                                                Tensor*, Tensor*);

// Generate all necessary specializations for RingAllgather.
template Status RingAllgather<CPUDevice, int>(OpKernelContext*, const Tensor*,
                                              const std::vector<size_t>&,
                                              Tensor*);
template Status RingAllgather<CPUDevice, long long>(OpKernelContext*,
                                                    const Tensor*,
                                                    const std::vector<size_t>&,
                                                    Tensor*);
template Status RingAllgather<CPUDevice, float>(OpKernelContext*, const Tensor*,
                                                const std::vector<size_t>&,
                                                Tensor*);

// Copy data on a CPU using a straight-forward memcpy.
template <>
void CopyTensorData<CPUDevice>(void* dst, void* src, size_t size) {
  std::memcpy(dst, src, size);
};

// Accumulate values on a CPU.
#define GENERATE_ACCUMULATE(type)                                     \
  template <>                                                         \
  void AccumulateTensorData<CPUDevice, type>(type * dst, type * src,  \
                                             size_t size) {          \
    for (unsigned int i = 0; i < size; i++) {                         \
      dst[i] += src[i];                                               \
    }                                                                 \
  };
GENERATE_ACCUMULATE(int);
GENERATE_ACCUMULATE(long long);
GENERATE_ACCUMULATE(float);
#undef GENERATE_ACCUMULATE

}  // namespace mpi_collectives
}  // namespace contrib
}  // namespace tensorflow

#endif  // TENSORFLOW_USE_MPI
117 tensorflow/contrib/mpi_collectives/kernels/ring.cu.cc Normal file
@@ -0,0 +1,117 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifdef TENSORFLOW_USE_MPI

#if GOOGLE_CUDA

#define EIGEN_USE_GPU

#include "tensorflow/contrib/mpi_collectives/kernels/ring.h"

namespace tensorflow {
namespace contrib {
namespace mpi_collectives {

using CPUDevice = Eigen::ThreadPoolDevice;

template <>
MPI_Datatype MPIType<float>() {
  return MPI_FLOAT;
};
template <>
MPI_Datatype MPIType<int>() {
  return MPI_INT;
};
template <>
MPI_Datatype MPIType<long long>() {
  return MPI_LONG_LONG;
};

template <>
DataType TensorFlowDataType<float>() {
  return DT_FLOAT;
};
template <>
DataType TensorFlowDataType<int>() {
  return DT_INT32;
};
template <>
DataType TensorFlowDataType<long long>() {
  return DT_INT64;
};

// Generate all necessary specializations for RingAllreduce.
template Status RingAllreduce<GPUDevice, int>(OpKernelContext*, const Tensor*,
                                              Tensor*, Tensor*);
template Status RingAllreduce<GPUDevice, long long>(OpKernelContext*,
                                                    const Tensor*, Tensor*,
                                                    Tensor*);
template Status RingAllreduce<GPUDevice, float>(OpKernelContext*, const Tensor*,
                                                Tensor*, Tensor*);

// Generate all necessary specializations for RingAllgather.
template Status RingAllgather<GPUDevice, int>(OpKernelContext*, const Tensor*,
                                              const std::vector<size_t>&,
                                              Tensor*);
template Status RingAllgather<GPUDevice, long long>(OpKernelContext*,
                                                    const Tensor*,
                                                    const std::vector<size_t>&,
                                                    Tensor*);
template Status RingAllgather<GPUDevice, float>(OpKernelContext*, const Tensor*,
                                                const std::vector<size_t>&,
                                                Tensor*);

// Synchronously copy data on the GPU, using a different stream than the default
// and than TensorFlow to avoid synchronizing on operations unrelated to the
// allreduce.
template <>
void CopyTensorData<GPUDevice>(void* dst, void* src, size_t size) {
  auto stream = CudaStreamForMPI();
  cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, stream);
  cudaStreamSynchronize(stream);
};

// Elementwise accumulation kernel for GPU.
template <typename T>
__global__ void elemwise_accum(T* out, const T* in, const size_t N) {
  for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
       i += blockDim.x * gridDim.x) {
    out[i] += in[i];
  }
}

// Synchronously accumulate tensors on the GPU, using a different stream than
// the default and than TensorFlow to avoid synchronizing on operations
// unrelated to the allreduce.
#define GENERATE_ACCUMULATE(type)                                     \
  template <>                                                         \
  void AccumulateTensorData<GPUDevice, type>(type * dst, type * src,  \
                                             size_t size) {          \
    auto stream = CudaStreamForMPI();                                 \
    elemwise_accum<type><<<32, 256, 0, stream>>>(dst, src, size);     \
    cudaStreamSynchronize(stream);                                    \
  };
GENERATE_ACCUMULATE(int);
GENERATE_ACCUMULATE(long long);
GENERATE_ACCUMULATE(float);
#undef GENERATE_ACCUMULATE

}  // namespace mpi_collectives
}  // namespace contrib
}  // namespace tensorflow
#endif  // GOOGLE_CUDA

#endif  // TENSORFLOW_USE_MPI
327 tensorflow/contrib/mpi_collectives/kernels/ring.h Normal file
@@ -0,0 +1,327 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CONTRIB_MPI_H_
#define TENSORFLOW_CONTRIB_MPI_H_

#ifdef TENSORFLOW_USE_MPI

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/tensor_types.h"

#if GOOGLE_CUDA
#include "cuda_runtime.h"
#endif

// Needed to avoid header issues with C++-supporting MPI implementations
#define OMPI_SKIP_MPICXX
#include "third_party/mpi/mpi.h"

#define TAG_TENSOR 12

namespace tensorflow {
namespace contrib {
namespace mpi_collectives {

using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

// Convert from templated types to values we can pass to MPI.
template <typename T>
MPI_Datatype MPIType();

// Convert from templated types to TensorFlow data types.
template <typename T>
DataType TensorFlowDataType();

#define MPI_REQUIRES_OK(MPI_STATUS)                               \
  if ((MPI_STATUS) != MPI_SUCCESS) {                              \
    return errors::Unknown("MPI operation failed unexpectedly."); \
  }

// Copy data from one tensor to another tensor.
// This uses a custom CUDA stream on GPU, which is necessary to overlay the
// backpropagation computations with the allreduce.
template <typename Device>
void CopyTensorData(void* destination, void* source, size_t size);

// Add a tensor into another tensor, accumulating in place.
// This uses a custom CUDA stream on GPU, which is necessary to overlay the
// backpropagation computations with the allreduce.
template <typename Device, typename T>
void AccumulateTensorData(T* destination, T* source, size_t size);

// We need to get the right stream for doing CUDA memory transfers and
// operations, which is possibly different from the standard TensorFlow stream.
#if GOOGLE_CUDA
cudaStream_t CudaStreamForMPI();
#endif

/* Perform a ring allreduce on the data. Allocate the necessary output tensor
 * and store it in the output parameter.
 *
 * Assumes that all MPI processes are doing an allreduce of the same tensor,
 * with the same dimensions.
 *
 * A ring allreduce is a bandwidth-optimal way to do an allreduce. To do the
 * allreduce, the nodes involved are arranged in a ring:
 *
 *     .--0--.
 *    /       \
 *   3         1
 *    \       /
 *     *--2--*
 *
 * Each node always sends to the next clockwise node in the ring, and receives
 * from the previous one.
 *
 * The allreduce is done in two parts: a scatter-reduce and an allgather. In
 * the scatter reduce, a reduction is done, so that each node ends up with a
 * chunk of the final output tensor which has contributions from all other
 * nodes. In the allgather, those chunks are distributed among all the nodes,
 * so that all nodes have the entire output tensor.
 *
 * Both of these operations are done by dividing the input tensor into N
 * evenly sized chunks (where N is the number of nodes in the ring).
 *
 * The scatter-reduce is done in N-1 steps. In the ith step, node j will send
 * the (j - i)th chunk and receive the (j - i - 1)th chunk, adding it in to
 * its existing data for that chunk. For example, in the first iteration with
 * the ring depicted above, you will have the following transfers:
 *
 *     Segment 0: Node 0 --> Node 1
 *     Segment 1: Node 1 --> Node 2
 *     Segment 2: Node 2 --> Node 3
 *     Segment 3: Node 3 --> Node 0
 *
 * In the second iteration, you'll have the following transfers:
 *
 *     Segment 0: Node 1 --> Node 2
 *     Segment 1: Node 2 --> Node 3
 *     Segment 2: Node 3 --> Node 0
 *     Segment 3: Node 0 --> Node 1
 *
 * After this iteration, Node 2 has 3 of the four contributions to Segment 0.
 * The last iteration has the following transfers:
 *
 *     Segment 0: Node 2 --> Node 3
 *     Segment 1: Node 3 --> Node 0
 *     Segment 2: Node 0 --> Node 1
 *     Segment 3: Node 1 --> Node 2
 *
 * After this iteration, Node 3 has the fully accumulated Segment 0; Node 0
 * has the fully accumulated Segment 1; and so on. The scatter-reduce is
 * complete.
 *
 * Next, the allgather distributes these fully accumulated chunks across all
 * nodes. Communication proceeds in the same ring, once again in N-1 steps. At
 * the ith step, node j will send chunk (j - i + 1) and receive chunk (j - i).
 * For example, at the first iteration, the following transfers will occur:
 *
 *     Segment 0: Node 3 --> Node 0
 *     Segment 1: Node 0 --> Node 1
 *     Segment 2: Node 1 --> Node 2
 *     Segment 3: Node 2 --> Node 3
 *
 * After the first iteration, Node 0 will have a fully accumulated Segment 0
 * (from Node 3) and Segment 1. In the next iteration, Node 0 will send its
 * just-received Segment 0 onward to Node 1, and receive Segment 3 from Node 3.
 * After this has continued for N - 1 iterations, all nodes will have the
 * fully accumulated tensor.
 *
 * Each node will do (N-1) sends for the scatter-reduce and (N-1) sends for the
 * allgather. Each send will contain K / N bytes, if there are K bytes in the
 * original tensor on every node. Thus, each node sends and receives 2K(N - 1)/N
 * bytes of data, and the performance of the allreduce (assuming no latency in
 * connections) is constrained by the slowest interconnect between the nodes.
 */
template <typename Device, typename T>
Status RingAllreduce(OpKernelContext* context, const Tensor* input,
                     Tensor* temp, Tensor* output) {
  // Acquire MPI size and rank
  int n, r;
  MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
  MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));

  T* buffer = (T*)output->tensor_data().data();

  CopyTensorData<Device>((void*)buffer, (void*)input->tensor_data().data(),
                         output->tensor_data().size());

  // Calculate segment sizes and segment ends
  const size_t elements_to_reduce = input->NumElements();
  const size_t segment_size = elements_to_reduce / n;
  std::vector<size_t> segment_sizes(n, segment_size);

  const size_t residual = elements_to_reduce % n;
  for (size_t i = 0; i < residual; ++i) {
    segment_sizes[i]++;
  }

  std::vector<size_t> segment_starts(n);
  segment_starts[0] = 0;
  for (size_t i = 1; i < segment_starts.size(); ++i) {
    segment_starts[i] = segment_starts[i - 1] + segment_sizes[i - 1];
  }

  assert(segment_starts[n - 1] + segment_sizes[n - 1] == elements_to_reduce);

  T* segment_recv = (T*)temp->tensor_data().data();

  // Receive from your left neighbor with wrap-around
  const size_t recv_from = ((r - 1) + n) % n;

  // Send to your right neighbor with wrap-around
  const size_t send_to = (r + 1) % n;

  MPI_Status recv_status;
  MPI_Request recv_req;

  // Now start ring. At every step, for every rank, we iterate through
  // segments with wraparound and send and recv from our neighbors and reduce
  // locally. At the i'th iteration, rank r, sends segment (r-i) and receives
  // segment (r-i-1).
  for (int i = 0; i < n - 1; i++) {
    const size_t send_seg_id = ((r - i) + n) % n;
    const size_t recv_seg_id = ((r - i - 1) + n) % n;

    T* segment_send = &(buffer[segment_starts[send_seg_id]]);

    MPI_REQUIRES_OK(MPI_Irecv(segment_recv, segment_sizes[recv_seg_id],
                              MPIType<T>(), recv_from, TAG_TENSOR,
                              MPI_COMM_WORLD, &recv_req));

    MPI_REQUIRES_OK(MPI_Send(segment_send, segment_sizes[send_seg_id],
                             MPIType<T>(), send_to, TAG_TENSOR,
                             MPI_COMM_WORLD));

    T* segment_update = &(buffer[segment_starts[recv_seg_id]]);

    // Wait for recv to complete before reduction
    MPI_REQUIRES_OK(MPI_Wait(&recv_req, &recv_status));

    const size_t recv_seg_size = segment_sizes[recv_seg_id];
    AccumulateTensorData<Device, T>(segment_update, segment_recv,
                                    recv_seg_size);
  }

  // Now start pipelined ring allgather. At every step, for every rank, we
  // iterate through segments with wraparound and send and recv from our
  // neighbors. At the i'th iteration, rank r, sends segment (r-i+1) and
  // receives segment (r-i).
  for (size_t i = 0; i < n - 1; ++i) {
    const size_t send_seg_id = ((r - i + 1) + n) % n;
    const size_t recv_seg_id = ((r - i) + n) % n;

    // Segment to send - at every iteration we send segment (r-i+1)
    T* segment_send = &(buffer[segment_starts[send_seg_id]]);

    // Segment to recv - at every iteration we receive segment (r-i)
    T* segment_recv = &(buffer[segment_starts[recv_seg_id]]);

    MPI_REQUIRES_OK(MPI_Sendrecv(
        segment_send, segment_sizes[send_seg_id], MPIType<T>(), send_to,
        TAG_TENSOR, segment_recv, segment_sizes[recv_seg_id], MPIType<T>(),
        recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
  }

  return Status::OK();
}

// Perform a ring allgather on a Tensor. Other ranks may allgather with a
// tensor which differs in the first dimension only; all other dimensions must
// be the same.
//
// For more information on the ring allgather, read the documentation for the
// ring allreduce, which includes a ring allgather.
template <typename Device, typename T>
Status RingAllgather(OpKernelContext* context, const Tensor* input,
                     const std::vector<size_t>& sizes, Tensor* output) {
  // Acquire MPI size and rank
  int n, r;
  MPI_REQUIRES_OK(MPI_Comm_size(MPI_COMM_WORLD, &n));
  MPI_REQUIRES_OK(MPI_Comm_rank(MPI_COMM_WORLD, &r));

  assert(sizes.size() == n);
  assert(input->dim_size(0) == sizes[r]);

  // Compute the number of elements in every "row". We can't compute the
  // number of elements in every chunk, because those chunks are variable
  // length.
  size_t elements_per_row = 1;
  for (int i = 1; i < input->shape().dims(); i++) {
    elements_per_row *= input->dim_size(i);
  }

  // Copy data from the input tensor to the correct place in the output tensor.
  std::vector<size_t> segment_starts(n);
  segment_starts[0] = 0;
  for (int i = 1; i < n; i++) {
    segment_starts[i] = segment_starts[i - 1] + elements_per_row * sizes[i - 1];
  }
  size_t offset = segment_starts[r];

  // Copy data to the right offset for this rank.
  T* buffer = (T*)output->tensor_data().data();
  CopyTensorData<Device>((void*)(buffer + offset),
                         (void*)input->tensor_data().data(),
                         elements_per_row * sizes[r] * sizeof(T));

  // Receive from your left neighbor with wrap-around
  const size_t recv_from = ((r - 1) + n) % n;

  // Send to your right neighbor with wrap-around
  const size_t send_to = (r + 1) % n;

  // Perform a ring allgather. At every step, for every rank, we iterate
  // through segments with wraparound and send and recv from our neighbors.
  // At the i'th iteration, rank r, sends segment (r-i) and receives segment
  // (r-1-i).
  MPI_Status recv_status;
  for (size_t i = 0; i < n - 1; ++i) {
    const size_t send_seg_id = ((r - i) + n) % n;
    const size_t recv_seg_id = ((r - i - 1) + n) % n;

    // Segment to send - at every iteration we send segment (r-i)
    size_t offset_send = segment_starts[send_seg_id];
    size_t rows_send = sizes[send_seg_id];
    T* segment_send = &(buffer[offset_send]);

    // Segment to recv - at every iteration we receive segment (r-1-i)
    size_t offset_recv = segment_starts[recv_seg_id];
    size_t rows_recv = sizes[recv_seg_id];
    T* segment_recv = &(buffer[offset_recv]);

    MPI_REQUIRES_OK(MPI_Sendrecv(
        segment_send, elements_per_row * rows_send, MPIType<T>(), send_to,
        TAG_TENSOR, segment_recv, elements_per_row * rows_recv, MPIType<T>(),
        recv_from, TAG_TENSOR, MPI_COMM_WORLD, &recv_status));
  }

  return Status::OK();
}

}  // namespace mpi_collectives
}  // namespace contrib
}  // namespace tensorflow

#endif  // TENSORFLOW_USE_MPI

#undef TENSORFLOW_CONTRIB_MPI_H_
#endif  // TENSORFLOW_CONTRIB_MPI_H_
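The header comment above fully determines the communication schedule, so it can be sanity-checked in a few lines. A hedged simulation sketch (plain Python, no MPI; names are illustrative) that replays the scatter-reduce and allgather schedules and confirms both the result and the 2K(N-1)/N traffic bound:

```python
import numpy as np

def ring_allreduce_sim(chunks_per_node):
    """Simulate the ring allreduce schedule on lists of numpy chunks."""
    n = len(chunks_per_node)
    data = [list(c) for c in chunks_per_node]  # data[node][segment]
    sends = 0
    # Scatter-reduce: at step i, node j sends segment (j - i) % n to j + 1,
    # which adds it into its local copy of that segment.
    for i in range(n - 1):
        for j in range(n):
            seg = (j - i) % n
            dst = (j + 1) % n
            data[dst][seg] = data[dst][seg] + data[j][seg]
            sends += 1
    # Allgather: at step i, node j forwards its fully reduced
    # segment (j - i + 1) % n, which the receiver simply overwrites.
    for i in range(n - 1):
        for j in range(n):
            seg = (j - i + 1) % n
            data[(j + 1) % n][seg] = data[j][seg]
            sends += 1
    return data, sends

# Four nodes, each holding four segments filled with its own rank.
n = 4
chunks = [[np.full(2, r) for _ in range(n)] for r in range(n)]
result, sends = ring_allreduce_sim(chunks)
assert all((seg == 0 + 1 + 2 + 3).all() for node in result for seg in node)
print("sends per node:", sends // n)  # 2*(n-1) sends of K/n bytes = 2K(n-1)/n
```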
@@ -15,7 +15,7 @@ limitations under the License.

 syntax = "proto3";

-package tensorflow.contrib.mpi;
+package tensorflow.contrib.mpi_collectives;

 import "tensorflow/core/framework/tensor_shape.proto";
 import "tensorflow/core/framework/types.proto";
132 tensorflow/contrib/mpi_collectives/ops/mpi_ops.cc Normal file
@@ -0,0 +1,132 @@
/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifdef TENSORFLOW_USE_MPI

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"

namespace tensorflow {
namespace contrib {
namespace mpi_collectives {

REGISTER_OP("MPIInit").Doc(R"doc(
Initialize MPI for the current process.

If this is run on a GPU, then that GPU must be used for all future MPI
operations. If it is run on CPU, then all future MPI operations must also
run on CPU.
)doc");

REGISTER_OP("MPISize")
    .Output("size: int32")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      c->set_output(0, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Returns the number of running MPI processes.

More precisely, returns the number of MPI processes in the group associated
with the MPI_COMM_WORLD communicator.

size:   Size of the MPI group.
)doc");

REGISTER_OP("MPIRank")
    .Output("rank: int32")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      c->set_output(0, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Returns the index of the current process in the MPI group.

More precisely, returns the rank of the calling process in the MPI_COMM_WORLD
communicator.

rank:   Rank of the calling process.
)doc");

REGISTER_OP("MPILocalRank")
    .Output("rank: int32")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      c->set_output(0, c->Scalar());
      return Status::OK();
    })
    .Doc(R"doc(
Returns the index of the current process in the node it is on.

More precisely, returns the rank of the calling process in the communicator
that only spans the MPI processes running on that node.

rank:   Rank of the calling process on the node it is on.
)doc");

REGISTER_OP("MPIAllreduce")
    .Attr("T: {int32, int64, float32}")
    .Input("tensor: T")
    .Output("sum: T")
    .SetShapeFn([](shape_inference::InferenceContext* c) {
      c->set_output(0, c->input(0));
      return Status::OK();
    })
    .Doc(R"doc(
Perform an MPI Allreduce on a tensor. All other processes that do a reduction
on a tensor with the same name must have the same dimension for that tensor.
Tensors are reduced with other tensors that have the same node name for the
allreduce.

Arguments
    tensor: A tensor to reduce.
|
Output
|
||||||
|
sum: A tensor with the same shape as `tensor`, summed across all
|
||||||
|
MPI processes.
|
||||||
|
)doc");
|
||||||
|
|
||||||
|
REGISTER_OP("MPIAllgather")
|
||||||
|
.Attr("T: {int32, int64, float32}")
|
||||||
|
.Attr("S: {int64}")
|
||||||
|
.Input("tensor: T")
|
||||||
|
.Input("sizes: S")
|
||||||
|
.Output("gathered: T")
|
||||||
|
.SetShapeFn([](shape_inference::InferenceContext* c) {
|
||||||
|
shape_inference::ShapeHandle output;
|
||||||
|
TF_RETURN_IF_ERROR(
|
||||||
|
c->ReplaceDim(c->input(0), 0, c->UnknownDim(), &output));
|
||||||
|
c->set_output(0, output);
|
||||||
|
return Status::OK();
|
||||||
|
})
|
||||||
|
.Doc(R"doc(
|
||||||
|
Perform an MPI Allgather on a tensor. All other processes that do a gather on a
|
||||||
|
tensor with the same name must have the same rank for that tensor, and have the
|
||||||
|
same dimension on all but the first dimension.
|
||||||
|
|
||||||
|
Arguments
|
||||||
|
tensor: A tensor to gather.
|
||||||
|
sizes: A tensor containing the first-dimension sizes of tensors to be
|
||||||
|
gathered from other ranks
|
||||||
|
|
||||||
|
Output
|
||||||
|
gathered: A tensor with the same shape as `tensor` except for the first
|
||||||
|
dimension, which is the sum of dimensions in `sizes`.
|
||||||
|
)doc");
|
||||||
|
|
||||||
|
} // namespace mpi_collectives
|
||||||
|
} // namespace contrib
|
||||||
|
} // namespace tensorflow
|
||||||
|
|
||||||
|
#endif // TENSORFLOW_USE_MPI
|
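The MPIAllgather shape function above replaces dimension 0 with an unknown dimension, because the gathered first dimension is only known at runtime as the sum of the per-rank first dimensions. A small sketch of that shape rule, assuming per-rank shapes that agree on all but dimension 0 (the helper name is illustrative, not part of the op):

def allgather_output_shape(rank_shapes):
    """rank_shapes: list of per-rank shapes, e.g. [[2, 3], [4, 3]]."""
    trailing = rank_shapes[0][1:]
    # All ranks must agree on every dimension except the first.
    assert all(s[1:] == trailing for s in rank_shapes), "trailing dims differ"
    return [sum(s[0] for s in rank_shapes)] + trailing

assert allgather_output_shape([[2, 3], [4, 3], [1, 3]]) == [7, 3]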
tensorflow/contrib/mpi_collectives/python/ops/mpi_ops.py (new file, 134 lines)
@ -0,0 +1,134 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Inter-process communication using MPI."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.contrib.mpi_collectives.ops import gen_mpi_ops
from tensorflow.contrib.util import loader
from tensorflow.python.framework import ops
from tensorflow.python.platform import resource_loader

_mpi_ops_so = loader.load_op_library(
    resource_loader.get_path_to_datafile("_mpi_ops.so"))


def size(name=None):
  """An op which returns the number of MPI processes.

  This is equivalent to running `MPI_Comm_size(MPI_COMM_WORLD, ...)` to get the
  size of the global communicator.

  Returns:
    An integer scalar containing the number of MPI processes.
  """
  return gen_mpi_ops.mpi_size(name=name)


ops.NotDifferentiable('MPISize')


def rank(name=None):
  """An op which returns the MPI rank of the calling process.

  This is equivalent to running `MPI_Comm_rank(MPI_COMM_WORLD, ...)` to get the
  rank of the current process in the global communicator.

  Returns:
    An integer scalar with the MPI rank of the calling process.
  """
  return gen_mpi_ops.mpi_rank(name=name)


ops.NotDifferentiable('MPIRank')


def init(name=None):
  """An op which initializes MPI on the device on which it is run.

  All future MPI ops must be run on the same device that the `init` op was run
  on.
  """
  return gen_mpi_ops.mpi_init(name=name)


ops.NotDifferentiable('MPIInit')


def local_rank(name=None):
  """An op which returns the local MPI rank of the calling process, within the
  node that it is running on. For example, if there are seven processes running
  on a node, their local ranks will be zero through six, inclusive.

  This is equivalent to running `MPI_Comm_rank(...)` on a new communicator
  which only includes processes on the same node.

  Returns:
    An integer scalar with the local MPI rank of the calling process.
  """
  return gen_mpi_ops.mpi_local_rank(name=name)


ops.NotDifferentiable('MPILocalRank')


def _allreduce(tensor, name=None):
  """An op which sums an input tensor over all the MPI processes.

  The reduction operation is keyed by the name of the op. The tensor type and
  shape must be the same on all MPI processes for a given name. The reduction
  will not start until all processes are ready to send and receive the tensor.

  Returns:
    A tensor of the same shape and type as `tensor`, summed across all
    processes.
  """
  return gen_mpi_ops.mpi_allreduce(tensor, name=name)


ops.NotDifferentiable('MPIAllreduce')


def allgather(tensor, name=None):
  """An op which concatenates the input tensor with the same input tensor on
  all other MPI processes.

  The concatenation is done on the first dimension, so the input tensors on the
  different processes must have the same rank and shape, except for the first
  dimension, which is allowed to be different.

  Returns:
    A tensor of the same type as `tensor`, concatenated on dimension zero
    across all processes. The shape is identical to the input shape, except for
    the first dimension, which may be greater and is the sum of all first
    dimensions of the tensors in different MPI processes.
  """
  # Specify that first allgather is to collect the tensor gather sizes,
  # indicated by passing in a scalar (0-D tensor) of value 0.
  sizes_flag = tf.constant(0, dtype=tf.int64, name="size_flag_const")
  my_size = tf.slice(tf.shape(tensor, out_type=tf.int64), [0], [1],
                     name="size_slice")
  if name is None:
    name = "allgather"
  sizing_name = "{}_sizing".format(name)
  sizes = gen_mpi_ops.mpi_allgather(my_size, sizes_flag, name=sizing_name)
  return gen_mpi_ops.mpi_allgather(tensor, sizes, name=name)


ops.NotDifferentiable('MPIAllgather')
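The allgather() above runs in two phases: a first, fixed-size allgather of each rank's dimension-0 size (flagged by the scalar `sizes_flag`), then the variable-length allgather of the data itself using those gathered sizes. A rough single-process simulation of the two phases with plain Python lists; the names here are illustrative, not part of the module's API.

def simulated_allgather(tensors_per_rank):
    """tensors_per_rank: list of per-rank lists of rows."""
    # Phase 1: every rank learns every other rank's first-dimension size.
    sizes = [len(t) for t in tensors_per_rank]
    # Phase 2: concatenate along dimension 0 using the gathered sizes.
    gathered = [row for t in tensors_per_rank for row in t]
    assert len(gathered) == sum(sizes)
    return gathered

assert simulated_allgather([[1, 2], [3], [4, 5, 6]]) == [1, 2, 3, 4, 5, 6]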
@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@ -140,6 +140,20 @@ class RNNCellTest(test.TestCase):
       # Smoke test
       self.assertAllClose(res[0], [[0.156736, 0.156736]])

+  def testSRUCell(self):
+    with self.test_session() as sess:
+      with variable_scope.variable_scope(
+          "root", initializer=init_ops.constant_initializer(0.5)):
+        x = array_ops.zeros([1, 2])
+        m = array_ops.zeros([1, 2])
+        g, _ = contrib_rnn_cell.SRUCell(2)(x, m)
+        sess.run([variables_lib.global_variables_initializer()])
+        res = sess.run(
+            [g], {x.name: np.array([[1., 1.]]),
+                  m.name: np.array([[0.1, 0.1]])})
+        # Smoke test
+        self.assertAllClose(res[0], [[0.509682, 0.509682]])

   def testBasicLSTMCell(self):
     for dtype in [dtypes.float16, dtypes.float32]:
       np_dtype = dtype.as_numpy_dtype
@ -28,6 +28,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import op_def_registry
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
+from tensorflow.python.layers import base as base_layer
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import init_ops
@ -2630,3 +2631,95 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell):

     new_state = (rnn_cell_impl.LSTMStateTuple(c, m))
     return m, new_state
+
+
+class SRUCell(rnn_cell_impl._LayerRNNCell):
+  """SRU, Simple Recurrent Unit.
+
+  Implementation based on
+  Training RNNs as Fast as CNNs (cf. https://arxiv.org/abs/1709.02755).
+
+  This variation of RNN cell is characterized by the simplified data
+  dependence between hidden states of two consecutive time steps.
+  Traditionally, the hidden state from a cell at time step t-1 needs to be
+  multiplied with a matrix W_hh before being fed into the ensuing cell at
+  time step t. This flavor of RNN replaces the matrix multiplication between
+  h_{t-1} and W_hh with a pointwise multiplication, resulting in a
+  performance gain.
+
+  Args:
+    num_units: int, The number of units in the SRU cell.
+    activation: Nonlinearity to use. Default: `tanh`.
+    reuse: (optional) Python boolean describing whether to reuse variables
+      in an existing scope. If not `True`, and the existing scope already
+      has the given variables, an error is raised.
+    name: (optional) String, the name of the layer. Layers with the same
+      name will share weights, but to avoid mistakes we require reuse=True
+      in such cases.
+  """
+
+  def __init__(self, num_units, activation=None, reuse=None, name=None):
+    super(SRUCell, self).__init__(_reuse=reuse, name=name)
+    self._num_units = num_units
+    self._activation = activation or math_ops.tanh
+
+    # Restrict inputs to be 2-dimensional matrices.
+    self.input_spec = base_layer.InputSpec(ndim=2)
+
+  @property
+  def state_size(self):
+    return self._num_units
+
+  @property
+  def output_size(self):
+    return self._num_units
+
+  def build(self, inputs_shape):
+    if inputs_shape[1].value is None:
+      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
+                       % inputs_shape)
+
+    input_depth = inputs_shape[1].value
+
+    # Here the contributor believes that the following constraints
+    # are implied. The reasoning is explained here with reference to
+    # the paper https://arxiv.org/pdf/1709.02755.pdf upon which this
+    # implementation is based.
+    # In section 2.1 Equation 5, specifically:
+    #   h_t = r_t \odot g(c_t) + (1 - r_t) \odot x_t
+    # the pointwise operation between r_t and x_t means they have
+    # the same shape (since we are implementing an RNN cell, broadcasting
+    # does not happen to the input of a single timestep); for the same
+    # reason, x_t has the same shape as h_t, essentially mandating that
+    # input_depth = num_units.
+    if input_depth != self._num_units:
+      raise ValueError("SRU requires input_depth == num_units, got "
+                       "input_depth = %s, num_units = %s" % (input_depth,
+                                                             self._num_units))
+
+    self._kernel = self.add_variable(
+        rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
+        shape=[input_depth, 3 * self._num_units])
+
+    self._bias = self.add_variable(
+        rnn_cell_impl._BIAS_VARIABLE_NAME,
+        shape=[2 * self._num_units],
+        initializer=init_ops.constant_initializer(0.0, dtype=self.dtype))
+
+    self._built = True
+
+  def call(self, inputs, state):
+    """Simple recurrent unit (SRU) with num_units cells."""
+
+    U = math_ops.matmul(inputs, self._kernel)
+    x_bar, f_intermediate, r_intermediate = array_ops.split(
+        value=U, num_or_size_splits=3, axis=1)
+
+    f_r = math_ops.sigmoid(nn_ops.bias_add(array_ops.concat(
+        [f_intermediate, r_intermediate], 1), self._bias))
+    f, r = array_ops.split(value=f_r, num_or_size_splits=2, axis=1)
+
+    c = f * state + (1.0 - f) * x_bar
+    h = r * self._activation(c) + (1.0 - r) * inputs
+
+    return h, c
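A minimal NumPy sketch of one SRU step, mirroring the call() above. The names, shapes, and weight layout are illustrative, not part of the TensorFlow API; the bias is split in halves for the f and r gates just as the cell splits `f_r`.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sru_step(x, c_prev, W, b):
    """x: [batch, d]; c_prev: [batch, d]; W: [d, 3d]; b: [2d]."""
    u = x @ W
    x_bar, f_in, r_in = np.split(u, 3, axis=1)
    f = sigmoid(f_in + b[: b.size // 2])
    r = sigmoid(r_in + b[b.size // 2 :])
    c = f * c_prev + (1.0 - f) * x_bar       # internal state update
    h = r * np.tanh(c) + (1.0 - r) * x       # highway-style output
    return h, c

# Example: batch 1, d = 2. With zero weights, f = r = sigmoid(0) = 0.5,
# so c = 0 and h = 0.5 * tanh(0) + 0.5 * x = 0.5 * x.
h, c = sru_step(np.ones((1, 2)), np.zeros((1, 2)),
                np.zeros((2, 6)), np.zeros(4))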
@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

@ -0,0 +1,18 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@ -274,7 +274,7 @@ cc_library(
         "platform/platform.h",
         "platform/protobuf.h",
         "platform/types.h",
-    ] + glob(tf_additional_proto_hdrs()) + glob(tf_env_time_hdrs()),
+    ] + tf_additional_proto_hdrs() + glob(tf_env_time_hdrs()),
     copts = tf_copts(),
     deps = tf_lib_proto_parsing_deps(),
 )
@ -117,7 +117,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)

     # Employ the bounding box to distort the image.
     distorted_image = tf.slice(image, begin, size)

@ -117,7 +117,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)

     # Employ the bounding box to distort the image.
     distorted_image = tf.slice(image, begin, size)
@ -259,9 +259,10 @@ DirectSession::DirectSession(const SessionOptions& options,
       factory_(factory),
       cancellation_manager_(new CancellationManager()),
       operation_timeout_in_ms_(options_.config.operation_timeout_in_ms()) {
-  if (options_.config.session_inter_op_thread_pool_size() > 0) {
-    for (int i = 0; i < options_.config.session_inter_op_thread_pool_size();
-         ++i) {
+  const int thread_pool_size =
+      options_.config.session_inter_op_thread_pool_size();
+  if (thread_pool_size > 0) {
+    for (int i = 0; i < thread_pool_size; ++i) {
       thread::ThreadPool* pool = nullptr;
       bool owned = false;
       init_error_.Update(NewThreadPoolFromThreadPoolOptions(
@ -142,7 +142,7 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelWarmup) {
   DirectSession* ds = static_cast<DirectSession*>(session.get());
   CostModelManager::CostModelMap cost_models;
   ds->ExportCostModels(&cost_models);
-  CHECK_EQ(cost_models.size(), 1);
+  CHECK_GE(cost_models.size(), 1);
   const CostModel* cm = (*cost_models.begin()).second;
   EXPECT_EQ(measure_steps, cm->GetUpdateTimes());
 }
@ -2495,14 +2495,12 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     rinfo_.push_back({csinfo_.identity,
                       mkl_op_registry::GetMklOpName(csinfo_.identity),
                       CopyAttrsDataType, AlwaysRewrite});
-    /*
     rinfo_.push_back({csinfo_.lrn,
                       mkl_op_registry::GetMklOpName(csinfo_.lrn),
                       CopyAttrsLRN, AlwaysRewrite});
     rinfo_.push_back({csinfo_.lrn_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
                       CopyAttrsLRN, AlwaysRewrite});
-    */
     rinfo_.push_back({csinfo_.max_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool),
                       CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
@ -22,11 +22,10 @@ limitations under the License.
 namespace tensorflow {

 // Given shapes of two tensors, computes the broadcast shape.
+template <typename T>
 class BCastArgsOp : public OpKernel {
  public:
-  explicit BCastArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->MatchSignature({DT_INT32, DT_INT32}, {DT_INT32}));
-  }
+  explicit BCastArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

   void Compute(OpKernelContext* ctx) override {
     OP_REQUIRES(
@ -40,7 +39,7 @@ class BCastArgsOp : public OpKernel {
             in.shape().DebugString()));
       BCast::Vec vec;
       for (int64 i = 0; i < in.NumElements(); ++i) {
-        vec.push_back(in.vec<int32>()(i));
+        vec.push_back(in.vec<T>()(i));
       }
       shapes.push_back(vec);
     }
@ -60,7 +59,7 @@ class BCastArgsOp : public OpKernel {
     Tensor* o = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(idx, TensorShape({len}), &o));
     for (int64 i = 0; i < len; ++i) {
-      o->flat<int32>()(i) = static_cast<int32>(v[i]);
+      o->flat<T>()(i) = static_cast<T>(v[i]);
     }
   }

@ -72,12 +71,10 @@ class BCastArgsOp : public OpKernel {
 //
 // TODO(zhifengc):
 //   1. Adds support for n-ary (n >= 2).
+template <typename T>
 class BCastGradArgsOp : public OpKernel {
  public:
-  explicit BCastGradArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
-    OP_REQUIRES_OK(
-        ctx, ctx->MatchSignature({DT_INT32, DT_INT32}, {DT_INT32, DT_INT32}));
-  }
+  explicit BCastGradArgsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

   void Compute(OpKernelContext* ctx) override {
     OP_REQUIRES(
@ -91,7 +88,7 @@ class BCastGradArgsOp : public OpKernel {
             in.shape().DebugString()));
       BCast::Vec vec;
       for (int64 i = 0; i < in.NumElements(); ++i) {
-        vec.push_back(in.vec<int32>()(i));
+        vec.push_back(in.vec<T>()(i));
       }
       shapes.push_back(vec);
     }
@ -112,7 +109,7 @@ class BCastGradArgsOp : public OpKernel {
     Tensor* o = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(idx, TensorShape({len}), &o));
     for (int64 i = 0; i < len; ++i) {
-      o->flat<int32>()(i) = static_cast<int32>(v[i]);
+      o->flat<T>()(i) = static_cast<T>(v[i]);
     }
   }

@ -125,14 +122,28 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int64>);
 REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int64>);

 #if TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
@ -141,7 +152,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
                             .HostMemory("s0")
                             .HostMemory("s1")
                             .HostMemory("r0"),
-                        BCastArgsOp);
+                        BCastArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastArgs")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0"),
+                        BCastArgsOp<int32>);
 #endif

 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
@ -151,7 +169,15 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
@ -159,7 +185,15 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);

 #if TENSORFLOW_USE_SYCL
 REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
@ -169,6 +203,14 @@ REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
                             .HostMemory("s1")
                             .HostMemory("r0")
                             .HostMemory("r1"),
-                        BCastGradArgsOp);
+                        BCastGradArgsOp<int32>);
+REGISTER_KERNEL_BUILDER(Name("BroadcastGradientArgs")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int64>("T")
+                            .HostMemory("s0")
+                            .HostMemory("s1")
+                            .HostMemory("r0")
+                            .HostMemory("r1"),
+                        BCastGradArgsOp<int64>);
 #endif
 }  // end namespace tensorflow
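BCastArgs computes the broadcast shape of its two input shape vectors; the change above merely templatizes the kernel over the index type (int32/int64). A small sketch of the broadcast-shape semantics themselves, following NumPy/TensorFlow broadcasting rules (this illustrates what the op computes, not the kernel's actual code):

def broadcast_shape(s0, s1):
    """Return the broadcast shape of s0 and s1, or raise if incompatible."""
    out = []
    # Walk the shapes from the trailing dimension, padding the shorter
    # shape with 1s, exactly as NumPy/TensorFlow broadcasting does.
    for i in range(1, max(len(s0), len(s1)) + 1):
        a = s0[-i] if i <= len(s0) else 1
        b = s1[-i] if i <= len(s1) else 1
        if a != b and a != 1 and b != 1:
            raise ValueError("Incompatible shapes: %r vs %r" % (s0, s1))
        out.append(max(a, b))
    return list(reversed(out))

assert broadcast_shape([2, 3, 1], [3, 4]) == [2, 3, 4]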
@ -146,7 +146,7 @@ class ConvParameters {
     int64 total_size = 16 * std::ceil(batch_ / 16.0) *
                        std::max(in_depths_, out_depths_) * in_[0] * in_[1] *
                        sizeof(T);
-    int64 threshold = 1L << 31;
+    int64 threshold = 1LL << 31;
     if (total_size >= threshold) {
       return false;
     } else {
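The one-character fix above matters because `1L` is a `long`, which is only 32 bits wide on some platforms, so `1L << 31` overflows before being widened to int64; `1LL` performs the shift in a 64-bit type. A sketch of the intended size check follows. Python integers are arbitrary-precision, so this only illustrates the arithmetic; the function and variable names are illustrative, not from the file.

import math

def within_size_threshold(batch, in_depths, out_depths, in_rows, in_cols,
                          elem_size):
    # Same formula as total_size above; the batch is padded up to a
    # multiple of 16 before multiplying through.
    total_size = (16 * math.ceil(batch / 16.0) *
                  max(in_depths, out_depths) * in_rows * in_cols * elem_size)
    threshold = 1 << 31  # 2 GiB; computed in a 64-bit type in the C++ code
    return total_size < threshold

# Example: a 32-image batch of 256x256 float32 inputs with 64 channels.
print(within_size_threshold(32, 64, 64, 256, 256, 4))  # True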
@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

+#include <mutex>
 #include <unordered_map>

 #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
@ -80,6 +81,26 @@ class FloatFiller {

 template <typename T>
 struct ExampleStore {
+ private:
+  static ExampleTensorMap serialized_example;
+  static std::once_flag flags_init;
+
+ public:
+  static ExampleTensorMap& GetSerializedExample() {
+    std::call_once(flags_init, [] {
+      AddExample(&serialized_example, 10, 1, 1);
+      AddExample(&serialized_example, 100, 1, 1);
+      AddExample(&serialized_example, 1000, 1, 1);
+      AddExample(&serialized_example, 10, 128, 1);
+      AddExample(&serialized_example, 100, 128, 1);
+      AddExample(&serialized_example, 1000, 128, 1);
+      AddExample(&serialized_example, 10, 512, 1);
+      AddExample(&serialized_example, 100, 512, 1);
+      AddExample(&serialized_example, 1000, 512, 1);
+      AddExample(&serialized_example, 1, 1, 1000000);
+    });
+    return serialized_example;
+  }
   typedef T Filler;
   static void AddExample(ExampleTensorMap* examples, int num_keys,
                          int batch_size, int feature_size) {
@ -101,34 +122,15 @@ struct ExampleStore {
     (*examples)[std::make_tuple(batch_size, num_keys, feature_size)] =
         record_string;
   }
-  static ExampleTensorMap GetSerializedExamples() {
-    ExampleTensorMap examples;
-    AddExample(&examples, 10, 1, 1);
-    AddExample(&examples, 100, 1, 1);
-    AddExample(&examples, 1000, 1, 1);
-    AddExample(&examples, 10, 128, 1);
-    AddExample(&examples, 100, 128, 1);
-    AddExample(&examples, 1000, 128, 1);
-    AddExample(&examples, 10, 512, 1);
-    AddExample(&examples, 100, 512, 1);
-    AddExample(&examples, 1000, 512, 1);
-    AddExample(&examples, 1, 1, 1000000);
-    return examples;
-  }
-  static ExampleTensorMap serialized_example;
 };
+template <typename T>
+ExampleTensorMap ExampleStore<T>::serialized_example;
+template <typename T>
+std::once_flag ExampleStore<T>::flags_init;

-template <>
-ExampleTensorMap ExampleStore<BytesFiller>::serialized_example =
-    ExampleStore<BytesFiller>::GetSerializedExamples();
-
-template <>
-ExampleTensorMap ExampleStore<Int64Filler>::serialized_example =
-    ExampleStore<Int64Filler>::GetSerializedExamples();
-
-template <>
-ExampleTensorMap ExampleStore<FloatFiller>::serialized_example =
-    ExampleStore<FloatFiller>::GetSerializedExamples();
+template class ExampleStore<BytesFiller>;
+template class ExampleStore<Int64Filler>;
+template class ExampleStore<FloatFiller>;

 enum BenchmarkType { kDense, kSparse, kVarLenDense };

@ -142,7 +144,7 @@ struct BenchmarkOptions {
 template <typename Options>
 static Graph* ParseExample(int batch_size, int num_keys, int feature_size) {
   Graph* g = new Graph(OpRegistry::Global());
-  Tensor& serialized = Options::Store::serialized_example[std::make_tuple(
+  Tensor& serialized = Options::Store::GetSerializedExample()[std::make_tuple(
       batch_size, num_keys, feature_size)];
   Tensor names(DT_STRING, TensorShape({batch_size}));

@ -193,7 +195,7 @@ template <typename Options>
 static Graph* ParseSingleExample(int num_keys, int feature_size) {
   Graph* g = new Graph(OpRegistry::Global());
   Tensor& serialized_batch_1 =
-      Options::Store::serialized_example[std::make_tuple(1, num_keys,
+      Options::Store::GetSerializedExample()[std::make_tuple(1, num_keys,
                                                          feature_size)];
   Tensor serialized(DT_STRING, TensorShape());
   serialized.scalar<string>()() = serialized_batch_1.vec<string>()(0);
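The change above replaces eager static initialization, which runs at an unspecified time during program startup, with lazy, thread-safe one-time initialization via std::call_once. A rough Python analogue of the same pattern, using a module-level lock; the names here are illustrative only.

import threading

_examples = {}
_init_lock = threading.Lock()
_initialized = False

def get_serialized_examples():
    """Build the example map on first use, exactly once across threads."""
    global _initialized
    with _init_lock:
        if not _initialized:
            for key in [(10, 1, 1), (100, 1, 1), (1000, 1, 1)]:
                _examples[key] = "example-%d-%d-%d" % key
            _initialized = True
    return _examples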
@ -294,7 +294,7 @@ class MklAddNOp : public OpKernel {

     try {
       auto cpu_engine = engine(engine::cpu, 0);
-      size_t src1_idx = 0, src2_idx = 1;
+      size_t src1_idx = 0, src2_idx = 1, output_idx = 0;
       const Tensor& src1_tensor = MklGetInput(ctx, src1_idx);
       const Tensor& src2_tensor = MklGetInput(ctx, src2_idx);

@ -312,7 +312,7 @@ class MklAddNOp : public OpKernel {
         Tensor* dst_tensor = nullptr;
         MklShape mkl_shape_dst;
         mkl_shape_dst.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+        AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
                                   src1_tensor.shape(), mkl_shape_dst);
         float user_i1 = (src1_tensor.scalar<T>()());
         float user_i2 = (src2_tensor.scalar<T>()());

@ -327,13 +327,12 @@ class MklAddNOp : public OpKernel {
         Tensor* dst_tensor = nullptr;
         MklShape mkl_shape_dst;
         mkl_shape_dst.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor,
+        AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
                                   src1_tensor.shape(), mkl_shape_dst);
         return;
       }
     }

-    // element-wise add operator for tensor input1 and tensor input2
     std::vector<double> coeff(2, 1.0);
     MklDnnData<T> src1(&cpu_engine);
     MklDnnData<T> src2(&cpu_engine);

@ -345,47 +344,105 @@ class MklAddNOp : public OpKernel {
       memory::desc md1({}, memory::data_undef, memory::format_undef);
       memory::desc md2({}, memory::data_undef, memory::format_undef);

-      if ( input1_in_mkl_format || input2_in_mkl_format ) {
-        if ( input1_in_mkl_format ) {
-          md1 = src1_mkl_shape.GetMklLayout();
-          md2 = md1;
-          dst.SetUsrMem(md1);
-        } else {
-          md2 = src2_mkl_shape.GetMklLayout();
-          md1 = md2;
-          dst.SetUsrMem(md2);
-        }
-      } else {
+      // For creating a Sum primitive, we need to ensure that all inputs are
+      // in the same format. If we have a mixed-input case - one input in
+      // Tensorflow format and one input in MKL format - then we need to
+      // bring all inputs into one format before primitive construction. For
+      // performance reasons, we say that all inputs are in MKL format in
+      // such a case, and insert a reorder for the input that is in
+      // Tensorflow format. On the other hand, if both inputs are in MKL
+      // format, or both are in Tensorflow format, then we don't need a
+      // reorder.
+      if (!input1_in_mkl_format && !input2_in_mkl_format) {
+        // If both the inputs are in Tensorflow format, we create a blocked
+        // memory descriptor.
         dims = TFShapeToMklDnnDims(src1_tensor.shape());
         strides = CalculateTFStrides(dims);
         md1 = MklDnnData<T>::CreateBlockedMemDesc(dims, strides);
         md2 = md1;
-        dst.SetUsrMem(dims, strides);
+      } else if (input1_in_mkl_format && !input2_in_mkl_format) {
+        // If one input is in MKL format and the other is in Tensorflow
+        // format, create descriptors describing the actual case. For the
+        // input in MKL format, we just get the MKL layout from MklDnnShape.
+        // For the input in Tensorflow format, we create a memory descriptor
+        // using the data format.
+        md1 = src1_mkl_shape.GetMklLayout();
+
+        memory::format src1_mkl_data_format = src1_mkl_shape.GetTfDataFormat();
+        auto src1_tf_data_format = MklDnnDataFormatToTFDataFormat(
+            src1_mkl_data_format);
+        auto src2_dims = TFShapeToMklDnnDimsInNCHW(src2_tensor.shape(),
+                                                   src1_tf_data_format);
+        md2 = memory::desc(src2_dims, MklDnnType<T>(),
+                           src1_mkl_data_format);
+      } else if (input2_in_mkl_format && !input1_in_mkl_format) {
+        // Same comment as above.
+        memory::format src2_mkl_data_format = src2_mkl_shape.GetTfDataFormat();
+        auto src2_tf_data_format = MklDnnDataFormatToTFDataFormat(
+            src2_mkl_data_format);
+        auto src1_dims = TFShapeToMklDnnDimsInNCHW(src1_tensor.shape(),
+                                                   src2_tf_data_format);
+        md1 = memory::desc(src1_dims, MklDnnType<T>(),
+                           src2_mkl_data_format);
+
+        md2 = src2_mkl_shape.GetMklLayout();
+      } else {
+        // If both inputs are in MKL format, we use the MKL layout of the
+        // input tensors.
+        md1 = src1_mkl_shape.GetMklLayout();
+        md2 = src2_mkl_shape.GetMklLayout();
       }
+      src1.SetUsrMem(md1, &src1_tensor);
+      src2.SetUsrMem(md2, &src2_tensor);
+
+      // As per the comment above, we tell MKLDNN that both inputs are in the
+      // same format: we set a common memory descriptor in MKL format if any
+      // of the inputs is in MKL format, and use it for both inputs. We set
+      // the output memory descriptor in MKL format under the same condition.
+      memory::desc common_md({}, memory::data_undef, memory::format_undef);
+      if (input1_in_mkl_format || input2_in_mkl_format) {
+        common_md = input1_in_mkl_format ? md1 : md2;
+        dst.SetUsrMem(common_md);
+      } else {
+        // Since both inputs are in Tensorflow format and have the same
+        // shape, we can get the memory descriptor from either input.
+        common_md = md1;
+        dst.SetUsrMem(common_md);
+      }

       std::vector<memory::primitive_desc> srcs_pd;
+      // Memory descriptor for 1st input
+      srcs_pd.push_back(memory::primitive_desc(common_md, cpu_engine));
+      // Memory descriptor for 2nd input
+      srcs_pd.push_back(memory::primitive_desc(common_md, cpu_engine));
+      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);

-      src1.SetUsrMem(md1, &src1_tensor);
-      auto mpd1 = src1.GetUsrMemPrimDesc();
-      srcs_pd.push_back(mpd1);
-      src2.SetUsrMem(md2, &src2_tensor);
-      auto mpd2 = src2.GetUsrMemPrimDesc();
-      srcs_pd.push_back(mpd2);
+      // Now we set up resources for primitive execution.
+      // First, we need to check if any of the inputs needs to be reordered
+      // as per the logic described above. Since the output will be in MKL
+      // format if at least one input is in MKL format, we choose the output
+      // descriptor for the reorder.

       std::vector<primitive::at> inputs;
+      std::vector<primitive> net;
+      // Check if the actual input format of the tensor is different from the
+      // common_pd we told MKLDNN. In that case, we will need a reorder.
+      src1.CheckReorderToOpMem(srcs_pd[0], &net);
+      src2.CheckReorderToOpMem(srcs_pd[1], &net);
       inputs.push_back(src1.GetOpMem());
       inputs.push_back(src2.GetOpMem());
-      auto output_pd = dst.GetUsrMemPrimDesc();
+
+      // Allocate output tensor now.
       Tensor* dst_tensor = nullptr;
-      auto sum_pd = sum::primitive_desc(dst.GetUsrMemDesc(), coeff, srcs_pd);
-      auto sum_op = sum(sum_pd, inputs, dst.GetOpMem());
-      if ( input2_in_mkl_format || input1_in_mkl_format ) {
       MklDnnShape output_mkl_shape;
+      TensorShape output_tf_shape;
+
+      if (input2_in_mkl_format || input1_in_mkl_format) {
         output_mkl_shape.SetMklTensor(true);
+        auto output_pd = dst.GetUsrMemPrimDesc();
         output_mkl_shape.SetMklLayout(&output_pd);
         output_mkl_shape.SetElemType(MklDnnType<T>());
-        if ( input1_in_mkl_format ) {
+        if (input1_in_mkl_format) {
           output_mkl_shape.SetTfLayout(src1_dims_size,
                                        src1_mkl_shape.GetSizesAsMklDnnDims(),
                                        src1_mkl_shape.GetTfDataFormat());

@ -394,21 +451,17 @@ class MklAddNOp : public OpKernel {
                                        src2_mkl_shape.GetSizesAsMklDnnDims(),
                                        src2_mkl_shape.GetTfDataFormat());
         }
-        TensorShape output_tf_shape;
-        output_tf_shape.AddDim((output_pd.get_size() / sizeof(T))
-                               + (output_pd.get_size()%sizeof(T) == 0 ? 0 : 1));
-        AllocateOutputSetMklShape(ctx, src1_idx, &dst_tensor, output_tf_shape,
-                                  output_mkl_shape);
+        output_tf_shape.AddDim((output_pd.get_size() / sizeof(T)));
       } else {
-        MklShape mkl_shape_dst;
-        mkl_shape_dst.SetMklTensor(false);
-        AllocateOutputSetMklShape(ctx, src1_idx,
-                                  &dst_tensor, src1_tensor.shape(), mkl_shape_dst);
+        output_mkl_shape.SetMklTensor(false);
+        output_tf_shape = src1_tensor.shape();
       }
+      AllocateOutputSetMklShape(ctx, output_idx, &dst_tensor,
+                                output_tf_shape, output_mkl_shape);
       dst.SetUsrMemDataHandle(dst_tensor);
-      std::vector<primitive> net;
-      net.push_back(sum_op);
+      // Create Sum op, and submit net for execution.
+      net.push_back(sum(sum_pd, inputs, dst.GetOpMem()));
       stream(stream::kind::eager).submit(net).wait();
     } catch (mkldnn::error &e) {
       string error_msg = "Status: " + std::to_string(e.status) +
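A schematic of the input-format decision implemented above, assuming two inputs that are each in either "mkl" or "tf" layout: if the formats are mixed, both inputs are treated as MKL and the TF-format one is reordered, and the output is MKL whenever at least one input is. Purely illustrative; the names are not from the kernel.

def choose_common_format(fmt1, fmt2):
    """Return (common_format, inputs_to_reorder) for the Sum primitive."""
    if fmt1 == "tf" and fmt2 == "tf":
        return "tf", []              # both TF: blocked descriptor, no reorder
    reorder = []
    if fmt1 == "tf":
        reorder.append("input1")     # bring input1 into MKL layout
    if fmt2 == "tf":
        reorder.append("input2")     # bring input2 into MKL layout
    return "mkl", reorder

assert choose_common_format("mkl", "tf") == ("mkl", ["input2"])
assert choose_common_format("tf", "tf") == ("tf", [])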
@ -367,6 +367,9 @@ class MklConv2DCustomBackpropInputOp :
|
|||||||
~MklConv2DCustomBackpropInputOp() {}
|
~MklConv2DCustomBackpropInputOp() {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
const int kInputIndex_Filter = 1,
|
||||||
|
kInputIndex_InputSizes = 0,
|
||||||
|
kInputIndex_OutBackProp = 2;
|
||||||
void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
|
void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
|
||||||
const MklDnnShape& filter_mkl_shape,
|
const MklDnnShape& filter_mkl_shape,
|
||||||
const MklDnnShape& obp_mkl_shape) {
|
const MklDnnShape& obp_mkl_shape) {
|
||||||
@ -377,7 +380,7 @@ class MklConv2DCustomBackpropInputOp :
|
|||||||
<< "Conv2DBackpropInput: input should not be in MKL Layout";
|
<< "Conv2DBackpropInput: input should not be in MKL Layout";
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t GetInputTensorIndexWithSizes() { return 0; /* input index */ }
|
size_t GetInputTensorIndexWithSizes() { return kInputIndex_InputSizes; }
|
||||||
|
|
||||||
TensorShape MakeInputTfShape(OpKernelContext* context,
|
TensorShape MakeInputTfShape(OpKernelContext* context,
|
||||||
const Tensor& input_tensor) {
|
const Tensor& input_tensor) {
|
||||||
@ -390,8 +393,7 @@ class MklConv2DCustomBackpropInputOp :
|
|||||||
|
|
||||||
TensorShape MakeFilterTfShape(OpKernelContext* context,
|
TensorShape MakeFilterTfShape(OpKernelContext* context,
|
||||||
const Tensor& filter_tensor) {
|
const Tensor& filter_tensor) {
|
||||||
size_t filter_idx = 1;
|
return GetTfShape(context, kInputIndex_Filter);
|
||||||
return GetTfShape(context, filter_idx);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
|
const memory::dims& GetOutputDims(const memory::dims& fwd_input_dims,
|
||||||
|
@@ -510,15 +510,15 @@ class MklConv2DOp : public OpKernel {
      auto cpu_engine = engine(engine::cpu, 0);

      // Input tensors
-      size_t src_idx = 0, filter_idx = 1;
-      const Tensor& src_tensor = MklGetInput(context, src_idx);
-      const Tensor& filter_tensor = MklGetInput(context, filter_idx);
+      const Tensor& src_tensor = MklGetInput(context, kInputIndex_Src);
+      const Tensor& filter_tensor = MklGetInput(context, kInputIndex_Filter);

      MklDnnShape src_mkl_shape, filter_mkl_shape;
-      GetMklShape(context, src_idx, &src_mkl_shape);
-      GetMklShape(context, filter_idx, &filter_mkl_shape);
-      CHECK(!filter_mkl_shape.IsMklTensor())
-          << "Conv2D filter should not be in MKL Layout";
+      GetMklShape(context, kInputIndex_Src, &src_mkl_shape);
+      GetMklShape(context, kInputIndex_Filter, &filter_mkl_shape);
+      OP_REQUIRES(context, filter_mkl_shape.IsMklTensor() == false,
+                  errors::InvalidArgument("Filter should not be in "
+                                          "Mkl Layout"));

      MklDnnData<T> src(&cpu_engine);
      MklDnnData<T> filter(&cpu_engine);
@@ -529,8 +529,8 @@ class MklConv2DOp : public OpKernel {

      // Get shapes of input tensors in MKL-DNN order
      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
-      auto src_tf_shape = GetTfShape(context, src_idx);
-      auto filter_tf_shape = GetTfShape(context, filter_idx);
+      auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
+      auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
      conv_utl.GetConvFwdSizesInMklOrder(src_tf_shape, filter_tf_shape,
                                         &src_dims, &filter_dims, &strides,
                                         &output_dims_tf_order,
@@ -541,9 +541,6 @@ class MklConv2DOp : public OpKernel {
      // Check for corner case - if there is nothing to compute, return.
      TensorShape output_tf_shape = MklDnnDimsToTFShape(output_dims_tf_order);

-      // Forward filter in TF format from input at index 1 to output at index 1.
-      ForwardTfTensorInToOut(context, 1, 1);
-
      // Corner cases: output with 0 elements and 0 batch size.
      Tensor* output_tensor = nullptr;
      if (output_tf_shape.num_elements() == 0 ||
@@ -552,8 +549,8 @@ class MklConv2DOp : public OpKernel {
        // Need semantics for Null MKL tensor
        MklDnnShape output_mkl_shape;
        output_mkl_shape.SetMklTensor(false);
-        AllocateOutputSetMklShape(context, 0, &output_tensor, src_tf_shape,
-                                  output_mkl_shape);
+        AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor,
+                                  src_tf_shape, output_mkl_shape);
        return;
      }

@@ -571,10 +568,11 @@ class MklConv2DOp : public OpKernel {
      src.SetUsrMem(src_md, &src_tensor);
      // Although filter shape (filter_dims) required is in MKL-DNN order,
      // the layout is Tensorflow's layout (HWIO).
-      auto filter_md = filter_mkl_shape.IsMklTensor()
+      auto filter_md = filter_mkl_shape.IsMklTensor()  // Should NEVER be true
          ? filter_mkl_shape.GetMklLayout()
          : memory::desc(filter_dims, MklDnnType<T>(), memory::format::hwio);
      filter.SetUsrMem(filter_md, &filter_tensor);

      // Set output shape (output_dims) required in MKL-DNN order.
      // Currently, we set output layout as Tensorflow's layout (NHWC or NCHW
      // depending on data format). But later we propagate Mkl layout of the
@@ -590,8 +588,8 @@ class MklConv2DOp : public OpKernel {
      if (biasEnabled) {
        MklDnnData<T> bias(&cpu_engine);
        memory::dims bias_size;
-        conv_utl.GetBiasSizeInMklOrder(2 /* bias idx */, &bias_size);
-        const Tensor& bias_tensor = MklGetInput(context, 2);
+        conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size);
+        const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias);
        bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor);
        bias.SetOpMemDesc(bias_size, memory::format::any);

@@ -607,7 +605,14 @@ class MklConv2DOp : public OpKernel {
                             output_dims_mkl_order, tf_fmt, &output_tensor);
        // Set data handle for output.
        output.SetUsrMemDataHandle(output_tensor);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output);
+
+        Tensor* filter_out_tensor = nullptr;
+        AllocateFilterOutputTensor(context, conv_prim_desc,
+                                   TFShapeToMklDnnDims(filter_tf_shape),
+                                   &filter_out_tensor);
+
+        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
+                             &bias, &output, filter_out_tensor);
      } else {
        // Create convolution primitive without Bias.
        auto conv_desc = convolution_forward::desc(prop_kind::forward,
@@ -621,7 +626,13 @@ class MklConv2DOp : public OpKernel {
                             tf_fmt, &output_tensor);
        // Set data handle for output.
        output.SetUsrMemDataHandle(output_tensor);
-        PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output);
+
+        Tensor* filter_out_tensor = nullptr;
+        AllocateFilterOutputTensor(context, conv_prim_desc,
+                                   TFShapeToMklDnnDims(filter_tf_shape),
+                                   &filter_out_tensor);
+        PrepareAndExecuteNet(conv_prim_desc, &src, &filter,
+                             nullptr, &output, filter_out_tensor);
      }
    } catch (mkldnn::error &e) {
      string error_msg = "Status: " + std::to_string(e.status) +
@@ -637,6 +648,10 @@ class MklConv2DOp : public OpKernel {
  std::vector<int32> strides_;
  Padding padding_;
  TensorFormat data_format_;
+  const int kInputIndex_Src = 0,
+            kInputIndex_Filter = 1,
+            kInputIndex_Bias = 2;
+  const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1;

  // Allocate output tensor.
  void AllocateOutputTensor(
@@ -659,22 +674,57 @@ class MklConv2DOp : public OpKernel {
    TensorShape output_tf_shape;
    output_tf_shape.AddDim((dst_pd.get_size() / sizeof(T)));

-    const int kOutputSlotIdx = 0;
-    AllocateOutputSetMklShape(context, kOutputSlotIdx, output_tensor,
+    AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor,
                              output_tf_shape, output_mkl_shape);
  }

+  // Allocate output tensor.
+  void AllocateFilterOutputTensor(
+      OpKernelContext* context,
+      const convolution_forward::primitive_desc& conv_prim_desc,
+      const memory::dims& filter_dims_tf_order,
+      Tensor** filter_tensor) {
+    CHECK_NOTNULL(filter_tensor);
+    auto filter_pd = conv_prim_desc.weights_primitive_desc();
+
+    // Allocate shape of Mkl tensor.
+    MklDnnShape filter_mkl_shape;
+    filter_mkl_shape.SetMklTensor(true);
+    filter_mkl_shape.SetMklLayout(&filter_pd);
+    filter_mkl_shape.SetElemType(MklDnnType<T>());
+
+    // The format of the filter is actually OIhw8i8o, but TF doesn't support
+    // this format. Just use format::blocked for now because the layout
+    // is stored in the MKL data.
+    filter_mkl_shape.SetTfLayout(filter_dims_tf_order.size(),
+                                 filter_dims_tf_order, memory::format::blocked);
+
+    // Allocate the data space for the filter to propagate as TF tensor.
+    TensorShape filter_tf_shape;
+    filter_tf_shape.AddDim((filter_pd.get_size() / sizeof(T)));
+
+    AllocateOutputSetMklShape(context, kOutputIndex_Filter, filter_tensor,
+                              filter_tf_shape, filter_mkl_shape);
+  }
+
  // Prepare and execute net - checks for input and output reorders.
  void PrepareAndExecuteNet(
      const convolution_forward::primitive_desc& conv_prim_desc,
      MklDnnData<T>* src, MklDnnData<T>* filter,
-      MklDnnData<T>* bias, MklDnnData<T>* output) {
+      MklDnnData<T>* bias, MklDnnData<T>* output,
+      Tensor* filter_out_tensor) {
+    CHECK_NOTNULL(filter_out_tensor);
+
    // Create reorders between user layout and MKL layout if it is needed and
    // add it to the net before convolution. No need to check for output
    // reorder as we propagate output layout to the next layer.
    std::vector<primitive> net;
    src->CheckReorderToOpMem(conv_prim_desc.src_primitive_desc(), &net);
-    filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(), &net);
+
+    // rather than re-order to a temp buffer, reorder directly to the
+    // filter output tensor
+    filter->CheckReorderToOpMem(conv_prim_desc.weights_primitive_desc(),
+                                filter->GetTensorBuffer(filter_out_tensor), &net);

    // Create convolution primitive and add it to net.
    if (bias) {
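The PrepareAndExecuteNet change above is a copy-elision: the filter reorder primitive is pointed directly at the buffer backing the new second output, instead of a scratch buffer whose contents would then have to be copied out. A toy stand-in for the idea (a plain function in place of an mkldnn reorder; names hypothetical):

#include <cstddef>
#include <cstring>

// Convert `src` and write the result straight into the storage that will be
// published as an output, so no intermediate buffer or extra copy is needed.
void ReorderIntoOutput(const float* src, float* published_output,
                       std::size_t n) {
  // A real implementation would run an mkldnn reorder primitive targeting
  // published_output; memcpy stands in for the layout conversion here.
  std::memcpy(published_output, src, n * sizeof(float));
}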
@@ -18,6 +18,7 @@ limitations under the License.

 #include <vector>
 #include <limits>
+#include <string>

 #include "tensorflow/core/framework/numeric_op.h"
 #include "tensorflow/core/framework/op_kernel.h"
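The new include makes the file self-sufficient: the mkldnn error paths in this patch concatenate std::string and std::to_string results, which previously compiled only through transitive includes. A minimal sketch of the usage that needs it:

#include <string>

std::string FormatMklStatus(int status) {
  // same shape as the catch blocks elsewhere in the patch
  return "Status: " + std::to_string(status) +
         ", in file " + std::string(__FILE__);
}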
@@ -17,7 +17,7 @@ limitations under the License.
 // See docs in ../ops/nn_ops.cc. This opkernel uses MKL library, create MKL
 // layout and primitives, use MKL dnn primitives to compute local
 // response normalization
-#undef INTEL_MKL
+
 #ifdef INTEL_MKL

 #define EIGEN_USE_THREADS
@@ -38,6 +38,15 @@ limitations under the License.
 #include "tensorflow/core/util/work_sharder.h"
 #endif

+#ifdef INTEL_MKL_DNN
+#include "mkldnn.hpp"
+using mkldnn::lrn_forward;
+using mkldnn::lrn_backward;
+using mkldnn::prop_kind;
+using mkldnn::algorithm::lrn_across_channels;
+using mkldnn::stream;
+#endif
+
 namespace tensorflow {

 namespace {
@@ -58,6 +67,8 @@ void GetBandMatrix(int depth, int depth_radius,

 } // namespace

+#ifndef INTEL_MKL_DNN
+
 template <typename T>
 class MklLRNOp : public OpKernel {
  public:
@@ -328,6 +339,7 @@ class MklLRNOp : public OpKernel {
   float beta_;
 };

+
 template <typename T>
 class MklLRNGradOp : public OpKernel {
  public:
@@ -648,6 +660,7 @@ class MklLRNGradOp : public OpKernel {
     const auto nodes = cols * rows;

     auto grads_shaped = in_grads.shaped<T, 2>({nodes * batch, depth});
+
     auto in_shaped = in_image.shaped<T, 2>({nodes * batch, depth});
     auto activations = out_image.shaped<T, 2>({nodes * batch, depth});

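The large hunk that follows adds an MKL-DNN path for LRN plus an Eigen fallback. For orientation, the scalar form of the normalization the fallback computes is out[i] = in[i] * (bias + alpha * sum of in[k]^2 over the depth window)^(-beta). A standalone sketch of that per-element step (the real code vectorizes it with Eigen contractions):

#include <cmath>

// One output element of local response normalization, given the sum of
// squares over this element's depth window.
float LrnOne(float x, float window_sq_sum,
             float bias, float alpha, float beta) {
  return x * std::pow(bias + alpha * window_sq_sum, -beta);
}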
@@ -717,6 +730,649 @@ class MklLRNGradOp : public OpKernel {
   float beta_;
 };

+
+#else
+
+template <typename T>
+class MklLRNOp : public OpKernel {
+ public:
+  ~MklLRNOp() {}
+
+  explicit MklLRNOp(OpKernelConstruction* context) : OpKernel(context) {
+    int64 depth_radius64;
+    OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
+    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                        " larger than int max"));
+    depth_radius_ = static_cast<size_t>(depth_radius64);
+
+    OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_));
+    workspace_enabled_ = false;
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      SanityCheckInputs(context);
+      if (!context->status().ok()) return;
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      const Tensor& src_tensor = MklGetInput(context, kIdxInput);
+      MklDnnShape src_dnn_shape;
+      GetMklShape(context, kIdxInput, &src_dnn_shape);
+
+      // MKL-DNN has a notion of kernel_size and not depth_radius.
+      int kernel_size = 2 * depth_radius_ + 1;
+      float new_alpha = alpha_ * kernel_size;
+
+      // if the input tensor is not an MKL Tensor, or if the last
+      // dimension is not channel, then just use Eigen.
+      // MKL only support normalization over the channel dimension.
+      if (!src_dnn_shape.IsMklTensor()) {
+        MklDefaultToEigen(context, src_tensor);
+        return;
+      } else if (!src_dnn_shape.IsMklChannelDim(
+                     src_dnn_shape.GetDimension() - 1) ) {
+        Tensor converted_tensor =
+            ConvertMklToTF<T>(context, src_tensor, src_dnn_shape);
+        MklDefaultToEigen(context, converted_tensor);
+        return;
+      }
+      // At this point, we can assume that the src is an MklTensor
+      // and we can enable the workspace
+      workspace_enabled_ = true;
+
+      MklDnnData<T> src_dnn_data(&cpu_engine);
+      MklDnnData<T> dst_dnn_data(&cpu_engine);
+      MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
+
+      TensorShape tf_output_shape = src_tensor.shape();
+
+      memory::desc src_md = src_dnn_shape.GetCurLayout();
+      memory::dims input_dims = src_dnn_shape.GetSizesAsMklDnnDims();
+
+      // Create memory for user input.
+      // Since Tensorflow always performs normalization over last dimension,
+      // and MKL-DNN performs normalization over Channel, we tell MKL-DNN
+      // that input is in NHWC layout with Channel being the last dimension.
+      src_dnn_data.SetUsrMem(src_md, &src_tensor);
+      src_dnn_data.SetOpMemDesc(input_dims, memory::format::nhwc);
+
+      // output_dnn_data and workspace both have the same shape as input
+      dst_dnn_data.SetUsrMem(src_md);
+      dst_dnn_data.SetOpMemDesc(input_dims, memory::format::nhwc);
+
+      // Create LRN primitive descriptor.
+      // Tensorflow's normalization semantics is across channels.
+      // MKL-DNN also supports normalization within channel.
+      auto lrn_desc = lrn_forward::desc(prop_kind::forward,
+                                        lrn_across_channels,
+                                        src_dnn_data.GetUsrMemDesc(),
+                                        kernel_size,
+                                        new_alpha, beta_, bias_);
+      auto lrn_prim_desc = lrn_forward::primitive_desc(lrn_desc, cpu_engine);
+
+      // Allocate output_dnn_data tensor.
+      Tensor* output_tensor = nullptr;
+      memory::format input_format = src_dnn_shape.GetTfDataFormat();
+      AllocateOutputTensor(context, lrn_prim_desc, input_dims,
+                           input_format, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      CHECK_NOTNULL(output_tensor);
+      dst_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+      // Handle workspace required for MKL-DNN.
+      AllocateWorkspaceTensor(context, lrn_prim_desc, &workspace_dnn_data);
+      OP_REQUIRES_OK(context, context->status());
+
+      PrepareAndExecuteNet(lrn_prim_desc, &src_dnn_data,
+                           &dst_dnn_data, &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                                     error_msg));
+    }
+  }
+
+ private:
+  void PrepareAndExecuteNet(
+      const lrn_forward::primitive_desc& lrn_fwd_desc,
+      MklDnnData<T>* src_dnn_data,
+      MklDnnData<T>* dst_dnn_data,
+      MklDnnData<uint8>* wksp_dnn_data = nullptr) {
+    std::vector<primitive> net;
+
+    // Check for input reorder
+    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(), &net);
+
+    // Create pooling primitive and add it to net
+    if (wksp_dnn_data != nullptr) {
+      net.push_back(lrn_forward(lrn_fwd_desc,
+                                src_dnn_data->GetOpMem(),
+                                wksp_dnn_data->GetOpMem(),
+                                dst_dnn_data->GetOpMem()));
+    } else {
+      net.push_back(lrn_forward(lrn_fwd_desc,
+                                src_dnn_data->GetOpMem(),
+                                dst_dnn_data->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+      const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+      const memory::dims output_dims_mkl_order,
+      const memory::format& output_tf_format,
+      Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd = lrn_fwd_prim_desc.dst_primitive_desc();
+
+    MklDnnShape output_mkl_shape;
+    // We only handle the case when the inputs and output are in Mkl format
+    // Any other case is handled by Eigen
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                 output_dims_mkl_order,
+                                 output_tf_format);
+    TensorShape output_tf_shape;
+    // only allocate enough space for the elements we need.
+    size_t num_bytes = dst_pd.get_size();
+    CHECK_EQ(num_bytes % sizeof(T), 0);
+    output_tf_shape.AddDim(num_bytes / sizeof(T));
+    AllocateOutputSetMklShape(context, kIdxOutput,
+                              output_tensor,
+                              output_tf_shape, output_mkl_shape);
+  }
+
+  // Fallback implementation - Taken from lrn_op.cc
+  // TODO(inteltf) Check if we can use EigenLRNOp directly instead of making a
+  // copy.
+  void MklDefaultToEigen(OpKernelContext* context,
+                         const Tensor& input) {
+    const int batch = static_cast<int>(input.dim_size(0));
+    const int rows = static_cast<int>(input.dim_size(1));
+    const int cols = static_cast<int>(input.dim_size(2));
+    const int depth = static_cast<int>(input.dim_size(3));
+    const int nodes = cols * rows;
+
+    auto in_shaped = input.shaped<T, 2>({nodes * batch, depth});
+    // Multiplying the input with the band matrix has the effect of reducing
+    // the
+    // correct patch along the depth.
+    Eigen::Tensor<T, 2, Eigen::RowMajor> multiplier(depth, depth);
+    GetBandMatrix<T>(depth, depth_radius_, &multiplier);
+
+    Tensor *output_dnn_data, *workspace;
+    MklDnnShape mkl_output_mkl_shape, mkl_workspace_mkl_shape;
+    mkl_output_mkl_shape.SetMklTensor(false);
+    mkl_output_mkl_shape.SetDimensions(4);
+    AllocateOutputSetMklShape(context, kIdxOutput, &output_dnn_data,
+                              input.shape(), mkl_output_mkl_shape);
+
+    mkl_workspace_mkl_shape.SetMklTensor(false);
+    mkl_workspace_mkl_shape.SetDimensions(4);
+    AllocateOutputSetMklShape(context, kIdxWorkspace, &workspace,
+                              input.shape(), mkl_workspace_mkl_shape);
+
+    auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+    Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+    auto tmp = in_shaped.square().contract(multiplier, dims) * alpha_ + bias_;
+    if (beta_ == T(1)) {
+      out_shaped.device(context->eigen_cpu_device()) =
+          in_shaped * tmp.inverse();
+    } else if (beta_ == T(0.5)) {
+      out_shaped.device(context->eigen_cpu_device()) =
+          in_shaped * tmp.rsqrt();
+    } else {
+      out_shaped.device(context->eigen_cpu_device()) =
+          in_shaped * (tmp.log() * -beta_).exp();
+    }
+  }
+
+  void AllocateWorkspaceTensor(OpKernelContext* context,
+      const lrn_forward::primitive_desc& lrn_fwd_prim_desc,
+      MklDnnData<uint8>* dnn_data_wksp) {
+    CHECK_NOTNULL(dnn_data_wksp);
+    Tensor* workspace_tensor = nullptr;
+    memory::primitive_desc workspace_pd
+        = lrn_fwd_prim_desc.workspace_primitive_desc();
+    size_t workspace_bytes = workspace_pd.get_size();
+    MklDnnShape workspace_mkl_shape;
+    // the workspace tensor is a uint8 tensor that has
+    // exactly the number of bytes necessary
+    workspace_mkl_shape.SetMklTensor(false);
+    TensorShape workspace_tf_shape;
+    workspace_tf_shape.AddDim(workspace_bytes);
+    AllocateOutputSetMklShape(context, kIdxWorkspace,
+                              &workspace_tensor,
+                              workspace_tf_shape, workspace_mkl_shape);
+    CHECK_NOTNULL(workspace_tensor);
+    dnn_data_wksp->SetUsrMem(workspace_pd, workspace_tensor);
+  }
+
+  void SanityCheckInputs(OpKernelContext* context) {
+    const Tensor& src_tensor = MklGetInput(context, kIdxInput);
+    MklDnnShape src_dnn_shape;
+    GetMklShape(context, kIdxInput, &src_dnn_shape);
+    if (src_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, src_dnn_shape.GetDimension() == 4,
+                  errors::InvalidArgument("input must be 4-dimensional"));
+      OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
+                                           std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("argument to LRN too large"));
+    } else {
+      OP_REQUIRES(context, src_tensor.dims() == 4,
+                  errors::InvalidArgument("input must be 4-dimensional"));
+      OP_REQUIRES(context, FastBoundsCheck(src_tensor.NumElements(),
+                                           std::numeric_limits<int>::max()),
+                  errors::InvalidArgument("argument to LRN too large"));
+    }
+  }
+  const int kIdxInput = 0,
+            kIdxOutput = 0,
+            kIdxWorkspace = 1;
+
+  typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
+  bool workspace_enabled_;
+  int depth_radius_;
+  float bias_;
+  float alpha_;
+  float beta_;
+};
+
+
+template <typename T>
+class MklLRNGradOp : public OpKernel {
+ public:
+  explicit MklLRNGradOp(OpKernelConstruction* context) : OpKernel(context) {
+    int64 depth_radius64;
+    OP_REQUIRES_OK(context, context->GetAttr("depth_radius", &depth_radius64));
+    OP_REQUIRES(context, FastBoundsCheck(depth_radius64,
+                                         std::numeric_limits<int>::max()),
+                errors::InvalidArgument("depth_radius = ", depth_radius64,
+                                        " larger than int max"));
+    depth_radius_ = static_cast<int>(depth_radius64);
+    OP_REQUIRES_OK(context, context->GetAttr("bias", &bias_));
+    OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_));
+    OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_));
+    workspace_enabled_ = false;
+    context->GetAttr("workspace_enabled", &workspace_enabled_);
+  }
+
+  void Compute(OpKernelContext* context) override {
+    try {
+      SanityCheckInputs(context);
+      if (!context->status().ok()) return;
+
+      auto cpu_engine = engine(engine::cpu, 0);
+      MklDnnData<T> input_grad_dnn_data(&cpu_engine);
+      MklDnnData<T> orig_input_dnn_data(&cpu_engine);
+      MklDnnData<T> orig_output_dnn_data(&cpu_engine);
+      MklDnnData<T> output_dnn_data(&cpu_engine);
+
+      MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+                  orig_output_dnn_shape;
+      GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+      GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+      GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+      // We only use MKLDNN if all of the necessary inputs are present
+      // in mkldnn format, and Channel is the last dimension
+      bool can_use_mkldnn = workspace_enabled_ &&
+                            input_grad_dnn_shape.IsMklTensor() &&
+                            orig_input_dnn_shape.IsMklTensor() &&
+                            orig_output_dnn_shape.IsMklTensor() &&
+                            input_grad_dnn_shape.IsMklChannelDim(
+                                input_grad_dnn_shape.GetDimension() - 1) &&
+                            orig_input_dnn_shape.IsMklChannelDim(
+                                orig_input_dnn_shape.GetDimension() - 1) &&
+                            orig_output_dnn_shape.IsMklChannelDim(
+                                orig_output_dnn_shape.GetDimension() - 1);
+
+      if (!can_use_mkldnn) {
+        // Fallback to eigen
+        MklDefaultToEigen(context);
+        return;
+      }
+      // At this point, we have the all clear to use MklDnn constructs
+      // Naming: diff_dst is input_gradient_tensor; src is orig_input_tensor.
+      const Tensor& input_grad_tensor = MklGetInput(context, kIdxGradient);
+      const Tensor& orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+      const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+
+      // Get input sizes in MKL-DNN required NCHW format.
+      // LRN does not have data_format attribute. But by default it has
+      // NHWC format.
+      memory::desc original_output_md = orig_output_dnn_shape.GetCurLayout();
+      memory::desc target_diff_dst_md = ConfigureInputGradient(
+          input_grad_tensor,
+          input_grad_dnn_shape,
+          &input_grad_dnn_data);
+
+      memory::desc orig_input_md = orig_input_dnn_shape.GetCurLayout();
+      memory::dims orig_input_dims =
+          orig_input_dnn_shape.GetSizesAsMklDnnDims();
+      orig_input_dnn_data.SetUsrMem(orig_input_md, &orig_input_tensor);
+      orig_input_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+
+      // output_dnn_data has the same shape as original input
+      output_dnn_data.SetUsrMem(orig_input_md);
+      output_dnn_data.SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+
+      // MKL-DNN has a notion of kernel_size and not depth_radius.
+      int kernel_size = 2 * depth_radius_ + 1;
+      float new_alpha = alpha_ * kernel_size;
+
+      // Create LRN backward primitive descriptor. It requires LRN forward
+      // primitive descriptor also.
+      auto lrn_fwd_desc = lrn_forward::desc(prop_kind::forward,
+                                            lrn_across_channels,
+                                            orig_input_md,
+                                            kernel_size,
+                                            new_alpha, beta_, bias_);
+      auto lrn_fwd_prim_desc = lrn_forward::primitive_desc(lrn_fwd_desc,
+                                                           cpu_engine);
+      auto lrn_bwd_desc = lrn_backward::desc(lrn_across_channels,
+                                             original_output_md,
+                                             target_diff_dst_md,
+                                             kernel_size,
+                                             new_alpha, beta_, bias_);
+      auto lrn_bwd_prim_desc = lrn_backward::primitive_desc(lrn_bwd_desc,
+                                                            cpu_engine,
+                                                            lrn_fwd_prim_desc);
+
+      Tensor* output_tensor = nullptr;
+      memory::format orig_input_format
+          = orig_input_dnn_shape.GetTfDataFormat();
+      AllocateOutputTensor(context, lrn_bwd_prim_desc,
+                           orig_input_dims, orig_input_format, &output_tensor);
+      OP_REQUIRES_OK(context, context->status());
+      CHECK_NOTNULL(output_tensor);
+      output_dnn_data.SetUsrMemDataHandle(output_tensor);
+
+      // Create LRN primitive and add it to the net
+      // At this point, workspace is enabled, so we don't need
+      // to check. Pass input workspace to LRN backward primitive.
+      const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
+      MklDnnData<uint8> workspace_dnn_data(&cpu_engine);
+      ConfigureWorkspace(workspace_tensor,
+                         lrn_fwd_prim_desc.workspace_primitive_desc(),
+                         &workspace_dnn_data);
+
+      PrepareAndExecuteNet(lrn_bwd_prim_desc,
+                           lrn_fwd_prim_desc,
+                           &orig_input_dnn_data,
+                           &input_grad_dnn_data,
+                           &output_dnn_data,
+                           memory::primitive_desc(target_diff_dst_md, cpu_engine),
+                           &workspace_dnn_data);
+    } catch (mkldnn::error &e) {
+      string error_msg = "Status: " + std::to_string(e.status) +
+                         ", message: " + string(e.message) +
+                         ", in file " + string(__FILE__) + ":" +
+                         std::to_string(__LINE__);
+      OP_REQUIRES_OK(context,
+                     errors::Aborted("Operation received an exception:",
+                                     error_msg));
+    }
+  }
+
+  void AllocateOutputTensor(OpKernelContext* context,
+      const lrn_backward::primitive_desc& lrn_bkwd_prim_desc,
+      const memory::dims output_dims_mkl_order,
+      const memory::format& output_tf_format,
+      Tensor** output_tensor) {
+    CHECK_NOTNULL(output_tensor);
+    memory::primitive_desc dst_pd
+        = lrn_bkwd_prim_desc.diff_src_primitive_desc();
+    MklDnnShape output_mkl_shape;
+
+    // We assume that all outputs at this point are MKL Tensors
+    output_mkl_shape.SetMklTensor(true);
+    output_mkl_shape.SetMklLayout(&dst_pd);
+    output_mkl_shape.SetElemType(MklDnnType<T>());
+    output_mkl_shape.SetTfLayout(output_dims_mkl_order.size(),
+                                 output_dims_mkl_order,
+                                 output_tf_format);
+
+    TensorShape output_tf_shape;
+    size_t num_bytes = dst_pd.get_size();
+    CHECK_EQ(num_bytes % sizeof(T), 0);
+    output_tf_shape.AddDim(num_bytes / sizeof(T));
+    AllocateOutputSetMklShape(context, kIdxOutput,
+                              output_tensor,
+                              output_tf_shape, output_mkl_shape);
+  }
+
+  memory::desc ConfigureInputGradient(const Tensor& input_grad_tensor,
+      const MklDnnShape& input_grad_dnn_shape,
+      MklDnnData<T> *input_grad_dnn_data) {
+    CHECK_NOTNULL(input_grad_dnn_data);
+    // This shouldn't be necessary at this point, but just in case
+    CHECK_EQ(input_grad_dnn_shape.IsMklTensor(), true);
+
+    memory::desc input_grad_md = input_grad_dnn_shape.GetCurLayout();
+    memory::dims orig_input_dims =
+        input_grad_dnn_shape.GetSizesAsMklDnnDims();
+    input_grad_dnn_data->SetUsrMem(input_grad_md, &input_grad_tensor);
+    input_grad_dnn_data->SetOpMemDesc(orig_input_dims, memory::format::nhwc);
+    return input_grad_md;
+  }
+
+  void PrepareAndExecuteNet(
+      const lrn_backward::primitive_desc& lrn_bkwd_desc,
+      const lrn_forward::primitive_desc& lrn_fwd_desc,
+      MklDnnData<T>* src_dnn_data,
+      MklDnnData<T>* input_gradient_diff_dst,
+      MklDnnData<T>* output_diff_src,
+      const memory::primitive_desc& target_diff_dst_pd,
+      const MklDnnData<uint8>* workspace_dnn_data = nullptr) {
+    std::vector<primitive> net;
+
+    // Check for input reordering on the diff dst input
+    input_gradient_diff_dst->CheckReorderToOpMem(
+        lrn_bkwd_desc.diff_dst_primitive_desc(), &net);
+
+    // Check for input reordering on the original input
+    src_dnn_data->CheckReorderToOpMem(lrn_fwd_desc.src_primitive_desc(),
+                                      &net);
+    // Create pooling primitive and add it to net
+    if (nullptr == workspace_dnn_data) {
+      net.push_back(lrn_backward(lrn_bkwd_desc,
+                                 src_dnn_data->GetOpMem(),
+                                 input_gradient_diff_dst->GetOpMem(),
+                                 output_diff_src->GetOpMem()));
+    } else {
+      net.push_back(lrn_backward(lrn_bkwd_desc,
+                                 src_dnn_data->GetOpMem(),
+                                 input_gradient_diff_dst->GetOpMem(),
+                                 workspace_dnn_data->GetOpMem(),
+                                 output_diff_src->GetOpMem()));
+    }
+    stream(stream::kind::eager).submit(net).wait();
+  }
+
+  void ConfigureWorkspace(const Tensor& workspace_tensor,
+                          memory::primitive_desc workspace_pd,
+                          MklDnnData<uint8> *workspace_dnn_data) {
+    CHECK_NOTNULL(workspace_dnn_data);
+
+    workspace_dnn_data->SetUsrMem(workspace_pd, &workspace_tensor);
+  }
+
+  // Fallback implementation - Taken from lrn_op.cc
+  // TODO(intelft) Check if we can use EigenLRNOp directly instead of making a
+  // copy.
+  void MklDefaultToEigen(OpKernelContext* context) {
+    Tensor input_gradient_tensor;
+    Tensor orig_input_tensor;
+    Tensor orig_output_tensor;
+
+    MklDnnShape input_grad_dnn_shape, orig_input_dnn_shape,
+                orig_output_dnn_shape;
+    GetMklShape(context, kIdxGradient, &input_grad_dnn_shape);
+    GetMklShape(context, kIdxOrigInput, &orig_input_dnn_shape);
+    GetMklShape(context, kIdxOrigOutput, &orig_output_dnn_shape);
+
+    if (input_grad_dnn_shape.IsMklTensor()) {
+      input_gradient_tensor =
+          ConvertMklToTF<T>(context,
+                            MklGetInput(context, kIdxGradient),
+                            input_grad_dnn_shape);
+    } else {
+      input_gradient_tensor = MklGetInput(context, kIdxGradient);
+    }
+
+    if (orig_input_dnn_shape.IsMklTensor()) {
+      orig_input_tensor =
+          ConvertMklToTF<T>(context,
+                            MklGetInput(context, kIdxOrigInput),
+                            orig_input_dnn_shape);
+    } else {
+      orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+    }
+
+    if (orig_output_dnn_shape.IsMklTensor()) {
+      orig_output_tensor =
+          ConvertMklToTF<T>(context,
+                            MklGetInput(context, kIdxOrigOutput),
+                            orig_output_dnn_shape);
+    } else {
+      orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+    }
+
+    const int64 batch = static_cast<int64>(input_gradient_tensor.dim_size(0));
+    const int64 rows = static_cast<int64>(input_gradient_tensor.dim_size(1));
+    const int64 cols = static_cast<int64>(input_gradient_tensor.dim_size(2));
+    const int64 depth = static_cast<int64>(input_gradient_tensor.dim_size(3));
+    const auto nodes = cols * rows;
+
+    auto grads_shaped =
+        input_gradient_tensor.shaped<T, 2>({nodes * batch, depth});
+
+    auto in_shaped = orig_input_tensor.shaped<T, 2>({nodes * batch, depth});
+    auto activations =
+        orig_output_tensor.shaped<T, 2>({nodes * batch, depth});
+
+    Tensor* output_dnn_data;
+    MklShape mkl_output_mkl_shape;
+    mkl_output_mkl_shape.SetMklTensor(false);
+    mkl_output_mkl_shape.SetDimensions(4);
+    AllocateOutputSetMklShape(context, kIdxOutput,
+                              &output_dnn_data,
+                              input_gradient_tensor.shape(),
+                              mkl_output_mkl_shape);
+
+    auto out_shaped = output_dnn_data->shaped<T, 2>({nodes * batch, depth});
+    out_shaped.setZero();
+    auto shard = [this, activations, in_shaped, grads_shaped, out_shaped,
+                  depth](int64 begin, int64 end) {
+      for (int64 i = begin; i < end; ++i) {
+        for (int64 j = 0; j < depth; ++j) {
+          int64 depth_begin = std::max<int64>(0, j - depth_radius_);
+          int64 depth_end = std::min<int64>(depth, j + depth_radius_ + 1);
+
+          T norm(0);
+          for (int64 k = depth_begin; k < depth_end; ++k) {
+            norm += in_shaped(i, k) * in_shaped(i, k);
+          }
+          norm = alpha_ * norm + bias_;
+          DCHECK_GT(norm, T(1e-6));
+          for (int64 k = depth_begin; k < depth_end; ++k) {
+            T dyi = T(-2) * alpha_ * beta_ * in_shaped(i, k) *
+                    activations(i, j) / norm;
+            if (k == j) {
+              dyi += Eigen::numext::pow(norm, -beta_);
+            }
+            dyi *= grads_shaped(i, j);
+            const_cast<typename TTypes<T, 2>::Tensor&>(out_shaped)(i, k) +=
+                dyi;
+          }
+        }
+      }
+    };
+    auto worker_threads =
+        *(context->device()->tensorflow_cpu_worker_threads());
+    Shard(worker_threads.num_threads, worker_threads.workers, nodes * batch,
+          depth * depth, shard);
+  }
+
+  void SanityCheckInputs(OpKernelContext* context) {
+    const Tensor& input_gradient_tensor = MklGetInput(context, kIdxGradient);
+    const Tensor& orig_input_tensor = MklGetInput(context, kIdxOrigInput);
+    const Tensor& orig_output_tensor = MklGetInput(context, kIdxOrigOutput);
+    const Tensor& workspace_tensor = MklGetInput(context, kIdxWorkspace);
+    MklDnnShape in_grads_dnn_shape, in_image_dnn_shape, out_image_dnn_shape,
+                workspace_dnn_shape;
+    GetMklShape(context, kIdxGradient, &in_grads_dnn_shape);
+    GetMklShape(context, kIdxOrigInput, &in_image_dnn_shape);
+    GetMklShape(context, kIdxOrigOutput, &out_image_dnn_shape);
+    GetMklShape(context, kIdxWorkspace, &workspace_dnn_shape);
+    if (in_grads_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, in_grads_dnn_shape.GetDimension() == 4,
+                  errors::InvalidArgument("Input gradient must be "
+                                          "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, input_gradient_tensor.dims() == 4,
+                  errors::InvalidArgument("input gradient must be 4-dimensional"));
+    }
+
+    if (in_image_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, in_image_dnn_shape.GetDimension() == 4,
+                  errors::InvalidArgument("input images must be "
+                                          "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, orig_input_tensor.dims() == 4,
+                  errors::InvalidArgument("input images must be "
+                                          "4-dimensional"));
+    }
+
+    if (out_image_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, out_image_dnn_shape.GetDimension() == 4,
+                  errors::InvalidArgument("Output image must be "
+                                          "4-dimensional"));
+    } else {
+      OP_REQUIRES(context, orig_output_tensor.dims() == 4,
+                  errors::InvalidArgument("Output image must be 4-dimensional"));
+    }
+
+    if (workspace_dnn_shape.IsMklTensor()) {
+      OP_REQUIRES(context, workspace_dnn_shape.IsMklTensor() == false,
+                  errors::InvalidArgument("Workspace should not be MKL Tensor."));
+    } else {
+      OP_REQUIRES(context, workspace_tensor.dims() == 1,
+                  errors::InvalidArgument("Workspace must be 1-dimensional"));
+    }
+  }
+
+  // Input("input_grads: T")
+  // Input("input_image: T")
+  // Input("output_image: T")
+  // Input("workspace: uint8")
+  const int kIdxGradient = 0,
+            kIdxOrigInput = 1,
+            kIdxOrigOutput = 2,
+            kIdxWorkspace = 3,
+            kIdxOutput = 0;
+
+  typedef typename Eigen::Tensor<T, 1, Eigen::RowMajor>::DimensionPair DimPair;
+  bool workspace_enabled_;
+  int depth_radius_;
+  float bias_;
+  float alpha_;
+  float beta_;
+};
+
+#endif  // INTEL_MKL_DNN
+
 #define REGISTER_MKL_LRN_CPU(T)                     \
   REGISTER_KERNEL_BUILDER(Name("_MklLRN")           \
                               .Device(DEVICE_CPU)   \
@@ -729,6 +1385,7 @@ class MklLRNGradOp : public OpKernel {
                               .Label(mkl_op_registry::kMklOpLabel), \
                           MklLRNGradOp<T>);

+
 TF_CALL_float(REGISTER_MKL_LRN_CPU);

 } // namespace tensorflow
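Both LRN kernels above translate TensorFlow's depth_radius attribute into MKL-DNN's window parameters as kernel_size = 2 * depth_radius + 1 and new_alpha = alpha * kernel_size. A worked sketch of that conversion (assuming, as the code's comments do, that MKL-DNN folds the window size into alpha while TensorFlow's attribute does not):

inline float MklAlphaFromTf(float alpha_tf, int depth_radius) {
  const int kernel_size = 2 * depth_radius + 1;  // MKL-DNN local_size
  return alpha_tf * kernel_size;  // e.g. radius 2 -> size 5, alpha scaled x5
}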
@@ -500,30 +500,81 @@ class MklReluGradOpBase : public OpKernel {
      // Set DNN primitives for src & diff_dst
      memory::desc src_md({}, memory::data_undef, memory::format_undef);
      memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef);
-      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
-        if (dnn_shape_diff_dst.IsMklTensor()) {
-          diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
-          src_md = diff_dst_md;
-        } else {
-          src_md = dnn_shape_src.GetMklLayout();
-          diff_dst_md = src_md;
-        }
-      } else {
+
+      // For creating Sum primitive, we need to ensure that all inputs are in
+      // same format. What that means is if we have a mixed input case - where
+      // one input is in Tensorflow format and one input is in MKL format -,
+      // then we need to ensure that all inputs are in same format for
+      // primitive construction. For performance reason, we say that all inputs
+      // are in MKL format in such case, and insert reorder for input that is
+      // in Tensorflow format into MKL format. On the other hand, if both the
+      // inputs are in MKL format or both are in Tensorflow format, then we
+      // dont need reorder.
+      if (!dnn_shape_src.IsMklTensor() && !dnn_shape_diff_dst.IsMklTensor()) {
+        // If both the inputs are in Tensorflow format, we create blocked memory
+        // descriptor.
        auto src_dims = TFShapeToMklDnnDims(src_tensor.shape());
        auto src_strides = CalculateTFStrides(src_dims);
        src_md = MklDnnData<T>::CreateBlockedMemDesc(src_dims, src_strides);
        diff_dst_md = src_md;
+      } else if (dnn_shape_src.IsMklTensor() &&
+                 !dnn_shape_diff_dst.IsMklTensor()) {
+        // If one input is in MKL format and other is in Tensorflow, then
+        // create respective descriptors describing the actual case. For input
+        // in Mkl format, we just get Mkl layout from MklDnnShape. For input in
+        // Tensorflow format, we create memory descriptor using data format.
+        src_md = dnn_shape_src.GetMklLayout();
+
+        memory::format src_mkl_data_format = dnn_shape_src.GetTfDataFormat();
+        auto src_tf_data_format = MklDnnDataFormatToTFDataFormat(
+            src_mkl_data_format);
+        auto diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(),
+                                                       src_tf_data_format);
+        diff_dst_md = memory::desc(diff_dst_dims, MklDnnType<T>(),
+                                   src_mkl_data_format);
+      } else if (!dnn_shape_src.IsMklTensor() &&
+                 dnn_shape_diff_dst.IsMklTensor()) {
+        // Same comment as above.
+        diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
+
+        memory::format diff_dst_mkl_data_format =
+            dnn_shape_diff_dst.GetTfDataFormat();
+        auto diff_dst_tf_data_format = MklDnnDataFormatToTFDataFormat(
+            diff_dst_mkl_data_format);
+        auto src_dims = TFShapeToMklDnnDimsInNCHW(src_tensor.shape(),
+                                                  diff_dst_tf_data_format);
+        src_md = memory::desc(src_dims, MklDnnType<T>(),
+                              diff_dst_mkl_data_format);
+      } else {
+        // If both the inputs are in MKL format, we use Mkl layout of the input
+        // tensors.
+        src_md = dnn_shape_src.GetMklLayout();
+        diff_dst_md = dnn_shape_diff_dst.GetMklLayout();
      }

      src.SetUsrMem(src_md, &src_tensor);
      diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);

+      // As per comment above, we tell MKLDNN that both the inputs are in same
+      // format. So we set common memory descriptor in MKL format, if any of the
+      // inputs are in MKL format. Let's get memory descriptor that we will use
+      // for both the inputs.
+      memory::desc common_md({}, memory::data_undef, memory::format_undef);
+      if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) {
+        common_md = dnn_shape_src.IsMklTensor() ? src_md : diff_dst_md;
+      } else {
+        // Since both the inputs are in Tensorflow format, and have
+        // same shape, we can get memory descriptor from any input.
+        common_md = src_md;
+      }
+
      T alpha = 0, beta = 0;
      std::shared_ptr<relu_forward::primitive_desc> relu_fwd_pd;
      auto relu_fwd_desc = relu_forward::desc(prop_kind::forward_training,
                                              alg_kind, src_md, alpha, beta);
      relu_fwd_pd.reset(new relu_forward::primitive_desc(relu_fwd_desc,
                                                         cpu_engine));
-      auto relu_bwd_desc = relu_backward::desc(alg_kind, diff_dst_md, src_md,
+      auto relu_bwd_desc = relu_backward::desc(alg_kind, common_md, common_md,
                                               alpha, beta);
      auto relu_bwd_pd = relu_backward::primitive_desc(relu_bwd_desc,
                                                       cpu_engine, *relu_fwd_pd);
@@ -547,9 +598,9 @@ class MklReluGradOpBase : public OpKernel {
      AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
                                tf_shape_diff_src, dnn_shape_diff_src);

-      // diff_src memory descriptor is same as diff_dst memory descriptor.
-      auto diff_src_md = diff_dst_md;
-      diff_src.SetUsrMem(diff_src_md, diff_src_tensor);
+      // diff_src memory descriptor is same as memory descriptor for both
+      // inputs.
+      diff_src.SetUsrMem(common_md, diff_src_tensor);

      PrepareAndExecuteNet(relu_bwd_pd, &src, &diff_src, &diff_dst);
    } catch (mkldnn::error &e) {
@@ -567,6 +618,14 @@ class MklReluGradOpBase : public OpKernel {
      MklDnnData<T>* src, MklDnnData<T>* diff_src, MklDnnData<T>*
      diff_dst) {
    std::vector<primitive> net;
+
+    // Check if we need to reorder original input tensors into common_md layout
+    // that we set for primitive creation. diff_src_primitive_desc is same as
+    // common_md.
+    src->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(), &net);
+    diff_dst->CheckReorderToOpMem(relu_prim_desc.diff_src_primitive_desc(),
+                                  &net);
+
    net.push_back(relu_backward(relu_prim_desc, src->GetOpMem(),
                                diff_dst->GetOpMem(), diff_src->GetOpMem()));
    stream(stream::kind::eager).submit(net).wait();
@@ -622,7 +681,6 @@ class MklReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
    MklDnnShape dnn_shape_diff_dst;
    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);

-    int src_dims_size = src_tensor.dims();
    MklDnnShape dnn_shape_diff_src;
    dnn_shape_diff_src.SetMklTensor(false);
    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
@@ -690,7 +748,6 @@ class MklEluGradOp : public MklReluGradOpBase<Device, T, eltwise_elu> {
    MklDnnShape dnn_shape_diff_dst;
    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);

-    int src_dims_size = src_tensor.dims();
    MklDnnShape dnn_shape_diff_src;
    dnn_shape_diff_src.SetMklTensor(false);
    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
@@ -762,7 +819,6 @@ class MklTanhGradOp : public MklReluGradOpBase<Device, T, eltwise_tanh> {
    MklDnnShape dnn_shape_diff_dst;
    GetMklShape(context, diff_dst_index, &dnn_shape_diff_dst);

-    int src_dims_size = src_tensor.dims();
    MklDnnShape dnn_shape_diff_src;
    dnn_shape_diff_src.SetMklTensor(false);
    AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor,
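The relu-grad hunks above pick one common memory descriptor for both backward inputs and reorder whichever operand disagrees with it. Reduced to a toy decision function so the branch structure is visible (the bool flags stand in for MklDnnShape::IsMklTensor; names hypothetical):

enum class Layout { kTf, kMkl };

Layout CommonLayout(bool src_is_mkl, bool diff_dst_is_mkl) {
  // Prefer an MKL layout if either operand already carries one; the other
  // operand is reordered into it before the backward primitive runs.
  if (src_is_mkl || diff_dst_is_mkl) return Layout::kMkl;
  return Layout::kTf;  // both plain Tensorflow format: no reorder needed
}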
@ -228,7 +228,7 @@ void HandleReverseV2Case(OpKernelContext* context,
|
|||||||
result->tensor<T, NDIMS>());
|
result->tensor<T, NDIMS>());
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Device, typename T>
|
template <typename Device, typename T, typename Tidx>
|
||||||
class ReverseV2Op : public OpKernel {
|
class ReverseV2Op : public OpKernel {
|
||||||
public:
|
public:
|
||||||
explicit ReverseV2Op(OpKernelConstruction* context) : OpKernel(context) {}
|
explicit ReverseV2Op(OpKernelConstruction* context) : OpKernel(context) {}
|
||||||
@ -242,15 +242,15 @@ class ReverseV2Op : public OpKernel {
|
|||||||
} else {
|
} else {
|
||||||
const int input_dims = input.dims();
|
const int input_dims = input.dims();
|
||||||
const TensorShape& sparse_dims_shape = sparse_dims.shape();
|
const TensorShape& sparse_dims_shape = sparse_dims.shape();
|
||||||
const auto& axes_sparse_flat = sparse_dims.flat<int32>();
|
const auto& axes_sparse_flat = sparse_dims.flat<Tidx>();
|
||||||
|
|
||||||
OP_REQUIRES(context, TensorShapeUtils::IsVector(sparse_dims_shape),
|
OP_REQUIRES(context, TensorShapeUtils::IsVector(sparse_dims_shape),
|
||||||
errors::InvalidArgument("'dims' must be 1-dimension, not ",
|
errors::InvalidArgument("'dims' must be 1-dimension, not ",
|
||||||
sparse_dims.dims()));
|
sparse_dims.dims()));
|
||||||
gtl::InlinedVector<bool, 8> axes_dense(input_dims, false);
|
gtl::InlinedVector<bool, 8> axes_dense(input_dims, false);
|
||||||
for (int dummy = 0; dummy < axes_sparse_flat.size(); dummy++) {
|
for (int dummy = 0; dummy < axes_sparse_flat.size(); dummy++) {
|
||||||
int32 axis = internal::SubtleMustCopy<int32>(axes_sparse_flat(dummy));
|
Tidx axis = internal::SubtleMustCopy<Tidx>(axes_sparse_flat(dummy));
|
||||||
int32 canonical_axis = axis < 0 ? input_dims + axis : axis;
|
Tidx canonical_axis = axis < 0 ? input_dims + axis : axis;
|
||||||
OP_REQUIRES(context, canonical_axis >= 0 && canonical_axis < input_dims,
|
OP_REQUIRES(context, canonical_axis >= 0 && canonical_axis < input_dims,
|
||||||
errors::InvalidArgument("'axis'[", dummy, "] = ", axis,
|
errors::InvalidArgument("'axis'[", dummy, "] = ", axis,
|
||||||
" is out of valid range [", 0, ", ",
|
" is out of valid range [", 0, ", ",
|
||||||
@@ -306,7 +306,13 @@ class ReverseV2Op : public OpKernel {
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<CPUDevice, T>)
+                          ReverseV2Op<CPUDevice, T, int32>)  \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_CPU)            \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<CPUDevice, T, int64>)
 TF_CALL_POD_TYPES(REGISTER_KERNELS);
 TF_CALL_string(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
@@ -358,7 +364,13 @@ TF_CALL_complex128(DECLARE_GPU_SPEC);
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<GPUDevice, T>)
+                          ReverseV2Op<GPUDevice, T, int32>)  \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_GPU)            \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<GPUDevice, T, int64>)
 TF_CALL_uint8(REGISTER_GPU_KERNELS);
 TF_CALL_int8(REGISTER_GPU_KERNELS);
 // TODO decide whether we want to enable the bool kernel.
@@ -387,7 +399,15 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                             .HostMemory("tensor")
                             .HostMemory("axis")
                             .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32>);
+                        ReverseV2Op<CPUDevice, int32, int32>);
+REGISTER_KERNEL_BUILDER(Name("ReverseV2")
+                            .Device(DEVICE_GPU)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tidx")
+                            .HostMemory("tensor")
+                            .HostMemory("axis")
+                            .HostMemory("output"),
+                        ReverseV2Op<CPUDevice, int32, int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
@@ -402,7 +422,13 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                               .TypeConstraint<T>("T")        \
                               .TypeConstraint<int32>("Tidx") \
                               .HostMemory("axis"),           \
-                          ReverseV2Op<SYCLDevice, T>)
+                          ReverseV2Op<SYCLDevice, T, int32>) \
+  REGISTER_KERNEL_BUILDER(Name("ReverseV2")                  \
+                              .Device(DEVICE_SYCL)           \
+                              .TypeConstraint<T>("T")        \
+                              .TypeConstraint<int64>("Tidx") \
+                              .HostMemory("axis"),           \
+                          ReverseV2Op<SYCLDevice, T, int64>)
 TF_CALL_uint8(REGISTER_SYCL_KERNELS);
 TF_CALL_int8(REGISTER_SYCL_KERNELS);
 TF_CALL_float(REGISTER_SYCL_KERNELS);
@@ -422,6 +448,14 @@ REGISTER_KERNEL_BUILDER(Name("ReverseV2")
                             .HostMemory("tensor")
                             .HostMemory("axis")
                             .HostMemory("output"),
-                        ReverseV2Op<CPUDevice, int32>);
+                        ReverseV2Op<CPUDevice, int32, int32>);
+REGISTER_KERNEL_BUILDER(Name("ReverseV2")
+                            .Device(DEVICE_SYCL)
+                            .TypeConstraint<int32>("T")
+                            .TypeConstraint<int64>("Tidx")
+                            .HostMemory("tensor")
+                            .HostMemory("axis")
+                            .HostMemory("output"),
+                        ReverseV2Op<CPUDevice, int32, int64>);
 #endif  // TENSORFLOW_USE_SYCL
 }  // namespace tensorflow
@@ -216,7 +216,7 @@ void PopulateFromDenseGroup(OpKernelContext* ctx, const Tensor& input_tensor,
   result->clear();
   auto input_flat = input_tensor.flat<T>();
   const auto start = std::inner_product(
-      group_indices.begin(), group_indices.end(), input_strides.begin(), 0L);
+      group_indices.begin(), group_indices.end(), input_strides.begin(), 0LL);
   const TensorShape& input_shape = input_tensor.shape();
   const auto end = start + input_shape.dim_size(input_shape.dims() - 1);
   for (int64 i = start; i < end; ++i) {
@@ -279,7 +279,7 @@ void SetSizeOp<T>::Compute(OpKernelContext* ctx) {
 
     const auto group_key = group.group();
     const auto output_index = std::inner_product(
-        group_key.begin(), group_key.end(), output_strides.begin(), 0L);
+        group_key.begin(), group_key.end(), output_strides.begin(), 0LL);
     out(output_index) = group_set.size();
   }
 }
@@ -273,6 +273,7 @@ class MklSliceOp : public OpKernel {
     HANDLE_DIM(1);
     HANDLE_DIM(2);
     HANDLE_DIM(3);
+    HANDLE_DIM(4);
     HANDLE_DIM(5);
     HANDLE_DIM(6);
     HANDLE_DIM(7);
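
The added HANDLE_DIM(4) closes a gap: the macro chain dispatched ranks 1-3 and 5-7, so 4-D inputs previously fell through to whatever fallback follows the chain. A simplified, self-contained sketch of this runtime-rank-to-compile-time-template dispatch pattern (illustrative names, not the MKL kernel itself):

#include <iostream>

template <int NDIM>
void SliceRank() { std::cout << "slicing rank " << NDIM << "\n"; }

void DispatchSlice(int dims) {
  // Maps a runtime rank onto a compile-time template instantiation,
  // one case per supported rank, mirroring the kernel's HANDLE_DIM chain.
#define HANDLE_DIM(NDIM)  \
  if (dims == NDIM) {     \
    SliceRank<NDIM>();    \
    return;               \
  }
  HANDLE_DIM(1);
  HANDLE_DIM(2);
  HANDLE_DIM(3);
  HANDLE_DIM(4);  // the case this hunk restores
  HANDLE_DIM(5);
  HANDLE_DIM(6);
  HANDLE_DIM(7);
#undef HANDLE_DIM
  std::cout << "unsupported rank " << dims << "\n";  // fallback path
}

int main() { DispatchSlice(4); }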
|
@ -49,43 +49,15 @@ class StringToNumberOp : public OpKernel {
|
|||||||
auto output_flat = output_tensor->flat<OutputType>();
|
auto output_flat = output_tensor->flat<OutputType>();
|
||||||
|
|
||||||
for (int i = 0; i < input_flat.size(); ++i) {
|
for (int i = 0; i < input_flat.size(); ++i) {
|
||||||
Convert(input_flat(i), &output_flat(i), context);
|
OP_REQUIRES(
|
||||||
|
context,
|
||||||
|
strings::SafeStringToNumeric<OutputType>(input_flat(i).c_str(),
|
||||||
|
&output_flat(i)),
|
||||||
|
errors::InvalidArgument(kErrorMessage, input_flat(i).c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
void Convert(const string& s, OutputType* output_data,
|
|
||||||
OpKernelContext* context);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
|
||||||
void StringToNumberOp<float>::Convert(const string& s, float* output_data,
|
|
||||||
OpKernelContext* context) {
|
|
||||||
OP_REQUIRES(context, strings::safe_strtof(s.c_str(), output_data),
|
|
||||||
errors::InvalidArgument(kErrorMessage, s));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void StringToNumberOp<double>::Convert(const string& s, double* output_data,
|
|
||||||
OpKernelContext* context) {
|
|
||||||
OP_REQUIRES(context, strings::safe_strtod(s.c_str(), output_data),
|
|
||||||
errors::InvalidArgument(kErrorMessage, s));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void StringToNumberOp<int32>::Convert(const string& s, int32* output_data,
|
|
||||||
OpKernelContext* context) {
|
|
||||||
OP_REQUIRES(context, strings::safe_strto32(s, output_data),
|
|
||||||
errors::InvalidArgument(kErrorMessage, s));
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
|
||||||
void StringToNumberOp<int64>::Convert(const string& s, int64* output_data,
|
|
||||||
OpKernelContext* context) {
|
|
||||||
OP_REQUIRES(context, strings::safe_strto64(s, output_data),
|
|
||||||
errors::InvalidArgument(kErrorMessage, s));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Registers the currently supported output types.
|
// Registers the currently supported output types.
|
||||||
#define REGISTER(type) \
|
#define REGISTER(type) \
|
||||||
REGISTER_KERNEL_BUILDER(Name("StringToNumber") \
|
REGISTER_KERNEL_BUILDER(Name("StringToNumber") \
|
||||||
|
@@ -55,14 +55,14 @@ namespace functor {
 namespace {
 template <typename T>
 int64 CountAccumulator(const T* begin, const T* end) {
-  return std::accumulate(begin, end, 0L, [](int64 accum, const T& val) {
+  return std::accumulate(begin, end, 0LL, [](int64 accum, const T& val) {
     return accum + (val != T(0));
   });
 }
 
 template <>
 int64 CountAccumulator<bool>(const bool* begin, const bool* end) {
-  return std::accumulate(begin, end, 0L);
+  return std::accumulate(begin, end, 0LL);
 }
 
 }  // namespace
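
This hunk and the two std::inner_product hunks above share one motivation: std::accumulate and std::inner_product do their arithmetic in the type of the initial value. 0L is `long`, which is 64-bit on LP64 Linux/macOS but only 32 bits on LLP64 Windows, so large sums silently truncated there; 0LL pins the accumulator to `long long`, 64-bit everywhere. A minimal sketch of the failure mode (the truncation only shows up on an LLP64 target):

#include <cstdint>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int64_t> v(2, int64_t{1} << 40);  // two values of 2^40

  // Accumulator type is decltype(init). With 0L that is `long`, which is
  // 32 bits on LLP64 (64-bit Windows), so each partial sum is truncated.
  long bad = std::accumulate(v.begin(), v.end(), 0L);

  // With 0LL the accumulator is `long long` (64-bit on all mainstream
  // platforms), so the sum of 2^41 survives intact.
  long long good = std::accumulate(v.begin(), v.end(), 0LL);

  std::cout << bad << " vs " << good << "\n";  // differ on LLP64 targets
}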
@@ -18,6 +18,7 @@ limitations under the License.
 #include <math.h>
 #include <algorithm>
 #include <functional>
+#include <numeric>
 #include <unordered_map>
 #include <vector>
 
@@ -122,6 +122,38 @@ bool safe_strtof(const char* str, float* value);
 // Values may be rounded on over- and underflow.
 bool safe_strtod(const char* str, double* value);
 
+inline bool ProtoParseNumeric(StringPiece s, int32* value) {
+  return safe_strto32(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
+  return safe_strtou32(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, int64* value) {
+  return safe_strto64(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
+  return safe_strtou64(s, value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, float* value) {
+  return safe_strtof(s.ToString().c_str(), value);
+}
+
+inline bool ProtoParseNumeric(StringPiece s, double* value) {
+  return safe_strtod(s.ToString().c_str(), value);
+}
+
+// Convert strings to number of type T.
+// Leading and trailing spaces are allowed.
+// Values may be rounded on over- and underflow.
+template <typename T>
+bool SafeStringToNumeric(StringPiece s, T* value) {
+  return ProtoParseNumeric(s, value);
+}
+
 // Converts from an int64 to a human readable string representing the
 // same number, using decimal powers. e.g. 1200000 -> "1.20M".
 string HumanReadableNum(int64 value);
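
The new header block is a small overload-dispatch pattern: one generic SafeStringToNumeric<T> entry point that resolves to a per-type ProtoParseNumeric overload at compile time. A self-contained sketch of the same idea using standard-library parsers (illustrative names, not TensorFlow's implementation):

#include <cstdlib>
#include <iostream>
#include <string>

// One overload per supported type; overload resolution picks the parser.
inline bool ParseNumeric(const std::string& s, long* value) {
  char* end = nullptr;
  *value = std::strtol(s.c_str(), &end, 10);
  return end != s.c_str() && *end == '\0';
}

inline bool ParseNumeric(const std::string& s, double* value) {
  char* end = nullptr;
  *value = std::strtod(s.c_str(), &end);
  return end != s.c_str() && *end == '\0';
}

// The single generic entry point callers use, analogous to the diff's
// SafeStringToNumeric<T>: it simply forwards to the matching overload.
template <typename T>
bool StringToNumeric(const std::string& s, T* value) {
  return ParseNumeric(s, value);
}

int main() {
  double d = 0;
  long n = 0;
  std::cout << StringToNumeric("3.25", &d) << " " << d << "\n";  // 1 3.25
  std::cout << StringToNumeric("abc", &n) << "\n";               // 0
}

This is also why the proto_text helpers below can be deleted: the overload set now lives in one place and the scanner calls the generic entry point instead.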
@@ -118,30 +118,6 @@ class ProtoTextOutput {
   TF_DISALLOW_COPY_AND_ASSIGN(ProtoTextOutput);
 };
 
-inline bool ProtoParseNumeric(StringPiece s, int32* value) {
-  return ::tensorflow::strings::safe_strto32(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, uint32* value) {
-  return ::tensorflow::strings::safe_strtou32(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, int64* value) {
-  return ::tensorflow::strings::safe_strto64(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, uint64* value) {
-  return ::tensorflow::strings::safe_strtou64(s, value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, float* value) {
-  return ::tensorflow::strings::safe_strtof(s.ToString().c_str(), value);
-}
-
-inline bool ProtoParseNumeric(StringPiece s, double* value) {
-  return ::tensorflow::strings::safe_strtod(s.ToString().c_str(), value);
-}
-
 inline void ProtoSpaceAndComments(Scanner* scanner) {
   for (;;) {
     scanner->AnySpace();
@@ -174,7 +150,7 @@ bool ProtoParseNumericFromScanner(Scanner* scanner, T* value) {
   }
 
   ProtoSpaceAndComments(scanner);
-  return ProtoParseNumeric(numeric_str, value);
+  return SafeStringToNumeric<T>(numeric_str, value);
 }
 
 // Parse the next boolean value from <scanner>, returning false if parsing
@@ -83,7 +83,7 @@ string Uppercase(StringPiece s);
 
 // Converts "^2ILoveYou!" to "i_love_you_". More specifically:
 // - converts all non-alphanumeric characters to underscores
-// - replaces each occurence of a capital letter (except the very
+// - replaces each occurrence of a capital letter (except the very
 //   first character and if there is already an '_' before it) with '_'
 //   followed by this letter in lower case
 // - Skips leading non-alpha characters
@@ -884,7 +884,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)
 
     # Employ the bounding box to distort the image.
    distorted_image = tf.slice(image, begin, size)
@@ -976,7 +976,7 @@ For example,
     # Draw the bounding box in an image summary.
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox_for_draw)
-    tf.image_summary('images_with_box', image_with_box)
+    tf.summary.image('images_with_box', image_with_box)
 
     # Employ the bounding box to distort the image.
     distorted_image = tf.slice(image, begin, size)
@@ -3361,7 +3361,11 @@ REGISTER_OP("_MklLRN")
     .Input("input: T")
     .Input("mkl_input: uint8")
     .Output("output: T")
+#ifndef INTEL_MKL_DNN
     .Output("workspace: T")
+#else
+    .Output("workspace: uint8")
+#endif
     .Output("mkl_output: uint8")
     .Output("mkl_workspace: uint8")
     .Attr("depth_radius: int = 5")
@@ -3385,7 +3389,11 @@ REGISTER_OP("_MklLRNGrad")
     .Input("input_grads: T")
     .Input("input_image: T")
     .Input("output_image: T")
+#ifndef INTEL_MKL_DNN
     .Input("workspace: T")
+#else
+    .Input("workspace: uint8")
+#endif
     .Input("mkl_input_grads: uint8")
     .Input("mkl_input_image: uint8")
     .Input("mkl_output_image: uint8")
|
@ -11,6 +11,7 @@ load(
|
|||||||
"//tensorflow:tensorflow.bzl",
|
"//tensorflow:tensorflow.bzl",
|
||||||
"tf_cc_test",
|
"tf_cc_test",
|
||||||
"tf_copts",
|
"tf_copts",
|
||||||
|
"if_windows",
|
||||||
)
|
)
|
||||||
|
|
||||||
filegroup(
|
filegroup(
|
||||||
@@ -261,6 +262,7 @@ tf_cc_test(
     name = "gcs_dns_cache_test",
     size = "small",
     srcs = ["gcs_dns_cache_test.cc"],
+    linkopts = if_windows(["-DEFAULTLIB:ws2_32.lib"]),
     deps = [
         ":gcs_dns_cache",
         "//tensorflow/core:lib",
@@ -3,6 +3,7 @@
 load("@protobuf_archive//:protobuf.bzl", "proto_gen")
 load("@protobuf_archive//:protobuf.bzl", "py_proto_library")
 load("//tensorflow:tensorflow.bzl", "if_not_mobile")
+load("//tensorflow:tensorflow.bzl", "if_windows")
 load("//tensorflow:tensorflow.bzl", "if_not_windows")
 load("//tensorflow/core:platform/default/build_config_root.bzl", "if_static")
 load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
@@ -358,7 +359,9 @@ def tf_additional_proto_hdrs():
       "platform/default/integral_types.h",
       "platform/default/logging.h",
       "platform/default/protobuf.h"
-  ]
+  ] + if_windows([
+      "platform/windows/integral_types.h",
+  ])
 
 def tf_additional_proto_srcs():
   return [
@@ -22,8 +22,10 @@ limitations under the License.
 // Include appropriate platform-dependent implementations
 #if defined(PLATFORM_GOOGLE) || defined(GOOGLE_INTEGRAL_TYPES)
 #include "tensorflow/core/platform/google/integral_types.h"
+#elif defined(PLATFORM_WINDOWS)
+#include "tensorflow/core/platform/windows/integral_types.h"
 #elif defined(PLATFORM_POSIX) || defined(PLATFORM_POSIX_ANDROID) || \
-    defined(PLATFORM_GOOGLE_ANDROID) || defined(PLATFORM_WINDOWS)
+    defined(PLATFORM_GOOGLE_ANDROID)
 #include "tensorflow/core/platform/default/integral_types.h"
 #else
 #error Define the appropriate PLATFORM_<foo> macro for this platform
tensorflow/core/platform/windows/integral_types.h (new file, 25 lines)
@@ -0,0 +1,25 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
+#define TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
+
+#include "tensorflow/core/platform/default/integral_types.h"
+
+#include <cstddef>
+
+typedef std::ptrdiff_t ssize_t;
+
+#endif  // TENSORFLOW_PLATFORM_WINDOWS_INTEGRAL_TYPES_H_
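
The new Windows header exists because MSVC's C runtime does not define the POSIX ssize_t used elsewhere in the tree; std::ptrdiff_t is the natural stand-in, being signed and size_t-width on mainstream ABIs. A sketch verifying those properties at compile time (illustrative alias name, chosen to avoid clashing with a real ssize_t):

#include <cstddef>
#include <iostream>
#include <type_traits>

// On platforms without POSIX ssize_t, std::ptrdiff_t has the properties a
// signed counterpart of size_t needs: signed, and size_t's width on
// mainstream ABIs (the static_asserts confirm this for the current target).
typedef std::ptrdiff_t my_ssize_t;

static_assert(sizeof(my_ssize_t) == sizeof(std::size_t),
              "must match size_t's width");
static_assert(std::is_signed<my_ssize_t>::value, "must be signed");

int main() {
  my_ssize_t bytes_read = -1;  // e.g. an error return from a read-like API
  std::cout << bytes_read << "\n";
}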
@@ -616,7 +616,7 @@ SparseTensor SparseTensor::Slice(const SparseTensor& input_tensor,
   int index = 0;
   for (int i = 0; i < input_tensor.indices().dim_size(0) && index < count;
        i++) {
-    // The logic here is similiar as the above except that the above
+    // The logic here is similar as the above except that the above
     // only count the number of indices while here we actually generate
     // the output.
     bool hit = true;
@@ -537,7 +537,7 @@ import cv2
 # Use a custom OpenCV function to read the image, instead of the standard
 # TensorFlow `tf.read_file()` operation.
 def _read_py_function(filename, label):
-  image_decoded = cv2.imread(image_string, cv2.IMREAD_GRAYSCALE)
+  image_decoded = cv2.imread(filename.decode(), cv2.IMREAD_GRAYSCALE)
   return image_decoded, label
 
 # Use standard TensorFlow operations to resize the image to a fixed shape.
@@ -120,7 +120,7 @@ def generate_batch(batch_size, num_skips, skip_window):
       batch[i * num_skips + j] = buffer[skip_window]
       labels[i * num_skips + j, 0] = buffer[context_word]
     if data_index == len(data):
-      buffer[:] = data[:span]
+      buffer.extend(data[0:span])
      data_index = span
     else:
       buffer.append(data[data_index])
@@ -65,6 +65,51 @@ func NewSession(graph *Graph, options *SessionOptions) (*Session, error) {
     return s, nil
 }
 
+// Device structure contains information about a device associated with a session, as returned by ListDevices()
+type Device struct {
+    Name, Type       string
+    MemoryLimitBytes int64
+}
+
+// Return list of devices associated with a Session
+func (s *Session) ListDevices() ([]Device, error) {
+    var devices []Device
+
+    status := newStatus()
+    devices_list := C.TF_SessionListDevices(s.c, status.c)
+    if err := status.Err(); err != nil {
+        return nil, fmt.Errorf("SessionListDevices() failed: %v", err)
+    }
+    defer C.TF_DeleteDeviceList(devices_list)
+
+    for i := 0; i < int(C.TF_DeviceListCount(devices_list)); i++ {
+        device_name := C.TF_DeviceListName(devices_list, C.int(i), status.c)
+        if err := status.Err(); err != nil {
+            return nil, fmt.Errorf("DeviceListName(index=%d) failed: %v", i, err)
+        }
+
+        device_type := C.TF_DeviceListType(devices_list, C.int(i), status.c)
+        if err := status.Err(); err != nil {
+            return nil, fmt.Errorf("DeviceListType(index=%d) failed: %v", i, err)
+        }
+
+        memory_limit_bytes := C.TF_DeviceListMemoryBytes(devices_list, C.int(i), status.c)
+        if err := status.Err(); err != nil {
+            return nil, fmt.Errorf("DeviceListMemoryBytes(index=%d) failed: %v", i, err)
+        }
+
+        device := Device{
+            Name:             C.GoString(device_name),
+            Type:             C.GoString(device_type),
+            MemoryLimitBytes: int64(memory_limit_bytes),
+        }
+
+        devices = append(devices, device)
+    }
+
+    return devices, nil
+}
+
 // Run the graph with the associated session starting with the supplied feeds
 // to compute the value of the requested fetches. Runs, but does not return
 // Tensors for operations specified in targets.
@@ -283,3 +283,19 @@ func TestSessionConfig(t *testing.T) {
         t.Fatalf("Got %v, want -1", output[0].Value())
     }
 }
+
+func TestListDevices(t *testing.T) {
+    s, err := NewSession(NewGraph(), nil)
+    if err != nil {
+        t.Fatalf("NewSession(): %v", err)
+    }
+
+    devices, err := s.ListDevices()
+    if err != nil {
+        t.Fatalf("ListDevices(): %v", err)
+    }
+
+    if len(devices) == 0 {
+        t.Fatalf("no devices detected")
+    }
+}
@@ -263,6 +263,7 @@ _allowed_symbols.extend([
     'GIT_VERSION',
     'COMPILER_VERSION',
     'CXX11_ABI_FLAG',
+    'MONOLITHIC_BUILD',
 ])
 
 # Remove all extra symbols that don't have a docstring or are not explicitly
@@ -282,6 +283,7 @@ _exported_dunders = set([
     '__git_version__',
     '__compiler_version__',
     '__cxx11_abi_flag__',
+    '__monolithic_build__',
 ])
 
 # Expose symbols minus dunders, unless they are whitelisted above.
@@ -77,6 +77,7 @@ class SessionClusterSpecPropagationTest(test_util.TensorFlowTestCase):
     config = config_pb2.ConfigProto(cluster_def=cluster_def)
 
     with ops.Graph().as_default() as g, ops.device('/job:worker/task:1'):
+      with ops.device('/cpu:0'):
         const = constant_op.constant(17)
     sess = session.Session(server1.target, config=config, graph=g)
     run_options = config_pb2.RunOptions(
@@ -100,6 +100,9 @@ tensorflow::ImportNumpy();
 // _GLIBCXX_USE_CXX11_ABI flag value
 %constant const int __cxx11_abi_flag__ = tf_cxx11_abi_flag();
 
+// Flag indicating whether the build is monolithic
+%constant const int __monolithic_build__ = tf_monolithic_build();
+
 // Release the Python GIL for the duration of most methods.
 %exception {
   Py_BEGIN_ALLOW_THREADS;
@@ -28,7 +28,7 @@ models:
 
 * Easy access through session wrappers
 * Easy integration with common high-level APIs, such as
-  [tf-learn](https://www.tensorflow.org/get_started/tflearn) and
+  [TensorFlow Estimators](https://www.tensorflow.org/programmers_guide/estimators) and
   [Keras](https://keras.io/)
 * Inspection of runtime tensor values and node connections
 * Conditional breaking after runs that generate tensors satisfying given
|
@ -20,6 +20,7 @@ from __future__ import print_function
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
|
import sys
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -765,6 +766,10 @@ class FunctionTest(test.TestCase):
     # We added more randomness to function names in C API.
     # TODO(iga): Remove this if statement when we switch to C API.
     if ops._USE_C_API:  # pylint: disable=protected-access
+      if sys.byteorder == 'big':
+        self.assertEqual("Foo_kEdkAG8SJvg",
+                         Foo.instantiate([dtypes.float32] * 3).name)
+      else:
         self.assertEqual("Foo_aCYSbwBkR5A",
                          Foo.instantiate([dtypes.float32] * 3).name)
     else:
@@ -25,11 +25,13 @@ __version__ = pywrap_tensorflow.__version__
 __git_version__ = pywrap_tensorflow.__git_version__
 __compiler_version__ = pywrap_tensorflow.__compiler_version__
 __cxx11_abi_flag__ = pywrap_tensorflow.__cxx11_abi_flag__
+__monolithic_build__ = pywrap_tensorflow.__monolithic_build__
 
 VERSION = __version__
 GIT_VERSION = __git_version__
 COMPILER_VERSION = __compiler_version__
 CXX11_ABI_FLAG = __cxx11_abi_flag__
+MONOLITHIC_BUILD = __monolithic_build__
 
 GRAPH_DEF_VERSION = pywrap_tensorflow.GRAPH_DEF_VERSION
 GRAPH_DEF_VERSION_MIN_CONSUMER = (
@@ -42,6 +44,7 @@ __all__ = [
     "__git_version__",
     "__compiler_version__",
     "__cxx11_abi_flag__",
+    "__monolithic_build__",
     "COMPILER_VERSION",
     "CXX11_ABI_FLAG",
     "GIT_VERSION",
@@ -49,4 +52,5 @@ __all__ = [
     "GRAPH_DEF_VERSION_MIN_CONSUMER",
     "GRAPH_DEF_VERSION_MIN_PRODUCER",
     "VERSION",
+    "MONOLITHIC_BUILD",
 ]
@@ -2198,6 +2198,7 @@ cuda_py_test(
     srcs = ["atrous_convolution_test.py"],
     additional_deps = [
         "//third_party/py/numpy",
+        "//tensorflow/python:array_ops",
         "//tensorflow/python:client_testlib",
         "//tensorflow/python:framework_for_generated_wrappers",
         "//tensorflow/python:nn_grad",
@@ -277,8 +277,10 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
     x_np = np.array([1, 200, 3, 40, 5], dtype=np_dtype)
 
     for use_gpu in [False, True]:
+      for axis_dtype in [dtypes.int32, dtypes.int64]:
         with self.test_session(use_gpu=use_gpu):
-          x_tf = array_ops.reverse_v2(x_np, [0]).eval()
+          x_tf = array_ops.reverse_v2(x_np,
+              constant_op.constant([0], dtype=axis_dtype)).eval()
           self.assertAllEqual(x_tf, np.asarray(x_np)[::-1])
 
   def _reverse2DimAuto(self, np_dtype):
@@ -286,12 +288,18 @@ class ReverseV2Test(test_util.TensorFlowTestCase):
 
     for reverse_f in [array_ops.reverse_v2, array_ops.reverse]:
       for use_gpu in [False, True]:
+        for axis_dtype in [dtypes.int32, dtypes.int64]:
           with self.test_session(use_gpu=use_gpu):
-            x_tf_1 = reverse_f(x_np, [0]).eval()
-            x_tf_2 = reverse_f(x_np, [-2]).eval()
-            x_tf_3 = reverse_f(x_np, [1]).eval()
-            x_tf_4 = reverse_f(x_np, [-1]).eval()
-            x_tf_5 = reverse_f(x_np, [1, 0]).eval()
+            x_tf_1 = reverse_f(x_np,
+                constant_op.constant([0], dtype=axis_dtype)).eval()
+            x_tf_2 = reverse_f(x_np,
+                constant_op.constant([-2], dtype=axis_dtype)).eval()
+            x_tf_3 = reverse_f(x_np,
+                constant_op.constant([1], dtype=axis_dtype)).eval()
+            x_tf_4 = reverse_f(x_np,
+                constant_op.constant([-1], dtype=axis_dtype)).eval()
+            x_tf_5 = reverse_f(x_np,
+                constant_op.constant([1, 0], dtype=axis_dtype)).eval()
             self.assertAllEqual(x_tf_1, np.asarray(x_np)[::-1, :])
             self.assertAllEqual(x_tf_2, np.asarray(x_np)[::-1, :])
             self.assertAllEqual(x_tf_3, np.asarray(x_np)[:, ::-1])
@@ -26,6 +26,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_ops
 import tensorflow.python.ops.nn_grad  # pylint: disable=unused-import
@@ -108,6 +109,18 @@ class AtrousConvolutionTest(test.TestCase):
 
         add_check(check, y1, y2)
 
+  def test_unknown_spatial_dims_for_channel_last_format(self):
+    x = array_ops.placeholder(dtypes.float32, [1, None, None, 10])
+    w = array_ops.zeros([3, 3, 10, 20])
+    y = nn_ops.convolution(x, w, "VALID", dilation_rate=[2, 2], data_format="NHWC")
+    self.assertEqual(y.shape.as_list(), [1, None, None, 20])
+
+  def test_unknown_spatial_dims_for_channel_first_format(self):
+    x = array_ops.placeholder(dtypes.float32, [1, 10, None, None])
+    w = array_ops.zeros([3, 3, 10, 20])
+    y = nn_ops.convolution(x, w, "VALID", dilation_rate=[2, 2], data_format="NCHW")
+    self.assertEqual(y.shape.as_list(), [1, 20, None, None])
+
   @test_util.run_in_graph_and_eager_modes()
   def testAtrousConvolution2D(self):
     with self._delay_checks() as add_check:
|
@ -18,6 +18,8 @@ from __future__ import absolute_import
|
|||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
|
from tensorflow.python.framework import constant_op
|
||||||
|
from tensorflow.python.framework import dtypes
|
||||||
from tensorflow.python.ops.gen_array_ops import _broadcast_args
|
from tensorflow.python.ops.gen_array_ops import _broadcast_args
|
||||||
from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
|
from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args
|
||||||
from tensorflow.python.platform import test
|
from tensorflow.python.platform import test
|
||||||
@@ -135,6 +137,19 @@ class BcastOpsTest(test.TestCase):
     self.assertAllEqual(r0, [0, 1, 3])
     self.assertAllEqual(r1, [])
 
+  def testDataTypes(self):
+    for dtype in [dtypes.int32, dtypes.int64]:
+      r = self._GetBroadcastShape(
+          constant_op.constant([2, 3, 5], dtype=dtype),
+          constant_op.constant([1], dtype=dtype))
+      self.assertAllEqual(r, [2, 3, 5])
+
+      r0, r1 = self._GetGradientArgs(
+          constant_op.constant([2, 3, 5], dtype=dtype),
+          constant_op.constant([1], dtype=dtype))
+      self.assertAllEqual(r0, [])
+      self.assertAllEqual(r1, [0, 1, 2])
+
 
 if __name__ == "__main__":
   test.main()
@@ -26,7 +26,6 @@ from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
-
 class RecordInputOpTest(test.TestCase):
 
   def generateTestData(self,
Some files were not shown because too many files have changed in this diff.