diff --git a/.bazelrc b/.bazelrc
index 590a87f5732..7a32ca68e40 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -30,6 +30,10 @@ build:monolithic --define framework_shared_object=false
# opts in to modular op registration support by default.
build --define framework_shared_object=true
+# Flags for open source build, always set to be true.
+build --define open_source_build=true
+test --define open_source_build=true
+
# Please note that MKL on MacOS or windows is still not supported.
# If you would like to use a local MKL instead of downloading, please set the
# environment variable "TF_MKL_ROOT" every time before build.
@@ -108,6 +112,10 @@ build --spawn_strategy=standalone
build --strategy=Genrule=standalone
build -c opt
+# By default, build TF in C++ 14 mode.
+build --cxxopt=-std=c++14
+build --host_cxxopt=-std=c++14
+
# Make Bazel print out all options from rc files.
build --announce_rc
diff --git a/CODEOWNERS b/CODEOWNERS
index 2828cf3baf8..25ff318d2d8 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1,13 +1,14 @@
# Where component owners are known, add them here.
-/tensorflow/c/eager @jaingurav @alextp
+/tensorflow/c/eager @jaingaurav @alextp
/tensorflow/core/common_runtime/eager @jaingaurav @alextp
/tenosrflow/core/debug @caisq
/tensorflow/core/nccl/ @azaks2 @chsigg
/tensorflow/core/platform/windows/ @mrry
/tensorflow/core/platform/s3 @yongtang
+/tensorflow/python/autograph/ @mdanatg @kkimdev
/tensorflow/python/debug @caisq
-/tensorflow/python/eager @jaingurav @alextp
+/tensorflow/python/eager @jaingaurav @alextp
/tensorflow/python/tools/api/generator/ @annarev
/tensorflow/tensorboard/ @jart
/tensorflow/tools/docs/ @markdaoust
@@ -15,6 +16,7 @@
# contrib
# NEED OWNER: /tensorflow/contrib/all_reduce
+/tensorflow/contrib/autograph/ @mdanatg @kkimdev
/tensorflow/contrib/batching/ @alextp @chrisolston
/tensorflow/contrib/bayesflow/ @ebrevdo @rsepassi @jvdillon
/tensorflow/contrib/boosted_trees/ @sshrdp @yk5 @nataliaponomareva
@@ -26,11 +28,10 @@
/tensorflow/contrib/data/ @mrry
/tensorflow/tensorflow/contrib/distribute @joshl @priyag @sourabhbajaj @frankchn
/tensorflow/contrib/distributions/ @jvdillon @langmore @rsepassi
-/tensorflow/contrib/eager @jaingurav @alextp
+/tensorflow/contrib/eager @jaingaurav @alextp
/tensorflow/contrib/factorization/ @agarwal-ashish @xavigonzalvo
/tensorflow/contrib/ffmpeg/ @fredbertsch
/tensorflow/contrib/framework/ @ebrevdo
-/tensorflow/contrib/gan/ @joel-shor
/tensorflow/contrib/graph_editor/ @purpledog
# NEED OWNER: /tensorflow/contrib/grid_rnn/
/tensorflow/contrib/hadoop @yongtang
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index a4647020ff7..72304bee694 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -60,7 +60,13 @@ If you are experiencing or witnessing conflict, we ask you to use the following
## Reporting Violations
-Violations of the Code of Conduct can be reported to TensorFlow’s Project Stewards, Edd Wilder-James (ewj@google.com) and Sarah Novotny (sarahnovotny@google.com). The Project Steward will determine whether the Code of Conduct was violated, and will issue an appropriate sanction, possibly including a written warning or expulsion from the project, project sponsored spaces, or project forums. We ask that you make a good-faith effort to resolve your conflict via the conflict resolution policy before submitting a report.
+Violations of the Code of Conduct can be reported to TensorFlow’s Project
+Stewards, Edd Wilder-James (ewj@google.com) and Thea Lamkin
+(thealamkin@google.com). The Project Steward will determine whether the Code of
+Conduct was violated, and will issue an appropriate sanction, possibly including
+a written warning or expulsion from the project, project sponsored spaces, or
+project forums. We ask that you make a good-faith effort to resolve your
+conflict via the conflict resolution policy before submitting a report.
Violations of the Code of Conduct can occur in any setting, even those unrelated to the project. We will only consider complaints about conduct that has occurred within one year of the report.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4ed8a8bf2b2..2b285cd91d7 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -29,7 +29,8 @@ Follow either of the two links above to access the appropriate CLA and instructi
### Contributing code
If you have improvements to TensorFlow, send us your pull requests! For those
-just getting started, Github has a [howto](https://help.github.com/articles/using-pull-requests/).
+just getting started, Github has a
+[how to](https://help.github.com/articles/using-pull-requests/).
TensorFlow team members will be assigned to review your pull requests. Once the
pull requests are approved and pass continuous integration checks, a TensorFlow
diff --git a/README.md b/README.md
index 5a66b9bb03a..1eb06225176 100644
--- a/README.md
+++ b/README.md
@@ -2,61 +2,58 @@
------------------
-
-
| **`Documentation`** |
|-----------------|
| [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) |
-**TensorFlow** is an open source software library for numerical computation
-using data flow graphs. The graph nodes represent mathematical operations, while
-the graph edges represent the multidimensional data arrays (tensors) that flow
-between them. This flexible architecture enables you to deploy computation to
-one or more CPUs or GPUs in a desktop, server, or mobile device without
-rewriting code. TensorFlow also includes
-[TensorBoard](https://github.com/tensorflow/tensorboard), a data visualization
-toolkit.
+[TensorFlow](https://www.tensorflow.org/) is an end-to-end open source platform
+for machine learning. It has a comprehensive, flexible ecosystem of
+[tools](https://www.tensorflow.org/resources/tools),
+[libraries](https://www.tensorflow.org/resources/libraries-extensions), and
+[community](https://www.tensorflow.org/community) resources that lets
+researchers push the state-of-the-art in ML and developers easily build and
+deploy ML powered applications.
-TensorFlow was originally developed by researchers and engineers
-working on the Google Brain team within Google's Machine Intelligence Research
-organization for the purposes of conducting machine learning and deep neural
-networks research. The system is general enough to be applicable in a wide
-variety of other domains, as well.
+TensorFlow was originally developed by researchers and engineers working on the
+Google Brain team within Google's Machine Intelligence Research organization for
+the purposes of conducting machine learning and deep neural networks research.
+The system is general enough to be applicable in a wide variety of other
+domains, as well.
-TensorFlow provides stable Python and C APIs as well as non-guaranteed backwards
-compatible API's for C++, Go, Java, JavaScript, and Swift.
+TensorFlow provides stable [Python](https://www.tensorflow.org/api_docs/python)
+and [C++](https://www.tensorflow.org/api_docs/cc) APIs, as well as
+non-guaranteed backwards compatible API for
+[other languages](https://www.tensorflow.org/api_docs).
-Keep up to date with release announcements and security updates by
-subscribing to
+Keep up-to-date with release announcements and security updates by subscribing
+to
[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce).
+See all the [mailing lists](https://www.tensorflow.org/community/forums).
-## Installation
+## Install
+
+See the [TensorFlow install guide](https://www.tensorflow.org/install) for the
+[pip package](https://www.tensorflow.org/install/pip), to
+[enable GPU support](https://www.tensorflow.org/install/gpu), use a
+[Docker container](https://www.tensorflow.org/install/docker), and
+[build from source](https://www.tensorflow.org/install/source).
To install the current release for CPU-only:
```
-pip install tensorflow
+$ pip install tensorflow
```
-Use the GPU package for CUDA-enabled GPU cards:
+Use the GPU package for
+[CUDA-enabled GPU cards](https://www.tensorflow.org/install/gpu):
```
-pip install tensorflow-gpu
+$ pip install tensorflow-gpu
```
-*See [Installing TensorFlow](https://www.tensorflow.org/install) for detailed
-instructions, and how to build from source.*
-
-People who are a little more adventurous can also try our nightly binaries:
-
-**Nightly pip packages** * We are pleased to announce that TensorFlow now offers
-nightly pip packages under the
+*Nightly binaries are available for testing using the
[tf-nightly](https://pypi.python.org/pypi/tf-nightly) and
-[tf-nightly-gpu](https://pypi.python.org/pypi/tf-nightly-gpu) project on PyPi.
-Simply run `pip install tf-nightly` or `pip install tf-nightly-gpu` in a clean
-environment to install the nightly TensorFlow build. We support CPU and GPU
-packages on Linux, Mac, and Windows.
+[tf-nightly-gpu](https://pypi.python.org/pypi/tf-nightly-gpu) packages on PyPi.*
#### *Try your first TensorFlow program*
@@ -74,8 +71,8 @@ $ python
'Hello, TensorFlow!'
```
-Learn more examples about how to do specific tasks in TensorFlow at the
-[tutorials page of tensorflow.org](https://www.tensorflow.org/tutorials/).
+For more examples, see the
+[TensorFlow tutorials](https://www.tensorflow.org/tutorials/).
## Contribution guidelines
@@ -116,6 +113,8 @@ The TensorFlow project strives to abide by generally accepted best practices in
Build Type | Status | Artifacts
--------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
+**Linux AMD ROCm GPU** Nightly | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/)
+**Linux AMD ROCm GPU** Stable Release | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | [Release](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/)
**Linux s390x** Nightly | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/)
**Linux s390x CPU** Stable Release | [](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/)
**Linux ppc64le CPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/)
@@ -126,20 +125,23 @@ Build Type
**Linux CPU with Intel® MKL-DNN** <br> **Supports Python 2.7, 3.4, 3.5, and 3.6** | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-release-whl/lastStableBuild) | [1.13.1 pypi](https://pypi.org/project/intel-tensorflow/)
**Red Hat® Enterprise Linux® 7.6 CPU & GPU** <br> Python 2.7, 3.6 | [](https://jenkins-tensorflow.apps.ci.centos.org/job/tensorflow-rhel7-3.6/2/) | [1.13.1 pypi](https://tensorflow.pypi.thoth-station.ninja/index/)
-## For more information
+## Resources
-* [TensorFlow Website](https://www.tensorflow.org)
-* [TensorFlow Tutorials](https://www.tensorflow.org/tutorials/)
-* [TensorFlow Model Zoo](https://github.com/tensorflow/models)
+* [TensorFlow.org](https://www.tensorflow.org)
+* [TensorFlow tutorials](https://www.tensorflow.org/tutorials/)
+* [TensorFlow official models](https://github.com/tensorflow/models/tree/master/official)
+* [TensorFlow examples](https://github.com/tensorflow/examples)
+* [TensorFlow in Practice from Coursera](https://www.coursera.org/specializations/tensorflow-in-practice)
+* [TensorFlow blog](https://blog.tensorflow.org)
* [TensorFlow Twitter](https://twitter.com/tensorflow)
-* [TensorFlow Blog](https://blog.tensorflow.org)
-* [TensorFlow Course at Stanford](https://web.stanford.edu/class/cs20si)
-* [TensorFlow Roadmap](https://www.tensorflow.org/community/roadmap)
-* [TensorFlow White Papers](https://www.tensorflow.org/about/bib)
-* [TensorFlow YouTube Channel](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
-* [TensorFlow Visualization Toolkit](https://github.com/tensorflow/tensorboard)
+* [TensorFlow YouTube](https://www.youtube.com/channel/UC0rqucBdTuFTjJiefW5t-IQ)
+* [TensorFlow roadmap](https://www.tensorflow.org/community/roadmap)
+* [TensorFlow white papers](https://www.tensorflow.org/about/bib)
+* [TensorBoard visualization toolkit](https://github.com/tensorflow/tensorboard)
-Learn more about the TensorFlow community at the [community page of tensorflow.org](https://www.tensorflow.org/community) for a few ways to participate.
+Learn more about the
+[TensorFlow community](https://www.tensorflow.org/community) and how to
+[contribute](https://www.tensorflow.org/community/contribute).
## License
diff --git a/RELEASE.md b/RELEASE.md
index 6a4c2d6486d..801b9c8a2c8 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -43,6 +43,11 @@
* Transitive dependencies on :pooling_ops were removed. Some users may need to
add explicit dependencies on :pooling_ops if they reference the operators
from that library.
+* tf.keras.optimizers default learning rate changes:
+ * Adadelta: 1.000 to 0.001
+ * Adagrad: 0.01 to 0.001
+ * Adamax: 0.002 to 0.001
+ * NAdam: 0.002 to 0.001
## Bug Fixes and Other Changes
@@ -746,7 +751,7 @@ Ag Ramesh, Alex Wiltschko, Alexander Pantyukhin, Amogh Mannekote, An Jiaoyang, A
and [programmers guide page](http://tensorflow.org/versions/r1.9/programmers_guide/keras).
* Update `tf.keras` to the Keras 2.1.6 API.
* Added [`tf.keras.layers.CuDNNGRU`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNGRU) and [`tf.keras.layers.CuDNNLSTM`](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/keras/layers/CuDNNLSTM) layers. [Try it](https://colab.sandbox.google.com/github/tensorflow/tensorflow/blob/master/tensorflow/contrib/eager/python/examples/nmt_with_attention/nmt_with_attention.ipynb?linkId=53292082).
-* Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/boosted_trees).
+* Adding support of core [feature columns](https://www.tensorflow.org/get_started/feature_columns) and [losses](https://www.tensorflow.org/api_docs/python/tf/losses) to [gradient boosted trees estimators](https://github.com/tensorflow/models/tree/master/official/r1/boosted_trees).
* The [python interface](https://www.tensorflow.org/versions/r1.9/api_docs/python/tf/lite)
for the [TFLite Optimizing Converter](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/toco/README.md)
has been expanded, and the command line interface (AKA: `toco`, `tflite_convert`) is once again
diff --git a/WORKSPACE b/WORKSPACE
index 43312f350d6..74ea14d0fd7 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -7,7 +7,7 @@ http_archive(
sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9",
strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149",
urls = [
- "http://mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz",
+ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz",
"https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13
],
)
@@ -49,9 +49,14 @@ remote_config_workspace()
# Apple and Swift rules.
http_archive(
name = "build_bazel_rules_apple",
- sha256 = "23792cd999f97fc97284d1c44cb1324bfdd0bc54aa68ad513fa3705aca3b1f9e",
- urls = ["https://github.com/bazelbuild/rules_apple/releases/download/0.15.0/rules_apple.0.15.0.tar.gz"],
+ sha256 = "6efdde60c91724a2be7f89b0c0a64f01138a45e63ba5add2dca2645d981d23a1",
+ urls = ["https://github.com/bazelbuild/rules_apple/releases/download/0.17.2/rules_apple.0.17.2.tar.gz"],
) # https://github.com/bazelbuild/rules_apple/releases
+http_archive(
+ name = "build_bazel_rules_swift",
+ sha256 = "96a86afcbdab215f8363e65a10cf023b752e90b23abf02272c4fc668fcb70311",
+ urls = ["https://github.com/bazelbuild/rules_swift/releases/download/0.11.1/rules_swift.0.11.1.tar.gz"],
+) # https://github.com/bazelbuild/rules_swift/releases
http_archive(
name = "build_bazel_apple_support",
sha256 = "7356dbd44dea71570a929d1d4731e870622151a5f27164d966dda97305f33471",
@@ -62,11 +67,6 @@ http_archive(
sha256 = "2ef429f5d7ce7111263289644d233707dba35e39696377ebab8b0bc701f7818e",
urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.8.0/bazel-skylib.0.8.0.tar.gz"],
) # https://github.com/bazelbuild/bazel-skylib/releases
-http_archive(
- name = "build_bazel_rules_swift",
- sha256 = "9efe9699e9765e6b4a5e063e4a08f6b163cccaf0443f775d935baf5c3cd6ed0e",
- urls = ["https://github.com/bazelbuild/rules_swift/releases/download/0.9.0/rules_swift.0.9.0.tar.gz"],
-) # https://github.com/bazelbuild/rules_swift/releases
http_archive(
name = "com_github_apple_swift_swift_protobuf",
type = "zip",
@@ -104,8 +104,7 @@ http_archive(
build_file = "//:models.BUILD",
sha256 = "7efe12a8363f09bc24d7b7a450304a15655a57a7751929b2c1593a71183bb105",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/inception_v1.zip",
- "http://download.tensorflow.org/models/inception_v1.zip",
+ "https://storage.googleapis.com/download.tensorflow.org/models/inception_v1.zip",
],
)
@@ -114,8 +113,7 @@ http_archive(
build_file = "//:models.BUILD",
sha256 = "bddd81ea5c80a97adfac1c9f770e6f55cbafd7cce4d3bbe15fbeb041e6b8f3e8",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_android_export.zip",
- "http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_android_export.zip",
+ "https://storage.googleapis.com/download.tensorflow.org/models/object_detection/ssd_mobilenet_v1_android_export.zip",
],
)
@@ -124,8 +122,7 @@ http_archive(
build_file = "//:models.BUILD",
sha256 = "859edcddf84dddb974c36c36cfc1f74555148e9c9213dedacf1d6b613ad52b96",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip",
- "http://download.tensorflow.org/models/mobile_multibox_v1a.zip",
+ "https://storage.googleapis.com/download.tensorflow.org/models/mobile_multibox_v1a.zip",
],
)
@@ -134,8 +131,7 @@ http_archive(
build_file = "//:models.BUILD",
sha256 = "3d374a730aef330424a356a8d4f04d8a54277c425e274ecb7d9c83aa912c6bfa",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/stylize_v1.zip",
- "http://download.tensorflow.org/models/stylize_v1.zip",
+ "https://storage.googleapis.com/download.tensorflow.org/models/stylize_v1.zip",
],
)
@@ -144,7 +140,6 @@ http_archive(
build_file = "//:models.BUILD",
sha256 = "c3ec4fea3158eb111f1d932336351edfe8bd515bb6e87aad4f25dbad0a600d0c",
urls = [
- "http://storage.googleapis.com/download.tensorflow.org/models/speech_commands_v0.01.zip",
- "http://download.tensorflow.org/models/speech_commands_v0.01.zip",
+ "https://storage.googleapis.com/download.tensorflow.org/models/speech_commands_v0.01.zip",
],
)
diff --git a/configure.py b/configure.py
index 64022101e97..a01d952bb1e 100644
--- a/configure.py
+++ b/configure.py
@@ -1145,78 +1145,6 @@ def set_trisycl_include_dir(environ_cp):
write_action_env_to_bazelrc('TRISYCL_INCLUDE_DIR', trisycl_include_dir)
-def set_mpi_home(environ_cp):
- """Set MPI_HOME."""
-
- default_mpi_home = which('mpirun') or which('mpiexec') or ''
- default_mpi_home = os.path.dirname(os.path.dirname(default_mpi_home))
-
- def valid_mpi_path(mpi_home):
- exists = (
- os.path.exists(os.path.join(mpi_home, 'include')) and
- (os.path.exists(os.path.join(mpi_home, 'lib')) or
- os.path.exists(os.path.join(mpi_home, 'lib64')) or
- os.path.exists(os.path.join(mpi_home, 'lib32'))))
- if not exists:
- print(
- 'Invalid path to the MPI Toolkit. %s or %s or %s or %s cannot be found'
- % (os.path.join(mpi_home, 'include'),
- os.path.exists(os.path.join(mpi_home, 'lib')),
- os.path.exists(os.path.join(mpi_home, 'lib64')),
- os.path.exists(os.path.join(mpi_home, 'lib32'))))
- return exists
-
- _ = prompt_loop_or_load_from_env(
- environ_cp,
- var_name='MPI_HOME',
- var_default=default_mpi_home,
- ask_for_var='Please specify the MPI toolkit folder.',
- check_success=valid_mpi_path,
- error_msg='',
- suppress_default_error=True)
-
-
-def set_other_mpi_vars(environ_cp):
- """Set other MPI related variables."""
- # Link the MPI header files
- mpi_home = environ_cp.get('MPI_HOME')
- symlink_force('%s/include/mpi.h' % mpi_home, 'third_party/mpi/mpi.h')
-
- # Determine if we use OpenMPI or MVAPICH, these require different header files
- # to be included here to make bazel dependency checker happy
- if os.path.exists(os.path.join(mpi_home, 'include/mpi_portable_platform.h')):
- symlink_force(
- os.path.join(mpi_home, 'include/mpi_portable_platform.h'),
- 'third_party/mpi/mpi_portable_platform.h')
- # TODO(gunan): avoid editing files in configure
- sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI = False',
- 'MPI_LIB_IS_OPENMPI = True')
- else:
- # MVAPICH / MPICH
- symlink_force(
- os.path.join(mpi_home, 'include/mpio.h'), 'third_party/mpi/mpio.h')
- symlink_force(
- os.path.join(mpi_home, 'include/mpicxx.h'), 'third_party/mpi/mpicxx.h')
- # TODO(gunan): avoid editing files in configure
- sed_in_place('third_party/mpi/mpi.bzl', 'MPI_LIB_IS_OPENMPI = True',
- 'MPI_LIB_IS_OPENMPI = False')
-
- if os.path.exists(os.path.join(mpi_home, 'lib/libmpi.so')):
- symlink_force(
- os.path.join(mpi_home, 'lib/libmpi.so'), 'third_party/mpi/libmpi.so')
- elif os.path.exists(os.path.join(mpi_home, 'lib64/libmpi.so')):
- symlink_force(
- os.path.join(mpi_home, 'lib64/libmpi.so'), 'third_party/mpi/libmpi.so')
- elif os.path.exists(os.path.join(mpi_home, 'lib32/libmpi.so')):
- symlink_force(
- os.path.join(mpi_home, 'lib32/libmpi.so'), 'third_party/mpi/libmpi.so')
-
- else:
- raise ValueError(
- 'Cannot find the MPI library file in %s/lib or %s/lib64 or %s/lib32' %
- (mpi_home, mpi_home, mpi_home))
-
-
def system_specific_test_config(env):
"""Add default build and test flags required for TF tests to bazelrc."""
write_to_bazelrc('test --flaky_test_attempts=3')
@@ -1549,11 +1477,6 @@ def main():
raise UserInputError('SYCL / CUDA / ROCm are mututally exclusive. '
'At most 1 GPU platform can be configured.')
- set_build_var(environ_cp, 'TF_NEED_MPI', 'MPI', 'with_mpi_support', False)
- if environ_cp.get('TF_NEED_MPI') == '1':
- set_mpi_home(environ_cp)
- set_other_mpi_vars(environ_cp)
-
set_cc_opt_flags(environ_cp)
set_system_libs_flag(environ_cp)
if is_windows():
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 61539c5e586..4d34f9849b7 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -7,7 +7,7 @@ load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object")
load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl")
load("//tensorflow:tensorflow.bzl", "tf_native_cc_binary")
load(
- "//tensorflow/core:platform/default/build_config.bzl",
+ "//tensorflow/core/platform:default/build_config.bzl",
"tf_additional_binary_deps",
)
load(
@@ -356,6 +356,15 @@ config_setting(
},
)
+# Flag to indicate open source build, .bazelrc always has it set to be true
+config_setting(
+ name = "oss",
+ define_values = {
+ "open_source_build": "true",
+ },
+ visibility = ["//visibility:public"],
+)
+
config_setting(
name = "using_cuda_clang_with_dynamic_build",
define_values = {
@@ -364,11 +373,20 @@ config_setting(
},
)
+config_setting(
+ name = "build_oss_using_cuda_clang",
+ define_values = {
+ "using_cuda_clang": "true",
+ "open_source_build": "true",
+ },
+)
+
# Setting to use when loading kernels dynamically
config_setting(
name = "dynamic_loaded_kernels",
define_values = {
"dynamic_loaded_kernels": "true",
+ "framework_shared_object": "true",
},
visibility = ["//visibility:public"],
)
@@ -389,16 +407,18 @@ config_setting(
)
config_setting(
- name = "using_rocm_hipcc",
+ name = "build_oss_using_cuda_nvcc",
define_values = {
- "using_rocm_hipcc": "true",
+ "using_cuda_nvcc": "true",
+ "open_source_build": "true",
},
)
config_setting(
- name = "with_mpi_support",
- values = {"define": "with_mpi_support=true"},
- visibility = ["//visibility:public"],
+ name = "using_rocm_hipcc",
+ define_values = {
+ "using_rocm_hipcc": "true",
+ },
)
config_setting(
@@ -444,6 +464,7 @@ config_setting(
package_group(
name = "internal",
packages = [
+ "//perftools/accelerators/xprof/api/...",
"//tensorflow/...",
"//tensorflow_estimator/python/estimator/...",
"//tensorflow_models/official/...",
@@ -607,6 +628,7 @@ tf_cc_shared_object(
"//tensorflow/c:version_script.lds",
"//tensorflow/c/eager:c_api",
"//tensorflow/core:tensorflow",
+ "//tensorflow/core/distributed_runtime/rpc:grpc_session",
],
)
@@ -750,8 +772,8 @@ genrule(
mkdir $@
for f in $(SRCS); do
d="$${f%/*}"
- d="$${d#bazel-out*genfiles/}"
- d="$${d#*external/eigen_archive/}"
+ d="$${d#bazel-out/*/genfiles/}"
+ d="$${d#bazel-out/*/bin/}"
if [[ $${d} == *local_config_* ]]; then
continue
@@ -763,6 +785,9 @@ genrule(
if [[ $${TF_SYSTEM_LIBS:-} == *$${extname}* ]]; then
continue
fi
+
+ d="$${d#*external/farmhash_archive/src}"
+ d="$${d#*external/$${extname}/}"
fi
mkdir -p "$@/$${d}"
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index 6d1c40a2428..2962a7a60e2 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -27,11 +27,27 @@ import sys as _sys
# pylint: disable=g-bad-import-order
from tensorflow.python import pywrap_tensorflow # pylint: disable=unused-import
from tensorflow.python.tools import module_util as _module_util
+from tensorflow.python.platform import tf_logging as _logging
# API IMPORTS PLACEHOLDER
# WRAPPER_PLACEHOLDER
+if "dev" in __version__: # pylint: disable=undefined-variable
+ _logging.warning("""
+
+ TensorFlow's `tf-nightly` package will soon be updated to TensorFlow 2.0.
+
+ Please upgrade your code to TensorFlow 2.0:
+ * https://www.tensorflow.org/beta/guide/migration_guide
+
+ Or install the latest stable TensorFlow 1.X release:
+ * `pip install -U "tensorflow==1.*"`
+
+ Otherwise your code may be broken by the change.
+
+ """)
+
# Make sure directory containing top level submodules is in
# the __path__ so that "from tensorflow.foo import bar" works.
# We're using bitwise, but there's nothing special about that.
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index dd5a3a08765..ffc457de4aa 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -73,7 +73,7 @@ tf_cuda_library(
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
- "//tensorflow/core:lib_platform",
+ "//tensorflow/core/platform:platform",
"//tensorflow/core:op_gen_lib",
"//tensorflow/core/distributed_runtime:server_lib",
],
@@ -264,10 +264,10 @@ tf_cuda_library(
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
- "//tensorflow/core:lib_platform",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/common_runtime/eager:attr_builder",
"//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
+ "//tensorflow/core/platform",
"@com_google_absl//absl/strings",
],
)
@@ -355,6 +355,7 @@ tf_cuda_library(
deps = [
":tf_status",
":tf_status_helper",
+ ":tf_tensor_internal",
] + select({
"//tensorflow:android": [
":c_api_internal",
@@ -467,7 +468,6 @@ tf_cuda_cc_test(
"//tensorflow/core:math_ops_op_lib",
"//tensorflow/core:nn_ops_op_lib",
"//tensorflow/core:no_op_op_lib",
- "//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:sendrecv_ops_op_lib",
"//tensorflow/core:spectral_ops_op_lib",
@@ -503,6 +503,7 @@ tf_cc_test(
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
+ "@com_google_absl//absl/types:optional",
],
)
@@ -579,7 +580,7 @@ tf_cuda_cc_test(
"//tensorflow:macos": ["-headerpad_max_install_names"],
"//conditions:default": [],
}),
- tags = ["noasan"],
+ tags = ["no_cuda_on_cpu_tap"],
# We must ensure that the dependencies can be dynamically linked since
# the shared library must be able to use core:framework.
# linkstatic = tf_kernel_tests_linkstatic(),
@@ -588,10 +589,11 @@ tf_cuda_cc_test(
":kernels",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
- "//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
+ "//tensorflow/core/kernels:ops_testutil",
+ "//third_party/eigen3",
],
)
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 62b2504a26d..ed4f10e0f77 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -1024,7 +1024,7 @@ void TF_SetAttrValueProto(TF_OperationDescription* desc, const char* attr_name,
desc->colocation_constraints.insert(location);
}
} else {
- desc->node_builder.Attr(attr_name, attr_value);
+ desc->node_builder.Attr(attr_name, std::move(attr_value));
}
status->status = Status::OK();
@@ -1045,7 +1045,8 @@ static TF_Operation* TF_FinishOperationLocked(TF_OperationDescription* desc,
std::vector<string>(desc->colocation_constraints.begin(),
desc->colocation_constraints.end()));
}
- status->status = desc->node_builder.Finalize(&desc->graph->graph, &ret);
+ status->status = desc->node_builder.Finalize(&desc->graph->graph, &ret,
+ /*consume=*/true);
if (TF_GetCode(status) == TF_OK) {
// Run shape inference function for newly added node.
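Both hunks above trade a deep copy for a move once the source object is no longer needed: `attr_value` is consumed by the builder, and `Finalize(..., /*consume=*/true)` lets the builder give up its inputs instead of copying them. A minimal standalone sketch of the pattern, using a hypothetical `Builder` type rather than TensorFlow's `NodeBuilder`:

```
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Hypothetical sink type, for illustration only.
class Builder {
 public:
  // Taking the argument by value lets the caller pick copy or move.
  void Attr(std::string value) { attrs_.push_back(std::move(value)); }

 private:
  std::vector<std::string> attrs_;
};

int main() {
  Builder b;
  std::string attr_value(1 << 20, 'x');  // Large attribute payload.
  // attr_value is not used again, so moving avoids copying the 1 MiB
  // buffer -- the same reasoning as the c_api.cc change above.
  b.Attr(std::move(attr_value));
  std::cout << "moved-from size: " << attr_value.size() << "\n";  // Usually 0.
  return 0;
}
```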
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index ad0c4068d45..f04f0175696 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -24,6 +24,8 @@ limitations under the License.
#include "tensorflow/compiler/jit/flags.h"
#include "tensorflow/core/common_runtime/eager/attr_builder.h"
#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/graph/node_builder.h"
@@ -596,7 +598,10 @@ struct TF_CheckpointReader : public tensorflow::checkpoint::CheckpointReader {
TF_CheckpointReader* TF_NewCheckpointReader(const char* filename,
TF_Status* status) {
TF_CheckpointReader* reader = new TF_CheckpointReader(filename, status);
- if (!status->status.ok()) return nullptr;
+ if (!status->status.ok()) {
+ TF_DeleteCheckpointReader(reader);
+ return nullptr;
+ }
const auto& m = reader->GetVariableToDataTypeMap();
for (auto it = m.begin(); it != m.end(); ++it)
reader->variable_list.push_back(it->first);
@@ -995,3 +1000,170 @@ TFE_TensorHandle* TFE_ConsumeInputConcreteTensorFromTraceContext(
<< handle->DebugString();
return ret;
}
+
+TF_ShapeAndTypeList* TF_NewShapeAndTypeList(int num_items) {
+ TF_ShapeAndTypeList* result = new TF_ShapeAndTypeList;
+ result->num_items = num_items;
+ result->items = (num_items == 0) ? nullptr : new TF_ShapeAndType[num_items]();
+ return result;
+}
+
+void TF_ShapeAndTypeListSetShape(TF_ShapeAndTypeList* shape_list, int index,
+ const int64_t* dims, int num_dims) {
+ DCHECK(index >= 0 && index < shape_list->num_items);
+ TF_ShapeAndType& shape = shape_list->items[index];
+ DCHECK(shape.dims == nullptr) << "Shape at " << index << " is already set!";
+ DCHECK(num_dims >= 0) << "Number of dimensions cannot be negative!";
+ shape.num_dims = num_dims;
+ shape.dims = new int64_t[num_dims];
+ memcpy(shape.dims, dims, sizeof(int64_t) * num_dims);
+}
+
+void TF_ShapeAndTypeListSetUnknownShape(TF_ShapeAndTypeList* shape_list,
+ int index) {
+ DCHECK(index >= 0 && index < shape_list->num_items);
+ TF_ShapeAndType& shape = shape_list->items[index];
+ DCHECK(shape.dims == nullptr) << "Shape at " << index << " is already set!";
+ shape.num_dims = -1;
+ shape.dims = nullptr;
+}
+
+void TF_ShapeAndTypeListSetDtype(TF_ShapeAndTypeList* shape_list, int index,
+ TF_DataType dtype) {
+ DCHECK(index >= 0 && index < shape_list->num_items);
+ TF_ShapeAndType& shape_and_type = shape_list->items[index];
+ shape_and_type.dtype = dtype;
+}
+
+void TF_DeleteShapeAndTypeList(TF_ShapeAndTypeList* shape_list) {
+ if (shape_list == nullptr) return;
+ for (size_t i = 0; i < shape_list->num_items; ++i) {
+ delete[] shape_list->items[i].dims;
+ }
+ delete[] shape_list->items;
+ delete shape_list;
+}
+
+void TF_DeleteShapeAndTypeListArray(TF_ShapeAndTypeList** shape_list_array,
+ int num_items) {
+ if (shape_list_array == nullptr) return;
+ for (int i = 0; i < num_items; ++i) {
+ TF_DeleteShapeAndTypeList(shape_list_array[i]);
+ }
+ delete[] shape_list_array;
+}
+
+namespace tensorflow {
+Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst);
+} // namespace tensorflow
+
+void TFE_InferShapes(TFE_Op* tfe_op, TF_ShapeAndTypeList* input_shapes,
+ TF_Tensor** input_tensors,
+ TF_ShapeAndTypeList* input_tensors_as_shapes,
+ TF_ShapeAndTypeList** input_resource_shapes_and_types,
+ TF_ShapeAndTypeList** output_shapes,
+ TF_ShapeAndTypeList*** output_resource_shapes_and_types,
+ TF_Status* status) {
+ using tensorflow::NodeDef;
+ using tensorflow::OpRegistrationData;
+ using tensorflow::Tensor;
+ using tensorflow::shape_inference::DimensionHandle;
+ using tensorflow::shape_inference::InferenceContext;
+ using tensorflow::shape_inference::ShapeAndType;
+ using tensorflow::shape_inference::ShapeHandle;
+
+ const int num_inputs = input_shapes->num_items;
+ NodeDef node_def;
+ node_def.set_name(tfe_op->operation.Name());
+ node_def.set_op(tfe_op->operation.Name());
+ for (int i = 0; i < num_inputs; ++i) {
+ node_def.add_input("dummy_input");
+ }
+ tfe_op->operation.Attrs().FillAttrValueMap(node_def.mutable_attr());
+
+ const tensorflow::OpRegistrationData* op_reg_data;
+ status->status =
+ tensorflow::OpRegistry::Global()->LookUp(node_def.op(), &op_reg_data);
+ if (!status->status.ok()) return;
+
+ // Initialize an input_tensor vector with `nullptr` values.
+ std::vector<const Tensor*> input_tensors_vector(num_inputs, nullptr);
+ // A vector to keep track of newly created `tf::Tensor` objects.
+ std::vector<Tensor> all_input_tensors;
+ // Update the vector with information from `input_tensors` if provided.
+ if (input_tensors != nullptr) {
+ // Note that we take the address of the elements in `all_input_tensors`
+ // below. Allocate enough space so that no reallocation happens, which will
+ // make the pointers invalid.
+ all_input_tensors.reserve(num_inputs);
+ for (int i = 0; i < num_inputs; ++i) {
+ if (input_tensors[i] == nullptr) continue;
+ all_input_tensors.emplace_back();
+ Tensor& input_tensor = all_input_tensors.back();
+ status->status = TF_TensorToTensor(input_tensors[i], &input_tensor);
+ if (!status->status.ok()) return;
+ input_tensors_vector[i] = &input_tensor;
+ }
+ }
+
+ // Create an inference context with dummy values, which will be updated later.
+ InferenceContext c(TF_GRAPH_DEF_VERSION, &node_def, op_reg_data->op_def,
+ std::vector<ShapeHandle>(num_inputs), input_tensors_vector,
+ {},
+ std::vector<std::unique_ptr<std::vector<ShapeAndType>>>());
+
+ // Set input_shapes.
+ for (int i = 0; i < num_inputs; ++i) {
+ std::vector<DimensionHandle> dims;
+ const TF_ShapeAndType& input_shape = input_shapes->items[i];
+ if (input_shape.num_dims == InferenceContext::kUnknownRank) {
+ c.SetInput(i, c.UnknownShape());
+ continue;
+ }
+ for (int j = 0; j < input_shape.num_dims; ++j) {
+ dims.push_back(c.MakeDim(input_shape.dims[j]));
+ }
+ c.SetInput(i, c.MakeShape(dims));
+ }
+
+ // TODO(bgogul): Handle input_tensors_as_shapes.
+ // TODO(bgogul): Handle input_resource_shapes_and_types.
+
+ status->status = c.construction_status();
+ if (!status->status.ok()) return;
+
+ if (op_reg_data->shape_inference_fn == nullptr) {
+ status->status =
+ InvalidArgument("No shape inference function exists for op '",
+ node_def.op(), "', did you forget to define it?");
+ return;
+ }
+
+ status->status = c.Run(op_reg_data->shape_inference_fn);
+ if (!status->status.ok()) return;
+
+ // Set output_shapes.
+ TF_ShapeAndTypeList* output_shapes_result =
+ TF_NewShapeAndTypeList(c.num_outputs());
+ for (int i = 0; i < c.num_outputs(); ++i) {
+ ShapeHandle shape_handle = c.output(i);
+ TF_ShapeAndType& shape = output_shapes_result->items[i];
+ shape.num_dims = c.Rank(shape_handle);
+ if (shape.num_dims == InferenceContext::kUnknownRank) {
+ shape.dims = nullptr;
+ continue;
+ }
+ shape.dims = new int64_t[shape.num_dims];
+ for (size_t j = 0; j < shape.num_dims; ++j) {
+ shape.dims[j] = c.Value(c.Dim(shape_handle, j));
+ }
+ }
+ if (output_shapes != nullptr) *output_shapes = output_shapes_result;
+
+ // TODO(bgogul): Set output_resource_shapes_and_types.
+}
+
+void TF_ImportGraphDefOptionsSetValidateColocationConstraints(
+ TF_ImportGraphDefOptions* opts, unsigned char enable) {
+ opts->opts.validate_colocation_constraints = enable;
+}
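The new `TF_ImportGraphDefOptionsSetValidateColocationConstraints` toggle slots into the standard GraphDef import flow. A minimal sketch, assuming `graph_def_buf` is a `TF_Buffer*` that already holds a serialized `GraphDef`:

```
TF_Status* status = TF_NewStatus();
TF_Graph* graph = TF_NewGraph();
TF_ImportGraphDefOptions* opts = TF_NewImportGraphDefOptions();

// Skip colocation-constraint validation, e.g. for graphs whose
// colocated nodes have been pruned away.
TF_ImportGraphDefOptionsSetValidateColocationConstraints(opts, 0);
TF_GraphImportGraphDef(graph, graph_def_buf, opts, status);
if (TF_GetCode(status) != TF_OK) {
  // Handle the import error.
}

TF_DeleteImportGraphDefOptions(opts);
TF_DeleteGraph(graph);
TF_DeleteStatus(status);
```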
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index d91f3ab8b05..126db2640f6 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -343,6 +343,65 @@ TF_CAPI_EXPORT extern TFE_TensorHandle*
TFE_ConsumeInputConcreteTensorFromTraceContext(TFE_TraceContext* trace_ctx,
unsigned int idx);
+// Information about the shape of a Tensor and its type.
+struct TF_ShapeAndType {
+ // Number of dimensions. -1 indicates unknown rank.
+ int num_dims;
+ // Array of dimensions. -1 indicates unknown dim.
+ int64_t* dims;
+ // The data type. May be 0 to denote unknown type.
+ TF_DataType dtype;
+};
+
+typedef struct TF_ShapeAndType TF_ShapeAndType;
+
+// A list of TF_ShapeAndType elements..
+struct TF_ShapeAndTypeList {
+ int num_items;
+ TF_ShapeAndType* items;
+};
+typedef struct TF_ShapeAndTypeList TF_ShapeAndTypeList;
+
+// API for manipulating TF_ShapeAndTypeList objects.
+//
+TF_CAPI_EXPORT extern TF_ShapeAndTypeList* TF_NewShapeAndTypeList(
+ int num_shapes);
+TF_CAPI_EXPORT extern void TF_ShapeAndTypeListSetShape(
+ TF_ShapeAndTypeList* shape_list, int index, const int64_t* dims,
+ int num_dims);
+TF_CAPI_EXPORT extern void TF_ShapeAndTypeListSetUnknownShape(
+ TF_ShapeAndTypeList* shape_list, int index);
+TF_CAPI_EXPORT extern void TF_ShapeAndTypeListSetDtype(
+ TF_ShapeAndTypeList* shape_list, int index, TF_DataType dtype);
+TF_CAPI_EXPORT extern void TF_DeleteShapeAndTypeList(
+ TF_ShapeAndTypeList* shape_list);
+TF_CAPI_EXPORT extern void TF_DeleteShapeAndTypeListArray(
+ TF_ShapeAndTypeList** shape_list_array, int num_items);
+
+// Infer shapes for the given `op`. The arguments mimic the arguments of the
+// `shape_inference::InferenceContext` constructor. Note the following:
+// - The inputs of the `op` are not used for shape inference. So, it is
+// OK to not have the inputs properly set in `op`. See `input_tensors`
+// if you want shape inference to consider the input tensors of the
+// op for shape inference.
+// - The types need not be set in `input_shapes` as it is not used.
+// - The number of `input_tensors` should be the same as the number of items
+// in `input_shapes`.
+//
+// The results are returned in `output_shapes` and
+// `output_resource_shapes_and_types`. The caller is responsible for freeing the
+// memory in these buffers by calling `TF_DeleteShapeAndTypeList`.
+TF_CAPI_EXPORT extern void TFE_InferShapes(
+ TFE_Op* op, TF_ShapeAndTypeList* input_shapes, TF_Tensor** input_tensors,
+ TF_ShapeAndTypeList* input_tensor_as_shapes,
+ TF_ShapeAndTypeList** input_resource_shapes_and_types,
+ TF_ShapeAndTypeList** output_shapes,
+ TF_ShapeAndTypeList*** output_resource_shapes_and_types, TF_Status* status);
+
+TF_CAPI_EXPORT extern void
+TF_ImportGraphDefOptionsSetValidateColocationConstraints(
+ TF_ImportGraphDefOptions* opts, unsigned char enable);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
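A minimal sketch of driving the new shape-inference entry point from client code, mirroring the tests added further down (error checks elided; every call shown is declared in this header or in `c_api.h`):

```
TF_Status* status = TF_NewStatus();
TFE_ContextOptions* ctx_opts = TFE_NewContextOptions();
TFE_Context* ctx = TFE_NewContext(ctx_opts, status);
TFE_Op* matmul = TFE_NewOp(ctx, "MatMul", status);

// Two fully known input shapes: [3, 2] x [2, 4].
TF_ShapeAndTypeList* input_shapes = TF_NewShapeAndTypeList(2);
int64_t dims0[] = {3, 2};
int64_t dims1[] = {2, 4};
TF_ShapeAndTypeListSetShape(input_shapes, 0, dims0, 2);
TF_ShapeAndTypeListSetShape(input_shapes, 1, dims1, 2);

TF_ShapeAndTypeList* output_shapes = nullptr;
TFE_InferShapes(matmul, input_shapes, /*input_tensors=*/nullptr,
                /*input_tensors_as_shapes=*/nullptr,
                /*input_resource_shapes_and_types=*/nullptr, &output_shapes,
                /*output_resource_shapes_and_types=*/nullptr, status);
// On success, output_shapes->items[0] holds rank 2 with dims {3, 4}.

TF_DeleteShapeAndTypeList(input_shapes);
TF_DeleteShapeAndTypeList(output_shapes);
TFE_DeleteOp(matmul);
TFE_DeleteContext(ctx);
TFE_DeleteContextOptions(ctx_opts);
TF_DeleteStatus(status);
```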
diff --git a/tensorflow/c/c_api_experimental_test.cc b/tensorflow/c/c_api_experimental_test.cc
index 55f3a8599fd..ed0ab7c26f8 100644
--- a/tensorflow/c/c_api_experimental_test.cc
+++ b/tensorflow/c/c_api_experimental_test.cc
@@ -14,6 +14,8 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/c/c_api_experimental.h"
+
+#include "absl/types/optional.h"
#include "tensorflow/c/c_api_internal.h"
#include "tensorflow/c/c_test_util.h"
#include "tensorflow/c/eager/c_api.h"
@@ -431,5 +433,155 @@ TEST_F(AddEagerOpToGraphTest,
TFE_DeleteTensorHandle(matrix);
}
+class ShapeInferenceTest : public ::testing::Test {
+ protected:
+ ShapeInferenceTest()
+ : status_(TF_NewStatus()), tfe_context_options_(TFE_NewContextOptions()) {
+ tfe_context_ = TFE_NewContext(tfe_context_options_, status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ }
+
+ ~ShapeInferenceTest() override {
+ TFE_DeleteContextOptions(tfe_context_options_);
+ TFE_DeleteContext(tfe_context_);
+ TF_DeleteStatus(status_);
+ }
+
+ // Checks the expected result of shape inference for the given `op`.
+ void CheckOutputShapes(
+ TFE_Op* op,
+ const std::vector<absl::optional<std::vector<int64_t>>>& input_shapes_vec,
+ const std::vector<TF_Tensor*>& input_tensors,
+ const absl::optional<std::vector<int64_t>>& expected_shape) {
+ // Create input_shapes.
+ TF_ShapeAndTypeList* input_shapes =
+ TF_NewShapeAndTypeList(input_shapes_vec.size());
+ for (size_t i = 0; i < input_shapes_vec.size(); ++i) {
+ const auto& input_shape = input_shapes_vec[i];
+ if (input_shape.has_value()) {
+ TF_ShapeAndTypeListSetShape(input_shapes, i, input_shape->data(),
+ input_shape->size());
+ } else {
+ TF_ShapeAndTypeListSetUnknownShape(input_shapes, i);
+ }
+ }
+ TF_ShapeAndTypeList* output_shapes;
+ TFE_InferShapes(op, input_shapes,
+ input_tensors.empty()
+ ? nullptr
+ : const_cast<TF_Tensor**>(input_tensors.data()),
+ /*input_tensors_as_shapes*/ nullptr,
+ /*input_resource_shapes_and_types*/ nullptr, &output_shapes,
+ /*output_resource_shapes_and_types*/ nullptr, status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ CHECK_EQ(output_shapes->num_items, 1);
+
+ int num_dims = output_shapes->items[0].num_dims;
+ int64_t* dims = output_shapes->items[0].dims;
+
+ if (!expected_shape.has_value()) {
+ EXPECT_EQ(num_dims, -1);
+ EXPECT_EQ(dims, nullptr);
+ return;
+ }
+
+ EXPECT_EQ(num_dims, expected_shape->size());
+ for (size_t i = 0; i < num_dims; ++i) {
+ EXPECT_EQ(dims[i], (*expected_shape)[i]);
+ }
+ TF_DeleteShapeAndTypeList(input_shapes);
+ TF_DeleteShapeAndTypeList(output_shapes);
+ }
+
+ absl::optional<std::vector<int64_t>> make_shape(
+ std::vector<int64_t>&& dims) const {
+ return absl::make_optional(dims);
+ }
+
+ absl::optional<std::vector<int64_t>> unknown_shape() const {
+ return absl::nullopt;
+ }
+
+ static constexpr int64_t kUnknownDim =
+ shape_inference::InferenceContext::kUnknownDim;
+ TF_Status* status_;
+ TFE_ContextOptions* tfe_context_options_;
+ TFE_Context* tfe_context_;
+};
+
+TEST_F(ShapeInferenceTest, InfersShapesFromInputShapes) {
+ TFE_Op* matmul_op;
+ matmul_op = TFE_NewOp(tfe_context_, "MatMul", status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+
+ // Infer shape when everything is known.
+ CheckOutputShapes(matmul_op,
+ /*input_shapes*/ {make_shape({3, 2}), make_shape({2, 4})},
+ /*input_tensors*/ {},
+ /*expected_shape*/ make_shape({3, 4}));
+
+ // Infer shape when second operand has unknown shape.
+ CheckOutputShapes(matmul_op,
+ /*input_shapes*/ {make_shape({3, 2}), unknown_shape()},
+ /*input_tensors*/ {},
+ /*expected_shape*/ make_shape({3, kUnknownDim}));
+
+ // Infer shape when some dimensions are unknown.
+ CheckOutputShapes(
+ matmul_op,
+ /*input_shapes*/ {make_shape({kUnknownDim, 2}), make_shape({2, 4})},
+ /*input_tensors*/ {},
+ /*expected_shape*/ make_shape({kUnknownDim, 4}));
+
+ // Infer shape when everything is unknown.
+ CheckOutputShapes(matmul_op,
+ /*input_shapes*/ {unknown_shape(), unknown_shape()},
+ /*input_tensors*/ {},
+ /*expected_shape*/ make_shape({kUnknownDim, kUnknownDim}));
+
+ TFE_DeleteOp(matmul_op);
+ // TODO(bgogul): Add some death tests where status is not OK.
+}
+
+TEST_F(ShapeInferenceTest, InfersShapesFromInputTensors) {
+ // Prepare some tensors for shape.
+ TF_Tensor* tensor_1X6 = Int32Tensor({1, 6});
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ TF_Tensor* tensor_1X1X6 = Int32Tensor({1, 1, 6});
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+
+ TFE_Op* reshape_op = TFE_NewOp(tfe_context_, "Reshape", status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ TFE_OpSetAttrType(reshape_op, "T", TF_FLOAT);
+ TFE_OpSetAttrType(reshape_op, "Tshape", TF_INT32);
+ CheckOutputShapes(reshape_op,
+ /* input_shapes*/ {unknown_shape(), unknown_shape()},
+ /* input_tensors*/ {nullptr, tensor_1X6},
+ /*expected_shape*/ make_shape({1, 6}));
+ TFE_DeleteOp(reshape_op);
+ reshape_op = nullptr;
+
+ TFE_Op* fill_op = TFE_NewOp(tfe_context_, "Fill", status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ TFE_OpSetAttrType(fill_op, "T", TF_FLOAT);
+ TFE_OpSetAttrType(fill_op, "Tshape", TF_INT32);
+
+ float five = 5.0;
+ TFE_TensorHandle* scalar = TestScalarTensorHandle(five);
+ TF_Tensor* scalarTensor = TFE_TensorHandleResolve(scalar, status_);
+ CHECK_EQ(TF_OK, TF_GetCode(status_)) << TF_Message(status_);
+ CheckOutputShapes(fill_op,
+ /* input_shapes*/ {unknown_shape(), unknown_shape()},
+ /* input_tensors*/ {tensor_1X1X6, scalarTensor},
+ /*expected_shape*/ make_shape({1, 1, 6}));
+ TFE_DeleteOp(fill_op);
+ fill_op = nullptr;
+
+ TFE_DeleteTensorHandle(scalar);
+ TF_DeleteTensor(scalarTensor);
+ TF_DeleteTensor(tensor_1X1X6);
+ TF_DeleteTensor(tensor_1X6);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/c_api_function.cc b/tensorflow/c/c_api_function.cc
index 20815813d06..bb2be3db087 100644
--- a/tensorflow/c/c_api_function.cc
+++ b/tensorflow/c/c_api_function.cc
@@ -41,6 +41,7 @@ namespace {
// node names, so if necessary we add a suffix to make
// names unique. If we have an input named "A" and a node in the function
// body named "a", they will be renamed to "a" and "a_0".
+// TODO(b/139886381) Unify this and the one in graph_to_functiondef.cc
class NodeNameMapping {
public:
NodeNameMapping() = default;
@@ -64,14 +65,14 @@ class NodeNameMapping {
string Lookup(const string& name) const;
private:
- string UniquifyHelper(const string& name) const;
+ string UniquifyHelper(const string& name);
static string Normalize(string name);
// The normalized/uniquified names already used as
// input names (in signature), output names (in signature), and node names
// (in node_def).
// This is a superset of values in name_mapping_.
- std::unordered_set<string> used_names_;
+ std::unordered_map<string, uint64> used_names_;
// Mapping from original node name from the graph to the normalized
// and uniquified version of it.
std::unordered_map<string, string> name_mapping_;
@@ -102,13 +103,16 @@ string NodeNameMapping::Normalize(string name) {
return i == n ? "unknown" : name.substr(i);
}
-string NodeNameMapping::UniquifyHelper(const string& name) const {
+string NodeNameMapping::UniquifyHelper(const string& name) {
+ auto it = used_names_.emplace(name, 0);
// If the name hasn't been used yet, use it as-is.
- if (used_names_.find(name) == used_names_.end()) return name;
+ if (it.second) return name;
+
// Add a suffix to name to make it unique.
- for (int i = 0;; ++i) {
- const string candidate = strings::StrCat(name, "_", i);
- if (used_names_.find(candidate) == used_names_.end()) return candidate;
+ while (true) {
+ const string candidate = strings::StrCat(name, "_", it.first->second);
+ it.first->second++;
+ if (used_names_.emplace(candidate, 0).second) return candidate;
}
}
@@ -120,16 +124,13 @@ string NodeNameMapping::GetInputName(const string& name) {
string NodeNameMapping::GetOutputName(const string& name) {
const string& input_name = UniquifyHelper(Normalize(name));
- // Record that we used this name, but don't add it to name_mapping_
- // since this name is not for a node.
- used_names_.insert(input_name);
+ // Don't add it to name_mapping_ since this name is not for a node.
return input_name;
}
string NodeNameMapping::Uniquify(const string& name) {
const string uniqued = UniquifyHelper(name);
name_mapping_[name] = uniqued;
- used_names_.insert(uniqued);
return uniqued;
}
@@ -139,7 +140,7 @@ Status NodeNameMapping::UseOutputName(const string& name) {
return InvalidArgument("Cannot have duplicate output names. Name '", name,
"' appears more than once in 'output_names' array.");
}
- used_names_.insert(iter, name);
+ used_names_.emplace(name, 0);
return Status::OK();
}
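The switch from `unordered_set` to `unordered_map` above keys each base name to the next suffix to try, so repeated collisions on the same name no longer rescan suffixes from zero. A self-contained sketch of the scheme using only the standard library (not the TensorFlow `string`/`StrCat` helpers):

```
#include <iostream>
#include <string>
#include <unordered_map>

std::string Uniquify(std::unordered_map<std::string, int>& used,
                     const std::string& name) {
  auto it = used.emplace(name, 0);
  if (it.second) return name;  // First occurrence: keep the name as-is.
  while (true) {
    // Resume from the last suffix handed out for this base name.
    std::string candidate = name + "_" + std::to_string(it.first->second);
    ++it.first->second;
    // The candidate itself may collide with an earlier name, so check it too.
    if (used.emplace(candidate, 0).second) return candidate;
  }
}

int main() {
  std::unordered_map<std::string, int> used;
  std::cout << Uniquify(used, "a") << "\n";  // a
  std::cout << Uniquify(used, "a") << "\n";  // a_0
  std::cout << Uniquify(used, "a") << "\n";  // a_1
  return 0;
}
```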
diff --git a/tensorflow/c/c_api_test.cc b/tensorflow/c/c_api_test.cc
index 49076039fa7..c97fa93e3a5 100644
--- a/tensorflow/c/c_api_test.cc
+++ b/tensorflow/c/c_api_test.cc
@@ -22,15 +22,16 @@ limitations under the License.
#include <vector>
#include "tensorflow/c/c_test_util.h"
+#include "tensorflow/c/tf_status.h"
#include "tensorflow/cc/saved_model/signature_constants.h"
#include "tensorflow/cc/saved_model/tag_constants.h"
#include "tensorflow/core/example/example.pb.h"
#include "tensorflow/core/example/feature.pb.h"
#include "tensorflow/core/framework/api_def.pb.h"
#include "tensorflow/core/framework/common_shape_fns.h"
-#include "tensorflow/core/framework/graph.pb_text.h"
+#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/kernel_def.pb.h"
-#include "tensorflow/core/framework/node_def.pb_text.h"
+#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_util.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_def.pb.h"
@@ -233,7 +234,7 @@ void TestEncodeDecode(int line, const std::vector<string>& data) {
// Create C++ Tensor
Tensor src(tensorflow::DT_STRING, TensorShape(dims));
for (tensorflow::int64 i = 0; i < src.NumElements(); ++i) {
- src.flat<string>()(i) = data[i];
+ src.flat<tstring>()(i) = data[i];
}
TF_Tensor* dst = TF_TensorFromTensor(src, status);
ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
@@ -243,7 +244,7 @@ void TestEncodeDecode(int line, const std::vector<string>& data) {
ASSERT_EQ(Status::OK(), TF_TensorToTensor(dst, &output)) << line;
ASSERT_EQ(src.NumElements(), output.NumElements()) << line;
for (tensorflow::int64 i = 0; i < src.NumElements(); ++i) {
- ASSERT_EQ(data[i], output.flat<string>()(i)) << line;
+ ASSERT_EQ(data[i], output.flat<tstring>()(i)) << line;
}
TF_DeleteTensor(dst);
@@ -556,7 +557,7 @@ TEST(CAPI, Graph) {
EXPECT_FALSE(found_add);
found_add = true;
} else {
- ADD_FAILURE() << "Unexpected NodeDef: " << ProtoDebugString(n);
+ ADD_FAILURE() << "Unexpected NodeDef: " << n.DebugString();
}
}
EXPECT_TRUE(found_placeholder);
@@ -581,20 +582,20 @@ TEST(CAPI, Graph) {
// Compare with first GraphDef + added NodeDef.
NodeDef* added_node = graph_def.add_node();
*added_node = node_def;
- EXPECT_EQ(ProtoDebugString(graph_def), ProtoDebugString(graph_def2));
+ EXPECT_EQ(graph_def.DebugString(), graph_def2.DebugString());
// Look up some nodes by name.
TF_Operation* neg2 = TF_GraphOperationByName(graph, "neg");
EXPECT_TRUE(neg == neg2);
NodeDef node_def2;
ASSERT_TRUE(GetNodeDef(neg2, &node_def2));
- EXPECT_EQ(ProtoDebugString(node_def), ProtoDebugString(node_def2));
+ EXPECT_EQ(node_def.DebugString(), node_def2.DebugString());
TF_Operation* feed2 = TF_GraphOperationByName(graph, "feed");
EXPECT_TRUE(feed == feed2);
ASSERT_TRUE(GetNodeDef(feed, &node_def));
ASSERT_TRUE(GetNodeDef(feed2, &node_def2));
- EXPECT_EQ(ProtoDebugString(node_def), ProtoDebugString(node_def2));
+ EXPECT_EQ(node_def.DebugString(), node_def2.DebugString());
// Test iterating through the nodes of a graph.
found_placeholder = false;
@@ -618,7 +619,7 @@ TEST(CAPI, Graph) {
found_neg = true;
} else {
ASSERT_TRUE(GetNodeDef(oper, &node_def));
- ADD_FAILURE() << "Unexpected Node: " << ProtoDebugString(node_def);
+ ADD_FAILURE() << "Unexpected Node: " << node_def.DebugString();
}
}
EXPECT_TRUE(found_placeholder);
@@ -1385,7 +1386,7 @@ TEST(CAPI, SavedModel) {
tensorflow::Example example;
auto* feature_map = example.mutable_features()->mutable_feature();
(*feature_map)["x"].mutable_float_list()->add_value(i);
- input.flat<string>()(i) = example.SerializeAsString();
+ input.flat<tstring>()(i) = example.SerializeAsString();
}
const tensorflow::string input_op_name(
@@ -2498,6 +2499,38 @@ TEST(TestKernel, TestGetRegisteredKernelsForOpNoKernels) {
#undef EXPECT_TF_META
+TEST(CAPI, TestTensorAligned) {
+ int64_t dim = 7;
+ size_t tensor_size_bytes = dim * TF_DataTypeSize(TF_FLOAT);
+ TF_Tensor* a = TF_AllocateTensor(
+ /*dtype=*/TF_FLOAT, /*dims=*/&dim, /*num_dims=*/1,
+ /*len=*/tensor_size_bytes);
+ float* data = reinterpret_cast<float*>(TF_TensorData(a));
+ for (int i = 0; i < dim; ++i) {
+ data[i] = 0;
+ }
+ if (EIGEN_MAX_ALIGN_BYTES > 0) {
+ EXPECT_TRUE(TF_TensorIsAligned(a));
+ }
+ TF_DeleteTensor(a);
+}
+
+TEST(CAPI, TestTensorIsNotAligned) {
+ // Test unaligned access via a Slice.
+ Tensor x(DT_FLOAT, TensorShape({30}));
+ x.flat<float>().setConstant(0.0);
+
+ // Take an unaligned slice.
+ Tensor y = x.Slice(1, 13);
+ TF_Status* status = TF_NewStatus();
+ TF_Tensor* a = TF_TensorFromTensor(y, status);
+ if (EIGEN_MAX_ALIGN_BYTES > 0) {
+ EXPECT_FALSE(TF_TensorIsAligned(a));
+ }
+ TF_DeleteStatus(status);
+ TF_DeleteTensor(a);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index 7eddc17a8e5..5c42e508f71 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -8,12 +8,12 @@ load(
"tfe_xla_copts",
)
load(
- "//tensorflow/core:platform/default/build_config.bzl",
+ "//tensorflow/core/platform:default/build_config.bzl",
"tf_additional_device_tracer_test_flags",
"tf_kernel_tests_linkstatic",
)
load(
- "//tensorflow/core:platform/default/build_config_root.bzl",
+ "//tensorflow/core/platform:default/build_config_root.bzl",
"tf_cuda_tests_tags",
)
@@ -156,6 +156,7 @@ tf_cuda_cc_test(
],
deps = [
":c_api",
+ ":c_api_experimental",
":c_api_internal",
":c_api_test_util",
"//tensorflow/c:c_test_util",
@@ -235,9 +236,11 @@ tf_cuda_cc_test(
],
args =
["--heap_check=local"] + tf_additional_device_tracer_test_flags(),
+ extra_copts = tfe_xla_copts(),
linkstatic = tf_kernel_tests_linkstatic(),
tags = tf_cuda_tests_tags() + ["nomac"],
deps = [
+ ":c_api",
":c_api_experimental",
":c_api_test_util",
"//tensorflow/c:c_test_util",
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 22c1f219f38..b70f40cc46a 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -202,9 +202,11 @@ tensorflow::Status UpdateTFE_ContextWithServerDef(
"Currently, TFE_NewContext only supports tensorflow::GrpcServer."));
}
- LOG_AND_RETURN_IF_ERROR(grpc_server->Start());
-
- tensorflow::uint64 context_id = tensorflow::random::New64();
+ tensorflow::uint64 context_id = tensorflow::EagerContext::NewContextId();
+ // Make master eager context accessible by local eager service, which might
+ // receive send tensor requests from remote workers.
+ LOG_AND_RETURN_IF_ERROR(grpc_server->AddMasterEagerContextToEagerService(
+ context_id, ctx->context));
std::vector<string> remote_workers;
grpc_server->master_env()->worker_cache->ListWorkers(&remote_workers);
@@ -240,9 +242,11 @@ tensorflow::Status UpdateTFE_ContextWithServerDef(
&remote_eager_workers));
// Initialize remote eager workers.
- LOG_AND_RETURN_IF_ERROR(CreateRemoteContexts(
- remote_workers, context_id, keep_alive_secs, server_def,
- remote_eager_workers.get(), ctx->context->Async(), base_request));
+ // TODO(b/138847548) Create remote eager contexts in async mode by default.
+ LOG_AND_RETURN_IF_ERROR(
+ CreateRemoteContexts(remote_workers, context_id, keep_alive_secs,
+ server_def, remote_eager_workers.get(),
+ ctx->context->Executor()->Async(), base_request));
tensorflow::RemoteRendezvous* r =
grpc_server->worker_env()->rendezvous_mgr->Find(context_id);
@@ -261,15 +265,21 @@ tensorflow::Status UpdateTFE_ContextWithServerDef(
TF_RETURN_IF_ERROR(r->Initialize(worker_session.get()));
auto* device_mgr = grpc_server->worker_env()->device_mgr;
- auto remote_mgr =
- absl::make_unique<tensorflow::eager::RemoteMgr>(/*is_master=*/true);
+ auto remote_mgr = absl::make_unique<tensorflow::eager::RemoteMgr>(
+ /*is_master=*/true, ctx->context);
- return ctx->context->InitializeRemoteMaster(
+ LOG_AND_RETURN_IF_ERROR(ctx->context->InitializeRemoteMaster(
std::move(server), grpc_server->worker_env(), worker_session,
std::move(remote_eager_workers), std::move(remote_device_mgr),
remote_workers, context_id, r, device_mgr, keep_alive_secs,
- worker_session->cluster_flr.get(), std::move(remote_mgr));
+ worker_session->cluster_flr.get(), std::move(remote_mgr)));
+
+ // NOTE: We start the server after all other initialization, because the
+ // GrpcServer cannot be destroyed after it is started.
+ LOG_AND_RETURN_IF_ERROR(grpc_server->Start());
#undef LOG_AND_RETURN_IF_ERROR
+
+ return tensorflow::Status::OK();
}
#endif // !IS_MOBILE_PLATFORM
@@ -365,12 +375,6 @@ void TFE_ContextOptionsSetDevicePlacementPolicy(
options->device_placement_policy = policy;
}
-TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx,
- unsigned char enable,
- TF_Status* status) {
- status->status = ctx->context->SetAsyncForThread(enable);
-}
-
void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
@@ -455,18 +459,6 @@ extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy(
ctx->context->GetDevicePlacementPolicy());
}
-void TFE_ContextAsyncWait(TFE_Context* ctx, TF_Status* status) {
- status->status = ctx->context->AsyncWait();
-}
-
-void TFE_ContextGetStatus(TFE_Context* ctx, TF_Status* status) {
- status->status = ctx->context->GetStatus();
-}
-
-void TFE_ContextAsyncClearError(TFE_Context* ctx) {
- ctx->context->ClearAsyncError();
-}
-
TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) {
tensorflow::Tensor tensor;
status->status = tensorflow::TF_TensorToTensor(t, &tensor);
@@ -571,7 +563,8 @@ TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) {
const tensorflow::Tensor* t = nullptr;
tensorflow::TensorHandle* h_cpu = nullptr;
status->status = EagerCopyToDevice(
- handle, handle->Context(), handle->Context()->HostCPU(), false, &h_cpu);
+ handle, handle->Context(), handle->Context()->Executor(),
+ handle->Context()->HostCPU(), false, &h_cpu);
if (!status->status.ok()) {
return nullptr;
}
@@ -671,7 +664,7 @@ void TFE_OpAddInputList(TFE_Op* op, TFE_TensorHandle** inputs, int num_inputs,
TF_AttrType TFE_OpGetAttrType(TFE_Op* op, const char* attr_name,
unsigned char* is_list, TF_Status* status) {
- TF_AttrType ret;
+ TF_AttrType ret = TF_ATTR_INT;
status->status = tensorflow::AttrTypeByName(*op->operation.AttrTypes(),
attr_name, &ret, is_list);
return ret;
@@ -683,10 +676,11 @@ TF_AttrType TFE_OpNameGetAttrType(TFE_Context* ctx,
TF_Status* status) {
TF_AttrType ret;
TFE_Op* op = TFE_NewOp(ctx, op_or_function_name, status);
- if (!status->status.ok()) {
- return TF_ATTR_INT; // Same dummy return as TFE_OpGetAttrType.
+ if (status->status.ok()) {
+ ret = TFE_OpGetAttrType(op, attr_name, is_list, status);
+ } else {
+ ret = TF_ATTR_INT; // Same dummy return as TFE_OpGetAttrType.
}
- ret = TFE_OpGetAttrType(op, attr_name, is_list, status);
TFE_DeleteOp(op);
return ret;
}
@@ -922,6 +916,7 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h,
return nullptr;
}
status->status = tensorflow::EagerCopyToDevice(h->handle, ctx->context,
+ ctx->context->Executor(),
device, false, &handle);
if (status->status.ok()) {
return new TFE_TensorHandle(handle);
@@ -957,12 +952,10 @@ unsigned char TFE_ContextHasFunction(TFE_Context* ctx, const char* name) {
void TFE_ContextEnableRunMetadata(TFE_Context* ctx) {
ctx->context->SetShouldStoreGraphs(true);
- ctx->context->SetShouldStoreStepStats(true);
}
void TFE_ContextDisableRunMetadata(TFE_Context* ctx) {
ctx->context->SetShouldStoreGraphs(false);
- ctx->context->SetShouldStoreStepStats(false);
}
} // extern "C"
@@ -974,7 +967,7 @@ TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t,
void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf,
TF_Status* status) {
- TFE_ContextAsyncWait(ctx, status);
+ status->status = ctx->context->Executor()->WaitForAllPendingNodes();
if (!status->status.ok()) return;
tensorflow::mutex_lock ml(*ctx->context->MetadataMu());
status->status = MessageToBuffer(*ctx->context->RunMetadataProto(), buf);
diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h
old mode 100755
new mode 100644
index f6850118b89..d29e66dc1b8
--- a/tensorflow/c/eager/c_api.h
+++ b/tensorflow/c/eager/c_api.h
@@ -77,7 +77,7 @@ typedef enum TFE_ContextDevicePlacementPolicy {
// LINT.ThenChange(//tensorflow/core/common_runtime/eager/context.h)
// Sets the default execution mode (sync/async). Note that this can be
-// overridden per thread using TFE_ContextSetAsyncForThread.
+// overridden per thread using TFE_ContextSetExecutorForThread.
TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*,
unsigned char enable);
@@ -89,6 +89,9 @@ TF_CAPI_EXPORT extern void TFE_DeleteContextOptions(TFE_ContextOptions*);
// "Context" under which operations/functions are executed. It encapsulates
// things like the available devices, resource manager etc.
+// TFE_Context must outlive all tensor handles created using it. In other
+// words, TFE_DeleteContext() must be called after all tensor handles have
+// been deleted (with TFE_DeleteTensorHandle).
//
// TODO(ashankar): Merge with TF_Session?
typedef struct TFE_Context TFE_Context;
@@ -115,11 +118,6 @@ TF_CAPI_EXPORT extern void TFE_ContextSetThreadLocalDevicePlacementPolicy(
TF_CAPI_EXPORT extern TFE_ContextDevicePlacementPolicy
TFE_ContextGetDevicePlacementPolicy(TFE_Context* ctx);
-// Overrides the execution mode (sync/async) for the current thread.
-TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx,
- unsigned char enable,
- TF_Status* status);
-
// A tensorflow.ServerDef specifies remote workers (in addition to the current
// workers name). Operations created on this context can then be executed on
// any of these remote workers by setting an appropriate device.
@@ -132,25 +130,6 @@ TF_CAPI_EXPORT extern void TFE_ContextSetServerDef(TFE_Context* ctx,
size_t proto_len,
TF_Status* status);
-// Causes the calling thread to block till all ops dispatched in async mode
-// have been executed. Note that "execution" here refers to kernel execution /
-// scheduling of copies, etc. Similar to sync execution, it doesn't guarantee
-// that lower level device queues (like GPU streams) have been flushed.
-//
-// This call may not block for execution of ops enqueued concurrently with this
-// call.
-TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context*,
- TF_Status* status);
-
-// When an error happens, any pending operations are discarded and newly issued
-// ops return an error. This call clears the error state and re-enables
-// execution of newly issued ops.
-//
-// Note that outputs of discarded ops remain in a corrupt state and should not
-// be used for future calls.
-// TODO(agarwal): mark the affected handles and raise errors if they are used.
-TF_CAPI_EXPORT extern void TFE_ContextAsyncClearError(TFE_Context*);
-
// A handle to a tensor on a device.
//
// Like a TF_Tensor, a TFE_TensorHandle refers to a tensor with a value, shape,
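
The lifetime rule documented above is easy to violate during teardown. As a
minimal sketch of the required order, assuming ctx, h, and status were created
through the usual TFE_NewContext/TFE_NewTensorHandle calls (error checks
omitted):

  TFE_DeleteTensorHandle(h);  // every tensor handle first
  TFE_DeleteContext(ctx);     // the context last
  TF_DeleteStatus(status);
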
diff --git a/tensorflow/c/eager/c_api_experimental.cc b/tensorflow/c/eager/c_api_experimental.cc
index 32f28a0712c..a9ad77198e7 100644
--- a/tensorflow/c/eager/c_api_experimental.cc
+++ b/tensorflow/c/eager/c_api_experimental.cc
@@ -32,9 +32,7 @@ void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) {
op->operation.ConsumeInput(h->handle);
}
-TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx) {
- return new TFE_Profiler(ctx);
-}
+TFE_Profiler* TFE_NewProfiler() { return new TFE_Profiler(); }
bool TFE_ProfilerIsOk(TFE_Profiler* profiler) {
return profiler->profiler->Status().ok();
@@ -55,23 +53,10 @@ void TFE_ProfilerSerializeToString(TFE_Profiler* profiler, TF_Buffer* buf,
};
}
-TFE_ProfilerContext* TFE_NewProfilerContext() {
- return new TFE_ProfilerContext;
-}
-
-void TFE_ProfilerContextSetEagerContext(TFE_ProfilerContext* profiler_context,
- TFE_Context* eager_context) {
- profiler_context->profiler_context.eager_context = eager_context->context;
-}
-
-void TFE_DeleteProfilerContext(TFE_ProfilerContext* profiler_context) {
- delete profiler_context;
-}
-
-void TFE_StartProfilerServer(TFE_ProfilerContext* context, int port) {
- // Release child thread intentionally. The child thread can be terminate by
+void TFE_StartProfilerServer(int port) {
+ // Release child thread intentionally. The child thread can be terminated by
// terminating the main thread.
- tensorflow::StartProfilerServer(&context->profiler_context, port).release();
+ tensorflow::StartProfilerServer(port).release();
}
void TFE_ContextEnableGraphCollection(TFE_Context* ctx) {
@@ -587,3 +572,30 @@ void TFE_OpSetCancellationManager(TFE_Op* op,
op->operation.SetCancellationManager(
&cancellation_manager->cancellation_manager);
}
+
+TFE_Executor* TFE_NewExecutor(bool is_async) {
+ return new TFE_Executor(is_async);
+}
+
+void TFE_DeleteExecutor(TFE_Executor* executor) { delete executor; }
+
+bool TFE_ExecutorIsAsync(TFE_Executor* executor) {
+ return executor->executor()->Async();
+}
+
+void TFE_ExecutorWaitForAllPendingNodes(TFE_Executor* executor,
+ TF_Status* status) {
+ status->status = executor->executor()->WaitForAllPendingNodes();
+}
+
+void TFE_ExecutorClearError(TFE_Executor* executor) {
+ executor->executor()->ClearError();
+}
+
+void TFE_ContextSetExecutorForThread(TFE_Context* ctx, TFE_Executor* executor) {
+ ctx->context->SetExecutorForThread(executor->executor());
+}
+
+TFE_Executor* TFE_ContextGetExecutorForThread(TFE_Context* ctx) {
+ return new TFE_Executor(ctx->context->Executor());
+}
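
With TFE_ProfilerContext gone, profiler setup collapses to a single call. A
hedged usage sketch of the new surface (the port number is an arbitrary
example; status is assumed to be a live TF_Status*):

  TFE_Profiler* profiler = TFE_NewProfiler();  // starts profiling immediately
  TFE_StartProfilerServer(6009);               // detached server thread
  // ... run ops under the context ...
  TF_Buffer* buf = TF_NewBuffer();
  TFE_ProfilerSerializeToString(profiler, buf, status);
  TFE_DeleteProfiler(profiler);                // stops profiling
  TF_DeleteBuffer(buf);
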
diff --git a/tensorflow/c/eager/c_api_experimental.h b/tensorflow/c/eager/c_api_experimental.h
index cdf1492c0bc..e5a9459faff 100644
--- a/tensorflow/c/eager/c_api_experimental.h
+++ b/tensorflow/c/eager/c_api_experimental.h
@@ -25,8 +25,6 @@ extern "C" {
TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h,
TF_Status* status);
-typedef struct TFE_ProfilerContext TFE_ProfilerContext;
-
// A profiler which will start profiling when creating the object and will stop
// when the object is destroyed. It will profile all operations run under the
// given TFE_Context. Multiple instance of it can be created, but at most one
@@ -34,7 +32,7 @@ typedef struct TFE_ProfilerContext TFE_ProfilerContext;
// Thread-safety: TFE_Profiler is thread-safe.
typedef struct TFE_Profiler TFE_Profiler;
-TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx);
+TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler();
TF_CAPI_EXPORT extern bool TFE_ProfilerIsOk(TFE_Profiler* profiler);
TF_CAPI_EXPORT extern void TFE_DeleteProfiler(TFE_Profiler* profiler);
@@ -44,27 +42,14 @@ TF_CAPI_EXPORT extern void TFE_ProfilerSerializeToString(TFE_Profiler* profiler,
TF_Buffer* buf,
TF_Status* status);
-// Return a new profiler context object.
-TF_CAPI_EXPORT extern TFE_ProfilerContext* TFE_NewProfilerContext(void);
-
-// Set the eager context in TFE_ProfilerServerOptions
-TF_CAPI_EXPORT extern void TFE_ProfilerContextSetEagerContext(
- TFE_ProfilerContext* profiler_context, TFE_Context* eager_context);
-
-// Destroy a profiler context object.
-TF_CAPI_EXPORT extern void TFE_DeleteProfilerContext(
- TFE_ProfilerContext* profiler_context);
-
// Start a profiler grpc server which listens to specified port. It will start
// the server on its own thread. It can be shutdown by terminating tensorflow.
// It can be used in both Eager mode and graph mode. Creating multiple profiler
// server is allowed. The service defined in
// tensorflow/contrib/tpu/profiler/tpu_profiler.proto. Please use
-// tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture tracable
-// file following
-// https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace.
-TF_CAPI_EXPORT extern void TFE_StartProfilerServer(TFE_ProfilerContext* context,
- int port);
+// tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture a trace
+// file, following https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace.
+TF_CAPI_EXPORT extern void TFE_StartProfilerServer(int port);
// Enables only graph collection in RunMetadata on the functions executed from
// this context.
@@ -367,6 +352,51 @@ TF_CAPI_EXPORT extern void TFE_OpSetCancellationManager(
TFE_Op* op, TFE_CancellationManager* cancellation_manager,
TF_Status* status);
+// -----------------------------------------------------------------------------
+// Eager Executor APIs.
+typedef struct TFE_Executor TFE_Executor;
+
+// Creates a new eager Executor. Nodes in one executor are guaranteed to be
+// executed in sequence. Assigning nodes to different executors allows executing
+// nodes in parallel.
+TF_CAPI_EXPORT extern TFE_Executor* TFE_NewExecutor(bool is_async);
+
+// Deletes the eager Executor without waiting for enqueued nodes. Please call
+// TFE_ExecutorWaitForAllPendingNodes before calling this API if you want to
+// make sure all nodes are finished.
+TF_CAPI_EXPORT extern void TFE_DeleteExecutor(TFE_Executor*);
+
+// Returns true if the executor is in async mode.
+TF_CAPI_EXPORT extern bool TFE_ExecutorIsAsync(TFE_Executor*);
+
+// Causes the calling thread to block till all ops dispatched in this executor
+// have been executed. Note that "execution" here refers to kernel execution /
+// scheduling of copies, etc. Similar to sync execution, it doesn't guarantee
+// that lower level device queues (like GPU streams) have been flushed.
+//
+// This call may not block for execution of ops enqueued concurrently with this
+// call.
+TF_CAPI_EXPORT extern void TFE_ExecutorWaitForAllPendingNodes(
+ TFE_Executor*, TF_Status* status);
+
+// When an error happens, any pending operations are discarded and newly issued
+// ops return an error. This call clears the error state and re-enables
+// execution of newly issued ops.
+//
+// Note that outputs of discarded ops remain in a corrupt state and should not
+// be used for future calls.
+// TODO(agarwal): mark the affected handles and raise errors if they are used.
+TF_CAPI_EXPORT extern void TFE_ExecutorClearError(TFE_Executor*);
+
+// Sets a custom Executor for the current thread. All nodes created by this
+// thread will be added to this Executor, overriding the current executor.
+TF_CAPI_EXPORT extern void TFE_ContextSetExecutorForThread(TFE_Context*,
+ TFE_Executor*);
+
+// Returns the Executor for the current thread.
+TF_CAPI_EXPORT extern TFE_Executor* TFE_ContextGetExecutorForThread(
+ TFE_Context*);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
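
Taken together, these entry points replace the per-context async calls removed
from c_api.h. The swap-and-restore pattern the updated tests rely on, sketched
here assuming a live ctx and status:

  TFE_Executor* old_executor = TFE_ContextGetExecutorForThread(ctx);
  TFE_Executor* executor = TFE_NewExecutor(/*is_async=*/true);
  TFE_ContextSetExecutorForThread(ctx, executor);
  // ... ops enqueued on this thread now run on the new executor ...
  TFE_ContextSetExecutorForThread(ctx, old_executor);
  TFE_ExecutorWaitForAllPendingNodes(executor, status);
  TFE_DeleteExecutor(executor);
  TFE_DeleteExecutor(old_executor);
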
diff --git a/tensorflow/c/eager/c_api_experimental_test.cc b/tensorflow/c/eager/c_api_experimental_test.cc
index 249d6c8960b..ab76ad10adc 100644
--- a/tensorflow/c/eager/c_api_experimental_test.cc
+++ b/tensorflow/c/eager/c_api_experimental_test.cc
@@ -17,6 +17,7 @@ limitations under the License.
#include <string.h>
+#include "tensorflow/c/eager/c_api.h"
#include "tensorflow/c/eager/c_api_test_util.h"
#include "tensorflow/cc/profiler/profiler.h"
#include "tensorflow/core/lib/monitoring/collection_registry.h"
@@ -43,12 +44,9 @@ void ExecuteWithProfiling(bool async) {
TFE_ContextOptions* opts = TFE_NewContextOptions();
TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(async));
TFE_Context* ctx = TFE_NewContext(opts, status);
- TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
- TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
- TFE_Profiler* profiler = TFE_NewProfiler(profiler_context);
+ TFE_Profiler* profiler = TFE_NewProfiler();
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TFE_DeleteContextOptions(opts);
- TFE_DeleteProfilerContext(profiler_context);
TFE_TensorHandle* m = TestMatrixTensorHandle();
TFE_Op* matmul = MatMulOp(ctx, m, m);
@@ -71,8 +69,10 @@ void ExecuteWithProfiling(bool async) {
ASSERT_EQ(1, num_retvals);
TF_Buffer* profiler_result = TF_NewBuffer();
if (async) {
- TFE_ContextAsyncWait(ctx, status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
}
TFE_ProfilerSerializeToString(profiler, profiler_result, status);
TFE_DeleteProfiler(profiler);
@@ -85,7 +85,10 @@ void ExecuteWithProfiling(bool async) {
if (!gpu_device_name.empty()) {
EXPECT_TRUE(HasSubstr(profile_proto_str, "/device:GPU:0"));
// device name with "stream:all" is collected by Device Tracer.
+#ifndef TENSORFLOW_USE_ROCM
+ // ROCm platform does not yet support stream level tracing
EXPECT_TRUE(HasSubstr(profile_proto_str, "stream:all"));
+#endif
}
// "/host:CPU" is collected by TraceMe
EXPECT_TRUE(HasSubstr(profile_proto_str, "/host:CPU"));
@@ -110,27 +113,14 @@ TEST(CAPI, ExecuteWithTracing) { ExecuteWithProfiling(false); }
TEST(CAPI, ExecuteWithTracingAsync) { ExecuteWithProfiling(true); }
TEST(CAPI, MultipleProfilerSession) {
- TF_Status* status = TF_NewStatus();
- TFE_ContextOptions* opts = TFE_NewContextOptions();
- TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(false));
- TFE_Context* ctx = TFE_NewContext(opts, status);
- CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
- TFE_DeleteContextOptions(opts);
-
- TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
- TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
-
- TFE_Profiler* profiler1 = TFE_NewProfiler(profiler_context);
+ TFE_Profiler* profiler1 = TFE_NewProfiler();
EXPECT_TRUE(TFE_ProfilerIsOk(profiler1));
- TFE_Profiler* profiler2 = TFE_NewProfiler(profiler_context);
+ TFE_Profiler* profiler2 = TFE_NewProfiler();
EXPECT_FALSE(TFE_ProfilerIsOk(profiler2));
TFE_DeleteProfiler(profiler1);
TFE_DeleteProfiler(profiler2);
- TFE_DeleteProfilerContext(profiler_context);
- TFE_DeleteContext(ctx);
- TF_DeleteStatus(status);
}
TEST(CAPI, MonitoringCounter0) {
@@ -307,5 +297,205 @@ TEST(CAPI, CancellationManager) {
TFE_DeleteCancellationManager(c_mgr);
}
+TEST(CAPI, Function_ident_CPU) {
+ // First create a simple identity function.
+ TF_Graph* function_graph = TF_NewGraph();
+ TF_OperationDescription* arg_descr =
+ TF_NewOperation(function_graph, "Placeholder", "arg");
+ TF_SetAttrType(arg_descr, "dtype", TF_INT32);
+ TF_Status* status = TF_NewStatus();
+ TF_Operation* arg = TF_FinishOperation(arg_descr, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_OperationDescription* id_descr =
+ TF_NewOperation(function_graph, "Identity", "id");
+ TF_SetAttrType(id_descr, "T", TF_INT32);
+ TF_AddInput(id_descr, {arg, 0});
+ TF_Operation* id = TF_FinishOperation(id_descr, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_Output input{arg, 0};
+ TF_Output output{id, 0};
+ TF_Function* fn =
+ TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1,
+ &output, nullptr, nullptr, "test", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteGraph(function_graph);
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TFE_Context* ctx = TFE_NewContext(opts, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_DeleteContextOptions(opts);
+ TFE_ContextAddFunction(ctx, fn, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteFunction(fn);
+
+ for (bool async : {false, true, false}) {
+ TFE_Executor* old_executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_Executor* executor = TFE_NewExecutor(async);
+ TFE_ContextSetExecutorForThread(ctx, executor);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TF_Tensor* t =
+ TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32));
+ *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
+ TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteTensor(t);
+
+ TFE_Op* op = TFE_NewOp(ctx, "ident", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_OpAddInput(op, h, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+
+ std::vector<TFE_TensorHandle*> result;
+ result.push_back(nullptr);
+ int num_retvals = 1;
+ TFE_Execute(op, result.data(), &num_retvals, status);
+ TFE_DeleteOp(op);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ ASSERT_EQ(num_retvals, 1);
+
+ TF_Tensor* r = TFE_TensorHandleResolve(result[0], status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ EXPECT_EQ(*reinterpret_cast<tensorflow::int32*>(TF_TensorData(r)), 42);
+ TFE_ContextSetExecutorForThread(ctx, old_executor);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
+ TFE_DeleteExecutor(old_executor);
+ TFE_DeleteTensorHandle(h);
+ TF_DeleteTensor(r);
+ TFE_DeleteTensorHandle(result[0]);
+ }
+ TFE_ContextRemoveFunction(ctx, "ident", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_DeleteContext(ctx);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteStatus(status);
+}
+
+#ifdef TENSORFLOW_EAGER_USE_XLA
+TEST(CAPI, Function_ident_XLA_CPU) {
+ // First create a simple identity function.
+ TF_Graph* function_graph = TF_NewGraph();
+ TF_OperationDescription* arg_descr =
+ TF_NewOperation(function_graph, "Placeholder", "arg");
+ TF_SetAttrType(arg_descr, "dtype", TF_INT32);
+ TF_Status* status = TF_NewStatus();
+ TF_Operation* arg = TF_FinishOperation(arg_descr, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_OperationDescription* id_descr =
+ TF_NewOperation(function_graph, "Identity", "id");
+ TF_SetAttrType(id_descr, "T", TF_INT32);
+ TF_AddInput(id_descr, {arg, 0});
+ TF_Operation* id = TF_FinishOperation(id_descr, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_Output input{arg, 0};
+ TF_Output output{id, 0};
+ TF_Function* fn =
+ TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1,
+ &output, nullptr, nullptr, "test", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteGraph(function_graph);
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TFE_Context* ctx = TFE_NewContext(opts, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_DeleteContextOptions(opts);
+ TFE_ContextAddFunction(ctx, fn, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteFunction(fn);
+
+ for (bool async : {false, true, false}) {
+ TFE_Executor* old_executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_Executor* executor = TFE_NewExecutor(async);
+ TFE_ContextSetExecutorForThread(ctx, executor);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK);
+ TF_Tensor* t =
+ TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32));
+ *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
+ TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteTensor(t);
+
+ TFE_Op* op = TFE_NewOp(ctx, "ident", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_OpAddInput(op, h, status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+
+ // Now run it via XLA.
+ TFE_OpSetXLACompilation(op, true);
+
+ std::vector<TFE_TensorHandle*> result;
+ result.push_back(nullptr);
+ int num_retvals = 1;
+ TFE_Execute(op, result.data(), &num_retvals, status);
+ TFE_DeleteOp(op);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ ASSERT_EQ(num_retvals, 1);
+
+ TF_Tensor* r = TFE_TensorHandleResolve(result[0], status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ EXPECT_EQ(*reinterpret_cast<tensorflow::int32*>(TF_TensorData(r)), 42);
+ TFE_ContextSetExecutorForThread(ctx, old_executor);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
+ TFE_DeleteExecutor(old_executor);
+ TFE_DeleteTensorHandle(h);
+ TF_DeleteTensor(r);
+ TFE_DeleteTensorHandle(result[0]);
+ }
+ TFE_ContextRemoveFunction(ctx, "ident", status);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TFE_DeleteContext(ctx);
+ ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
+ TF_DeleteStatus(status);
+}
+#endif // TENSORFLOW_EAGER_USE_XLA
+
+void Executor_MatMul_CPU(bool async) {
+ TF_Status* status = TF_NewStatus();
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TFE_Context* ctx = TFE_NewContext(opts, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteContextOptions(opts);
+
+ TFE_Executor* old_executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_Executor* executor = TFE_NewExecutor(async);
+ TFE_ContextSetExecutorForThread(ctx, executor);
+
+ TFE_TensorHandle* m = TestMatrixTensorHandle();
+ TFE_Op* matmul = MatMulOp(ctx, m, m);
+ TFE_TensorHandle* retvals[2] = {nullptr, nullptr};
+ int num_retvals = 2;
+ TFE_Execute(matmul, &retvals[0], &num_retvals, status);
+ EXPECT_EQ(1, num_retvals);
+ EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteOp(matmul);
+ TFE_DeleteTensorHandle(m);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteTensorHandle(retvals[0]);
+ TFE_ContextSetExecutorForThread(ctx, old_executor);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
+ TFE_DeleteExecutor(old_executor);
+ TFE_DeleteContext(ctx);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ float product[4] = {0};
+ EXPECT_EQ(sizeof(product), TF_TensorByteSize(t));
+ memcpy(&product[0], TF_TensorData(t), TF_TensorByteSize(t));
+ TF_DeleteTensor(t);
+ EXPECT_EQ(7, product[0]);
+ EXPECT_EQ(10, product[1]);
+ EXPECT_EQ(15, product[2]);
+ EXPECT_EQ(22, product[3]);
+ TF_DeleteStatus(status);
+}
+TEST(CAPI, Executor_MatMul_CPU) { Executor_MatMul_CPU(false); }
+TEST(CAPI, Executor_MatMul_CPUAsync) { Executor_MatMul_CPU(true); }
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h
index fe0c952dacb..5efed2ca76d 100644
--- a/tensorflow/c/eager/c_api_internal.h
+++ b/tensorflow/c/eager/c_api_internal.h
@@ -76,7 +76,14 @@ struct TFE_Context {
async, device_mgr, device_mgr_owned, rendezvous,
custom_kernel_creator)) {}
- ~TFE_Context() { context->Unref(); }
+ ~TFE_Context() {
+ // TODO(iga): Add a separate API method to shut down TFE_Context so that
+ // we don't send RPCs and block in the destructor.
+ context->WaitForAndCloseRemoteContexts();
+ // context->RefCountIsOne() should be true here.
+ // TODO(iga): Remove EagerContext refcounting.
+ context->Unref();
+ }
tensorflow::EagerContext* context;
};
@@ -130,14 +137,8 @@ struct TFE_Op {
std::unique_ptr<TFE_OpInferenceContext> inference_ctx;
};
-struct TFE_ProfilerContext {
- tensorflow::ProfilerContext profiler_context;
-};
-
struct TFE_Profiler {
- explicit TFE_Profiler(TFE_ProfilerContext* ctx) {
- profiler = tensorflow::ProfilerSession::Create(&ctx->profiler_context);
- }
+ explicit TFE_Profiler() { profiler = tensorflow::ProfilerSession::Create(); }
std::unique_ptr<tensorflow::ProfilerSession> profiler;
};
@@ -291,4 +292,19 @@ struct TFE_CancellationManager {
tensorflow::CancellationManager cancellation_manager;
};
+struct TFE_Executor {
+ explicit TFE_Executor(bool async)
+ : owned_executor(new tensorflow::EagerExecutor(async)) {}
+
+ explicit TFE_Executor(tensorflow::EagerExecutor* executor)
+ : owned_executor(nullptr), unowned_executor(executor) {}
+
+ tensorflow::EagerExecutor* executor() {
+ return owned_executor == nullptr ? unowned_executor : owned_executor.get();
+ }
+
+ std::unique_ptr<tensorflow::EagerExecutor> owned_executor;
+ tensorflow::EagerExecutor* unowned_executor;
+};
+
#endif // TENSORFLOW_C_EAGER_C_API_INTERNAL_H_
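
Note the ownership split encoded in the two constructors: TFE_NewExecutor goes
through the bool constructor and owns the underlying tensorflow::EagerExecutor,
while TFE_ContextGetExecutorForThread wraps the context's executor without
owning it, so TFE_DeleteExecutor on that wrapper frees only the wrapper.
Illustrated (assuming a live TFE_Context* ctx):

  TFE_Executor* owned = TFE_NewExecutor(/*is_async=*/false);
  TFE_Executor* borrowed = TFE_ContextGetExecutorForThread(ctx);
  TFE_DeleteExecutor(borrowed);  // frees the wrapper; the executor survives
  TFE_DeleteExecutor(owned);     // also destroys the owned EagerExecutor
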
diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc
index e80620c9a64..d3b755fee6e 100644
--- a/tensorflow/c/eager/c_api_test.cc
+++ b/tensorflow/c/eager/c_api_test.cc
@@ -18,6 +18,7 @@ limitations under the License.
#include <string.h>
#include "absl/strings/match.h"
+#include "tensorflow/c/eager/c_api_experimental.h"
#include "tensorflow/c/eager/c_api_internal.h"
#include "tensorflow/c/eager/c_api_test_util.h"
#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h"
@@ -78,7 +79,10 @@ void BM_Execute(int iters, int async) {
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
}
if (async) {
- TFE_ContextAsyncWait(ctx, status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
}
tensorflow::testing::StopTiming();
TFE_DeleteOp(matmul);
@@ -89,6 +93,41 @@ void BM_Execute(int iters, int async) {
}
BENCHMARK(BM_Execute)->Arg(0)->Arg(1);
+void BM_Execute_Identity(int iters, int async) {
+ tensorflow::testing::StopTiming();
+ tensorflow::testing::SetLabel(async ? "ExecuteIdentityAsync"
+ : "ExecuteIdentity");
+ TF_Status* status = TF_NewStatus();
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(async));
+ TFE_Context* ctx = TFE_NewContext(opts, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteContextOptions(opts);
+
+ TFE_TensorHandle* m = TestMatrixTensorHandle();
+ TFE_Op* identity = IdentityOp(ctx, m);
+ TFE_TensorHandle* retvals[1];
+ int num_retvals = 1;
+ tensorflow::testing::StartTiming();
+ for (int i = 0; i < iters; ++i) {
+ TFE_Execute(identity, &retvals[0], &num_retvals, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ }
+ if (async) {
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
+ }
+ tensorflow::testing::StopTiming();
+ TFE_DeleteOp(identity);
+ TFE_DeleteTensorHandle(m);
+ TFE_DeleteContext(ctx);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TF_DeleteStatus(status);
+}
+BENCHMARK(BM_Execute_Identity)->Arg(0)->Arg(1);
+
TEST(CAPI, Context) {
TF_Status* status = TF_NewStatus();
TFE_ContextOptions* opts = TFE_NewContextOptions();
@@ -196,8 +235,10 @@ void TestRemoteExecute(bool async) {
TFE_DeleteOp(matmul);
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
TFE_DeleteContext(ctx);
TF_DeleteStatus(status);
@@ -282,9 +323,11 @@ void TestRemoteExecuteSilentCopies(bool async) {
TFE_DeleteOp(matmul);
- TFE_ContextAsyncWait(ctx, status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
TFE_DeleteContext(ctx);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
TF_DeleteStatus(status);
@@ -298,7 +341,7 @@ TEST(CAPI, RemoteExecuteSilentCopiesAsync) {
TestRemoteExecuteSilentCopies(true);
}
-void TestRemoteExecuteDeleteTensorAfterContext(bool async) {
+void TestRemoteExecuteDeleteContextWithOutstandingRPC(bool async) {
tensorflow::ServerDef server_def = GetServerDef(2);
// This server def has the task index set to 0.
@@ -324,33 +367,49 @@ void TestRemoteExecuteDeleteTensorAfterContext(bool async) {
TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
- TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle();
+ // Use large matrices so that RPCs don't return before we get a chance
+ // to call TFE_DeleteContext.
+ TFE_TensorHandle* h0_task0 = TestMatrixTensorHandle100x100();
+ TFE_TensorHandle* h1_task0 = TestMatrixTensorHandle100x100();
const char remote_device_name[] =
"/job:localhost/replica:0/task:1/device:CPU:0";
auto* h0_task1 =
TFE_TensorHandleCopyToDevice(h0_task0, ctx, remote_device_name, status);
ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ auto* h1_task1 =
+ TFE_TensorHandleCopyToDevice(h1_task0, ctx, remote_device_name, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TFE_Op* matmul = MatMulOp(ctx, h0_task1, h1_task1);
+ TFE_OpSetDevice(matmul, remote_device_name, status);
+ EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+ TFE_TensorHandle* retvals[1];
+ int num_retvals = 1;
+ TFE_Execute(matmul, &retvals[0], &num_retvals, status);
+ EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TF_DeleteStatus(status);
TFE_DeleteTensorHandle(h0_task0);
-
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
- TFE_DeleteContext(ctx);
-
- // Delete tensors after context is deleted.
+ TFE_DeleteTensorHandle(h1_task0);
TFE_DeleteTensorHandle(h0_task1);
+ TFE_DeleteTensorHandle(h1_task1);
+ TFE_DeleteTensorHandle(retvals[0]);
- TF_DeleteStatus(status);
+ TFE_DeleteOp(matmul);
+
+ TFE_DeleteContext(ctx);
// TODO(b/136478427): Figure out how to correctly shut the server down.
worker_server.release();
}
-TEST(CAPI, RemoteExecuteDeleteTensorAfterContext) {
- TestRemoteExecuteDeleteTensorAfterContext(false);
+TEST(CAPI, RemoteExecuteDeleteContextWithOutstandingRPC) {
+ TestRemoteExecuteDeleteContextWithOutstandingRPC(false);
}
-TEST(CAPI, RemoteExecuteDeleteTensorAfterContextAsync) {
- TestRemoteExecuteDeleteTensorAfterContext(true);
+
+TEST(CAPI, RemoteExecuteDeleteContextWithOutstandingRPCAsync) {
+ TestRemoteExecuteDeleteContextWithOutstandingRPC(true);
}
void CheckTFE_TensorHandleHasFloats(TFE_TensorHandle* handle,
@@ -397,8 +456,10 @@ void CheckRemoteMatMulExecutesOK(TFE_Context* ctx,
TFE_DeleteOp(matmul);
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
TF_DeleteStatus(status);
}
@@ -433,8 +494,9 @@ void TestRemoteExecuteChangeServerDef(bool async) {
"/job:localhost/replica:0/task:0/device:CPU:0";
CheckRemoteMatMulExecutesOK(ctx, remote_device_name, local_device_name);
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
// TODO(b/136478427): Figure out how to correctly shut the server down.
worker_server.release();
@@ -476,8 +538,9 @@ void TestRemoteExecuteChangeServerDef(bool async) {
CheckRemoteMatMulExecutesOK(ctx, new_remote_device_name,
new_local_device_name);
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
TF_DeleteStatus(status);
@@ -610,8 +673,11 @@ void TensorHandleCopyBetweenDevicesError(bool async) {
TFE_TensorHandle* hcopy =
TFE_TensorHandleCopyToDevice(hcpu, ctx, kCPUDevice, status.get());
EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
- TFE_ContextAsyncWait(ctx, status.get());
- EXPECT_EQ(TF_OK, TF_GetCode(status.get()));
+
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status.get());
+ EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteExecutor(executor);
TFE_DeleteTensorHandle(hcopy);
TFE_DeleteTensorHandle(hcpu);
if (hdevice != nullptr) TFE_DeleteTensorHandle(hdevice);
@@ -740,8 +806,10 @@ void TensorHandleSilentCopy(bool async) {
TF_DeleteTensor(t);
TFE_DeleteTensorHandle(hcpu);
- TFE_ContextAsyncWait(ctx, status.get());
- EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteExecutor(executor);
TFE_DeleteContext(ctx);
}
@@ -786,8 +854,10 @@ void TensorHandleSilentCopyLocal(bool async) {
TF_DeleteTensor(t);
TFE_DeleteTensorHandle(hcpu);
- TFE_ContextAsyncWait(ctx, status.get());
- EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteExecutor(executor);
TFE_DeleteContext(ctx);
}
TEST(CAPI, TensorHandleSilentCopyLocal) { TensorHandleSilentCopyLocal(false); }
@@ -921,8 +991,10 @@ TEST(CAPI, TensorHandleDevices) {
}
TFE_DeleteTensorHandle(hcpu);
- TFE_ContextAsyncWait(ctx, status.get());
- EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteExecutor(executor);
TFE_DeleteContext(ctx);
}
@@ -1000,9 +1072,11 @@ void Execute_MatMul_CPU_Runtime_Error(bool async) {
retvals[0] = nullptr;
TFE_Execute(matmul2, &retvals[0], &num_retvals, status);
EXPECT_NE(TF_OK, TF_GetCode(status));
- TFE_ContextAsyncClearError(ctx);
- TFE_ContextAsyncWait(ctx, status);
- EXPECT_EQ(TF_OK, TF_GetCode(status));
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorClearError(executor);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
}
// Following works in async mode since TFE_ContextAsyncClearError was called.
TF_SetStatus(status, TF_OK, "");
@@ -1220,147 +1294,6 @@ void ExecuteWithTracing(bool async) {
TEST(CAPI, ExecuteWithTracing) { ExecuteWithTracing(false); }
TEST(CAPI, ExecuteWithTracingAsync) { ExecuteWithTracing(true); }
-TEST(CAPI, Function_ident_CPU) {
- // First create a simple identity function.
- TF_Graph* function_graph = TF_NewGraph();
- TF_OperationDescription* arg_descr =
- TF_NewOperation(function_graph, "Placeholder", "arg");
- TF_SetAttrType(arg_descr, "dtype", TF_INT32);
- TF_Status* status = TF_NewStatus();
- TF_Operation* arg = TF_FinishOperation(arg_descr, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_OperationDescription* id_descr =
- TF_NewOperation(function_graph, "Identity", "id");
- TF_SetAttrType(id_descr, "T", TF_INT32);
- TF_AddInput(id_descr, {arg, 0});
- TF_Operation* id = TF_FinishOperation(id_descr, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_Output input{arg, 0};
- TF_Output output{id, 0};
- TF_Function* fn =
- TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1,
- &output, nullptr, nullptr, "test", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteGraph(function_graph);
- TFE_ContextOptions* opts = TFE_NewContextOptions();
- TFE_Context* ctx = TFE_NewContext(opts, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_DeleteContextOptions(opts);
- TFE_ContextAddFunction(ctx, fn, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteFunction(fn);
-
- for (bool async : {false, true, false}) {
- TFE_ContextSetAsyncForThread(ctx, static_cast<unsigned char>(async),
- status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK);
- TF_Tensor* t =
- TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32));
- *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
- TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteTensor(t);
-
- TFE_Op* op = TFE_NewOp(ctx, "ident", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_OpAddInput(op, h, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
-
- std::vector<TFE_TensorHandle*> result;
- result.push_back(nullptr);
- int num_retvals = 1;
- TFE_Execute(op, result.data(), &num_retvals, status);
- TFE_DeleteOp(op);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- ASSERT_EQ(num_retvals, 1);
-
- TF_Tensor* r = TFE_TensorHandleResolve(result[0], status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- EXPECT_EQ(*reinterpret_cast<tensorflow::int32*>(TF_TensorData(r)), 42);
- TFE_DeleteTensorHandle(h);
- TF_DeleteTensor(r);
- TFE_DeleteTensorHandle(result[0]);
- }
- TFE_ContextRemoveFunction(ctx, "ident", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_DeleteContext(ctx);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteStatus(status);
-}
-
-#ifdef TENSORFLOW_EAGER_USE_XLA
-TEST(CAPI, Function_ident_XLA_CPU) {
- // First create a simple identity function.
- TF_Graph* function_graph = TF_NewGraph();
- TF_OperationDescription* arg_descr =
- TF_NewOperation(function_graph, "Placeholder", "arg");
- TF_SetAttrType(arg_descr, "dtype", TF_INT32);
- TF_Status* status = TF_NewStatus();
- TF_Operation* arg = TF_FinishOperation(arg_descr, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_OperationDescription* id_descr =
- TF_NewOperation(function_graph, "Identity", "id");
- TF_SetAttrType(id_descr, "T", TF_INT32);
- TF_AddInput(id_descr, {arg, 0});
- TF_Operation* id = TF_FinishOperation(id_descr, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_Output input{arg, 0};
- TF_Output output{id, 0};
- TF_Function* fn =
- TF_GraphToFunction(function_graph, "ident", 0, 1, &id, 1, &input, 1,
- &output, nullptr, nullptr, "test", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteGraph(function_graph);
- TFE_ContextOptions* opts = TFE_NewContextOptions();
- TFE_Context* ctx = TFE_NewContext(opts, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_DeleteContextOptions(opts);
- TFE_ContextAddFunction(ctx, fn, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteFunction(fn);
-
- for (bool async : {false, true, false}) {
- TFE_ContextSetAsyncForThread(ctx, static_cast<unsigned char>(async),
- status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK);
- TF_Tensor* t =
- TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32));
- *reinterpret_cast<tensorflow::int32*>(TF_TensorData(t)) = 42;
- TFE_TensorHandle* h = TFE_NewTensorHandle(t, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteTensor(t);
-
- TFE_Op* op = TFE_NewOp(ctx, "ident", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_OpAddInput(op, h, status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
-
- // Now run it via XLA.
- TFE_OpSetXLACompilation(op, true);
-
- std::vector<TFE_TensorHandle*> result;
- result.push_back(nullptr);
- int num_retvals = 1;
- TFE_Execute(op, result.data(), &num_retvals, status);
- TFE_DeleteOp(op);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- ASSERT_EQ(num_retvals, 1);
-
- TF_Tensor* r = TFE_TensorHandleResolve(result[0], status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- EXPECT_EQ(*reinterpret_cast<tensorflow::int32*>(TF_TensorData(r)), 42);
- TFE_DeleteTensorHandle(h);
- TF_DeleteTensor(r);
- TFE_DeleteTensorHandle(result[0]);
- }
- TFE_ContextRemoveFunction(ctx, "ident", status);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TFE_DeleteContext(ctx);
- ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status);
- TF_DeleteStatus(status);
-}
-#endif // TENSORFLOW_EAGER_USE_XLA
-
string MatMulFunction() {
tensorflow::FunctionDef def;
CHECK(tensorflow::protobuf::TextFormat::ParseFromString(
@@ -1474,7 +1407,10 @@ void BM_ExecuteFunction(int iters, int async) {
CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
}
if (async) {
- TFE_ContextAsyncWait(ctx, status);
+ TFE_Executor* executor = TFE_ContextGetExecutorForThread(ctx);
+ TFE_ExecutorWaitForAllPendingNodes(executor, status);
+ ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_DeleteExecutor(executor);
}
tensorflow::testing::StopTiming();
TFE_DeleteTensorHandle(m);
diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 17d17c0b7f7..51566b35a9f 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -85,6 +85,24 @@ TFE_TensorHandle* TestMatrixTensorHandle() {
return th;
}
+TFE_TensorHandle* TestMatrixTensorHandle100x100() {
+ constexpr int64_t dims[] = {100, 100};
+ constexpr int num_elements = dims[0] * dims[1];
+ float data[num_elements];
+ for (int i = 0; i < num_elements; ++i) {
+ data[i] = 1.0f;
+ }
+ TF_Tensor* t = TF_AllocateTensor(
+ TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data));
+ memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t));
+ TF_Status* status = TF_NewStatus();
+ TFE_TensorHandle* th = TFE_NewTensorHandle(t, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TF_DeleteTensor(t);
+ TF_DeleteStatus(status);
+ return th;
+}
+
TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2() {
int64_t dims[] = {3, 2};
double data[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
@@ -128,6 +146,19 @@ TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) {
return op;
}
+TFE_Op* IdentityOp(TFE_Context* ctx, TFE_TensorHandle* a) {
+ TF_Status* status = TF_NewStatus();
+
+ TFE_Op* op = TFE_NewOp(ctx, "Identity", status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TFE_OpAddInput(op, a, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TF_DeleteStatus(status);
+ TFE_OpSetAttrType(op, "T", TFE_TensorHandleDataType(a));
+
+ return op;
+}
+
TFE_Op* ShapeOp(TFE_Context* ctx, TFE_TensorHandle* a) {
TF_Status* status = TF_NewStatus();
diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h
index 4ff3ff4301f..28062222cf0 100644
--- a/tensorflow/c/eager/c_api_test_util.h
+++ b/tensorflow/c/eager/c_api_test_util.h
@@ -16,7 +16,6 @@ limitations under the License.
#define TENSORFLOW_C_EAGER_C_API_TEST_UTIL_H_
#include "tensorflow/c/eager/c_api.h"
-
#include "tensorflow/core/platform/types.h"
// Return a tensor handle containing a float scalar
@@ -34,6 +33,9 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle();
// Return a tensor handle containing a 2x2 matrix of floats
TFE_TensorHandle* TestMatrixTensorHandle();
+// Return a tensor handle containing a 100x100 matrix of floats
+TFE_TensorHandle* TestMatrixTensorHandle100x100();
+
// Return a tensor handle containing a 3x2 matrix of doubles
TFE_TensorHandle* DoubleTestMatrixTensorHandle3X2();
@@ -43,6 +45,9 @@ TFE_TensorHandle* TestMatrixTensorHandle3X2();
// Return a matmul op multiplying `a` by `b`.
TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b);
+// Return an identity op.
+TFE_Op* IdentityOp(TFE_Context* ctx, TFE_TensorHandle* a);
+
// Return a shape op fetching the shape of `a`.
TFE_Op* ShapeOp(TFE_Context* ctx, TFE_TensorHandle* a);
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 0545e3f7ce0..edb2733ab32 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -18,6 +18,7 @@ limitations under the License.
// Language-agnostic gradient tape. Does not perform backpropagation, just
// maintains the data structures required to do so.
+#include <stack>
#include <vector>
#include "tensorflow/core/framework/tensor_shape.h"
@@ -209,7 +210,9 @@ class ForwardAccumulator {
// ForwardAccumulator.
explicit ForwardAccumulator(
const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace)
- : vspace_(vspace), backward_tape_(nullptr), accumulating_(false) {}
+ : vspace_(vspace) {
+ call_state_.emplace(nullptr, false);
+ }
virtual ~ForwardAccumulator() {
for (auto accumulated : accumulated_gradients_) {
@@ -262,6 +265,12 @@ class ForwardAccumulator {
const std::function& backward_function_getter,
const std::function& backward_function_deleter);
+ // Returns true if `Accumulate` is active somewhere above on the stack and
+ // there isn't an intervening PushState. This is useful for ordering
+ // ForwardAccumulators, where more deeply nested accumulators should not see
+ // computations from less deeply nested accumulators.
+ bool BusyAccumulating() const { return call_state_.top().accumulating; }
+
// Fetches the current Jacobian-vector product associated with `tensor_id`, or
// a nullptr if none is available.
//
@@ -276,6 +285,15 @@ class ForwardAccumulator {
bool ShouldRecord(gtl::ArraySlice<int64> tensor_ids,
gtl::ArraySlice<tensorflow::DataType> dtypes);
+ // Temporarily push or pop transient state for this accumulator.
+ //
+ // Allows an accumulator which is currently processing an operation to
+ // temporarily reset its state. Without pushing and popping, accumulators
+ // ignore operations executed as a direct result of their own jvp
+ // computations.
+ void PushState() { call_state_.emplace(nullptr, false); }
+ void PopState() { call_state_.pop(); }
+
private:
// Helper for Accumulate: uses a GradientTape to compute forward gradients
// from a backward gradient function. Fills `out_grads` corresponding to
@@ -283,7 +301,7 @@ class ForwardAccumulator {
//
// Executes the backward function in order to trace its gradient, which will
// waste computation if executing eagerly (when graph building the unneeded
- // computation is pruned). Temporarily sets `backward_tape_` so that
+ // computation is pruned). Temporarily sets `backward_tape` so that
// Accumulate will forward op executions to the tape while the backward
// function is running; this effectively adds the backward tape to the active
// set (but does not require complicated callbacks to the language bindings).
@@ -299,16 +317,26 @@ class ForwardAccumulator {
// Not owned; provides operations on Tensors which are currently only
// available in language bindings (e.g. Python).
const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace_;
- // Set temporarily while in the Accumulate method; if backward_tape_ is not
- // nullptr then we forward op executions to it so Accumulate can compute a
- // backward pass on its backward function.
- //
- // Not owned by the ForwardAccumulator. The method which sets `backward_tape_`
- // keeps ownership.
- GradientTape<Gradient, BackwardFunction, TapeTensor>* backward_tape_;
- // While the Accumulate method is running (accumulating_ is True), any op
- // executions not forwarded to backward_tape_ should be ignored.
- bool accumulating_;
+
+ struct AccumulatorCallState {
+ AccumulatorCallState(
+ GradientTape<Gradient, BackwardFunction, TapeTensor>* backward_tape,
+ bool accumulating)
+ : backward_tape(backward_tape), accumulating(accumulating) {}
+ // Set temporarily while in the Accumulate method; if backward_tape is not
+ // nullptr then we forward op executions to it so Accumulate can compute a
+ // backward pass on its backward function.
+ //
+ // Not owned by the ForwardAccumulator. The method which sets
+ // `backward_tape` keeps ownership.
+ GradientTape<Gradient, BackwardFunction, TapeTensor>* backward_tape;
+ // While the Accumulate method is running (accumulating is True), any op
+ // executions not forwarded to backward_tape should be ignored.
+ bool accumulating;
+ };
+ // A deque-backed stack, whose element references are not invalidated by
+ // pushes and pops at the back.
+ std::stack<AccumulatorCallState> call_state_;
};
// Template instantiations here
@@ -841,12 +869,12 @@ template
bool ForwardAccumulator<Gradient, BackwardFunction, TapeTensor>::ShouldRecord(
gtl::ArraySlice<int64> tensor_ids,
gtl::ArraySlice<tensorflow::DataType> dtypes) {
- if (backward_tape_ != nullptr) {
- // If we're forwarding Accumulate calls to backward_tape_'s RecordOperation,
+ if (call_state_.top().backward_tape != nullptr) {
+ // If we're forwarding Accumulate calls to backward_tape's RecordOperation,
// we should also delegate ShouldRecord.
- return backward_tape_->ShouldRecord(tensor_ids, dtypes);
+ return call_state_.top().backward_tape->ShouldRecord(tensor_ids, dtypes);
}
- if (accumulating_) {
+ if (call_state_.top().accumulating) {
return false;
}
for (int i = 0; i < tensor_ids.size(); ++i) {
@@ -878,9 +906,10 @@ ForwardAccumulator::ForwardpropFromTape(
*/
std::unique_ptr<GradientTape<Gradient, BackwardFunction, TapeTensor>> tape(
new GradientTape<Gradient, BackwardFunction, TapeTensor>(false));
- backward_tape_ = tape.get();
+ AccumulatorCallState& call_state = call_state_.top();
+ call_state.backward_tape = tape.get();
auto pop_backward_tape =
- gtl::MakeCleanup([this] { this->backward_tape_ = nullptr; });
+ gtl::MakeCleanup([&call_state] { call_state.backward_tape = nullptr; });
std::vector<TapeTensor> forwardprop_aids;
std::vector<int64> sources;
std::unordered_set<int64> sources_set;
@@ -955,10 +984,10 @@ Status ForwardAccumulator::Accumulate(
const ForwardFunction<Gradient>* forward_function,
const std::function& backward_function_getter,
const std::function& backward_function_deleter) {
- if (backward_tape_ != nullptr) {
- // If backward_tape_ is not null, then this call to Accumulate is the result
+ if (call_state_.top().backward_tape != nullptr) {
+ // If backward_tape is not null, then this call to Accumulate is the result
// of a still-active call to Accumulate which is running operations. We
- // forward these operations to backward_tape_ so the outer Accumulate call
+ // forward these operations to backward_tape so the outer Accumulate call
// can do its work.
//
// Rather than re-entering and delegating Accumulate like this, we could
@@ -966,9 +995,9 @@ Status ForwardAccumulator::Accumulate(
// (so it can deactivate itself and activate its GradientTape). Currently
// that is managed by the language binding and would require relatively
// messy callbacks.
- backward_tape_->RecordOperation(op_type, output_tensors, input_tensor_id,
- input_dtypes, backward_function_getter,
- backward_function_deleter);
+ call_state_.top().backward_tape->RecordOperation(
+ op_type, output_tensors, input_tensor_id, input_dtypes,
+ backward_function_getter, backward_function_deleter);
return Status::OK();
}
if (!ShouldRecord(input_tensor_id, input_dtypes)) {
@@ -1006,9 +1035,8 @@ Status ForwardAccumulator::Accumulate(
// Avoid infinite recursion. Whichever forward function we run, it'll end up
// executing ops, and we don't want to watch those with this accumulator.
- accumulating_ = true;
- auto reset_accumulating =
- gtl::MakeCleanup([this] { this->accumulating_ = false; });
+ call_state_.emplace(nullptr, true);
+ auto pop_call_state = gtl::MakeCleanup([this] { this->call_state_.pop(); });
std::vector<Gradient*> forward_grads;
if (forward_function == nullptr) {
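
Replacing the two flat members with a stack is what makes PushState/PopState
safe under reentrancy: each nested Accumulate call (or explicit push) gets its
own frame, and the deque-backed std::stack keeps references to outer frames
valid. A standalone sketch of the same pattern (the names here are
illustrative, not the tape's):

  #include <stack>

  struct CallState {
    bool accumulating;  // true while running our own jvp computation
  };

  class Accumulator {
   public:
    Accumulator() { state_.push({false}); }
    void BeginAccumulate() { state_.push({true}); }  // as Accumulate does
    void EndAccumulate() { state_.pop(); }
    bool ShouldRecord() const { return !state_.top().accumulating; }

   private:
    std::stack<CallState> state_;  // deque-backed by default
  };
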
diff --git a/tensorflow/c/experimental/rendezvous.cc b/tensorflow/c/experimental/rendezvous.cc
index 0ee4907b7a4..7a90bde8fe4 100644
--- a/tensorflow/c/experimental/rendezvous.cc
+++ b/tensorflow/c/experimental/rendezvous.cc
@@ -45,6 +45,9 @@ CRemoteRendezvous::CRemoteRendezvous(const WorkerEnv* env, int64 step_id,
void CRemoteRendezvous::RecvFromRemoteAsync(const Rendezvous::ParsedKey& parsed,
const Rendezvous::Args& args,
DoneCallback done) {
+ if (args.cancellation_manager != nullptr) {
+ VLOG(1) << "WARNING: CRemoteRendezvous does not support cancellation.";
+ }
TF_ParsedKey key;
key.src_device = parsed.src_device.data();
key.src_device_len = parsed.src_device.size();
diff --git a/tensorflow/c/generate-pc.sh b/tensorflow/c/generate-pc.sh
index 7184ad68fb7..a4d51a1b3b2 100755
--- a/tensorflow/c/generate-pc.sh
+++ b/tensorflow/c/generate-pc.sh
@@ -63,12 +63,26 @@ cat << EOF > tensorflow.pc
prefix=${TF_PREFIX}
exec_prefix=\${prefix}
libdir=\${exec_prefix}/${LIBDIR}
-includedir=\${prefix}/include
+includedir=\${prefix}/include/tensorflow
Name: TensorFlow
Version: ${TF_VERSION}
Description: Library for computation using data flow graphs for scalable machine learning
Requires:
-Libs: -L\${libdir} -ltensorflow
+Libs: -L\${libdir} -ltensorflow -ltensorflow_framework
+Cflags: -I\${includedir}
+EOF
+
+cat << EOF > tensorflow_cc.pc
+prefix=${TF_PREFIX}
+exec_prefix=\${prefix}
+libdir=\${exec_prefix}/${LIBDIR}
+includedir=\${prefix}/include/tensorflow
+
+Name: TensorFlow
+Version: ${TF_VERSION}
+Description: Library for computation using data flow graphs for scalable machine learning
+Requires:
+Libs: -L\${libdir} -ltensorflow_cc -ltensorflow_framework
Cflags: -I\${includedir}
EOF
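
With the script now emitting both tensorflow.pc and tensorflow_cc.pc,
consumers pick the C or C++ library by pkg-config name, e.g.
"pkg-config --cflags --libs tensorflow" for the C API and
"pkg-config --cflags --libs tensorflow_cc" for the C++ API (assuming the
generated files are on PKG_CONFIG_PATH).
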
diff --git a/tensorflow/c/kernels.cc b/tensorflow/c/kernels.cc
index 94685c8ffaf..b067176f3be 100644
--- a/tensorflow/c/kernels.cc
+++ b/tensorflow/c/kernels.cc
@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/c/c_api_internal.h"
#include "tensorflow/c/tf_status_helper.h"
+#include "tensorflow/c/tf_tensor_internal.h"
#include "tensorflow/core/framework/kernel_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
@@ -189,8 +190,8 @@ void TF_GetInput(TF_OpKernelContext* ctx, int i, TF_Tensor** tensor,
void TF_SetOutput(TF_OpKernelContext* ctx, int i, const TF_Tensor* tensor,
TF_Status* status) {
auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(ctx);
- if (i < 0 || i >= cc_ctx->num_inputs()) {
- TF_SetStatus(status, TF_OUT_OF_RANGE, "input index out of range");
+ if (i < 0 || i >= cc_ctx->num_outputs()) {
+ TF_SetStatus(status, TF_OUT_OF_RANGE, "output index out of range");
return;
}
::tensorflow::Tensor cc_tensor;
@@ -240,3 +241,14 @@ TF_DataType TF_ExpectedOutputDataType(TF_OpKernelContext* ctx, int i) {
int64_t TF_StepId(TF_OpKernelContext* ctx) {
return reinterpret_cast<::tensorflow::OpKernelContext*>(ctx)->step_id();
}
+
+TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context, int index,
+ TF_DataType dtype, int64_t* dims, int num_dims,
+ size_t len) {
+ auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelContext*>(context);
+ tensorflow::AllocatorAttributes attr = cc_ctx->output_alloc_attr(index);
+ auto* allocator = cc_ctx->get_allocator(attr);
+ void* data = tensorflow::allocate_tensor("TF_AllocateOutput", len, allocator);
+ return TF_NewTensor(dtype, dims, num_dims, data, len,
+ tensorflow::deallocate_buffer, allocator);
+}
diff --git a/tensorflow/c/kernels.h b/tensorflow/c/kernels.h
index a192437a52f..8d0518ae170 100644
--- a/tensorflow/c/kernels.h
+++ b/tensorflow/c/kernels.h
@@ -180,6 +180,16 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrInt32(
TF_OpKernelConstruction* ctx, const char* attr_name, int32_t* val,
TF_Status* status);
+// Allocates Tensor for output at given index. Caller takes ownership of
+// returned TF_Tensor and should deallocate it using TF_DeleteTensor(tensor).
+//
+// This function should be used to allocate outputs inside kernel
+// compute function.
+TF_CAPI_EXPORT TF_Tensor* TF_AllocateOutput(TF_OpKernelContext* context,
+ int index, TF_DataType dtype,
+ int64_t* dims, int num_dims,
+ size_t len);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
diff --git a/tensorflow/c/kernels_test.cc b/tensorflow/c/kernels_test.cc
index 0e65d18ec81..05277b6c12c 100644
--- a/tensorflow/c/kernels_test.cc
+++ b/tensorflow/c/kernels_test.cc
@@ -12,17 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#define EIGEN_USE_GPU
+#endif
#include "tensorflow/c/kernels.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/c/c_api.h"
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/kernel_def.pb.h"
-#include "tensorflow/core/framework/node_def.pb_text.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/platform/test.h"
@@ -309,4 +315,144 @@ TEST(TestKernel, TestHostMemory) {
TF_DeleteKernelBuilder(builder);
ASSERT_TRUE(delete_called);
}
+
+class DeviceKernelOpTest : public OpsTestBase {
+ protected:
+ void SetupOp(const char* op_name, const char* kernel_name,
+ void (*compute_func)(void*, TF_OpKernelContext*)) {
+ TF_KernelBuilder* builder = TF_NewKernelBuilder(
+ op_name, device_name_, nullptr, compute_func, nullptr);
+ TF_Status* status = TF_NewStatus();
+ TF_RegisterKernelBuilder(kernel_name, builder, status);
+ EXPECT_EQ(TF_OK, TF_GetCode(status));
+ TF_DeleteStatus(status);
+
+#if GOOGLE_CUDA
+  std::unique_ptr<Device> device(
+ DeviceFactory::NewDevice(device_name_, {}, "/job:a/replica:0/task:0"));
+ OpsTestBase::SetDevice(DEVICE_GPU, std::move(device));
+#endif
+ TF_ASSERT_OK(NodeDefBuilder(op_name, op_name).Finalize(node_def()));
+ TF_ASSERT_OK(InitOp());
+ }
+
+#if GOOGLE_CUDA
+ const char* device_name_ = tensorflow::DEVICE_GPU;
+#else
+ const char* device_name_ = tensorflow::DEVICE_CPU;
+#endif
+};
+
+REGISTER_OP("AllocateOutputOp1").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateOutputSizeOne) {
+ auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+ // Allocate output
+ int64_t dim = 1;
+ size_t tensor_size_bytes = TF_DataTypeSize(TF_FLOAT);
+ TF_Tensor* output = TF_AllocateOutput(
+ /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
+ /*num_dims=*/1, /*len=*/tensor_size_bytes);
+ EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+ EXPECT_EQ(1, TF_NumDims(output));
+ EXPECT_EQ(1, TF_Dim(output, 0));
+
+ // Set output to 3
+    float* data = reinterpret_cast<float*>(TF_TensorData(output));
+ float value = 3.0f;
+#if GOOGLE_CUDA
+    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
+ cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, &value,
+ tensor_size_bytes);
+#else
+ *data = value;
+#endif
+
+ TF_Status* s = TF_NewStatus();
+ TF_SetOutput(ctx, 0, output, s);
+ EXPECT_EQ(TF_OK, TF_GetCode(s));
+
+ TF_DeleteStatus(s);
+ TF_DeleteTensor(output);
+ };
+
+ SetupOp("AllocateOutputOp1", "AllocateOutput1", my_compute_func);
+
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [1] values: 3>",
+            output->DebugString(100));
+}
+
+REGISTER_OP("AllocateOutputOp0").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateEmptyOutput) {
+ auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+ // Allocate empty output
+ int64_t dim = 0;
+ TF_Tensor* output = TF_AllocateOutput(
+ /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/&dim,
+ /*num_dims=*/1, /*len=*/0);
+
+ EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+ EXPECT_EQ(1, TF_NumDims(output));
+ EXPECT_EQ(0, TF_Dim(output, 0));
+
+ TF_Status* s = TF_NewStatus();
+ TF_SetOutput(ctx, 0, output, s);
+ EXPECT_EQ(TF_OK, TF_GetCode(s));
+
+ TF_DeleteStatus(s);
+ TF_DeleteTensor(output);
+ };
+
+ SetupOp("AllocateOutputOp0", "AllocateOutput0", my_compute_func);
+
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [0] values: >",
+            output->DebugString(100));
+}
+
+REGISTER_OP("AllocateOutputOp2x3").Output("output1: float");
+
+TEST_F(DeviceKernelOpTest, TestAllocateOutputSize2x3) {
+ auto my_compute_func = [](void* kernel, TF_OpKernelContext* ctx) {
+ // Allocate 2x3 output
+ int64_t dim[2] = {2, 3};
+ size_t tensor_size_bytes = 6 * TF_DataTypeSize(TF_FLOAT);
+ TF_Tensor* output = TF_AllocateOutput(
+ /*context=*/ctx, /*index=*/0, /*dtype=*/TF_FLOAT, /*dims=*/dim,
+ /*num_dims=*/2, /*len=*/tensor_size_bytes);
+ EXPECT_EQ(TF_FLOAT, TF_TensorType(output));
+ EXPECT_EQ(2, TF_NumDims(output));
+ EXPECT_EQ(2, TF_Dim(output, 0));
+ EXPECT_EQ(3, TF_Dim(output, 1));
+
+ // Set output to [1 2 3 4 5 6]
+ void* data = TF_TensorData(output);
+ float value[6] = {1, 2, 3, 4, 5, 6};
+#if GOOGLE_CUDA
+    OpKernelContext* cc_ctx = reinterpret_cast<OpKernelContext*>(ctx);
+ cc_ctx->eigen_gpu_device().memcpyHostToDevice(data, value,
+ tensor_size_bytes);
+#else
+ memcpy(data, value, tensor_size_bytes);
+#endif
+
+ TF_Status* s = TF_NewStatus();
+ TF_SetOutput(ctx, 0, output, s);
+ EXPECT_EQ(TF_OK, TF_GetCode(s));
+
+ TF_DeleteStatus(s);
+ TF_DeleteTensor(output);
+ };
+
+ SetupOp("AllocateOutputOp2x3", "AllocateOutput2x3", my_compute_func);
+
+ TF_ASSERT_OK(RunOpKernel());
+ Tensor* output = GetOutput(0);
+  EXPECT_EQ("Tensor<type: float shape: [2,3] values: [1 2 3][4 5 6]>",
+            output->DebugString(100));
+}
} // namespace tensorflow
diff --git a/tensorflow/c/tf_tensor.cc b/tensorflow/c/tf_tensor.cc
index deb36166a47..2ad778d6057 100644
--- a/tensorflow/c/tf_tensor.cc
+++ b/tensorflow/c/tf_tensor.cc
@@ -31,6 +31,37 @@ using tensorflow::TensorBuffer;
using tensorflow::errors::FailedPrecondition;
using tensorflow::errors::InvalidArgument;
+namespace tensorflow {
+void* allocate_tensor(const char* operation, size_t len, Allocator* allocator) {
+ void* data = allocator->AllocateRaw(EIGEN_MAX_ALIGN_BYTES, len);
+ if (LogMemory::IsEnabled() && data != nullptr) {
+ LogMemory::RecordRawAllocation(
+ operation, LogMemory::EXTERNAL_TENSOR_ALLOCATION_STEP_ID, len, data,
+ allocator);
+ }
+ return data;
+}
+
+void* allocate_tensor(const char* operation, size_t len) {
+ return allocate_tensor(operation, len, cpu_allocator());
+}
+
+void deallocate_buffer(void* data, size_t len, void* arg) {
+ Allocator* allocator = nullptr;
+ if (arg == nullptr) {
+ allocator = cpu_allocator();
+ } else {
+    allocator = reinterpret_cast<Allocator*>(arg);
+ }
+ if (LogMemory::IsEnabled() && data != nullptr) {
+ LogMemory::RecordRawDeallocation(
+ "TensorFlow C Api", LogMemory::EXTERNAL_TENSOR_ALLOCATION_STEP_ID, data,
+ allocator, false);
+ }
+ allocator->DeallocateRaw(data);
+}
+} // namespace tensorflow
+
namespace {
class TF_ManagedBuffer : public TensorBuffer {
public:
@@ -63,36 +94,15 @@ class TF_ManagedBuffer : public TensorBuffer {
bool OwnsMemory() const override { return false; }
};
-void* allocate_tensor(const char* operation, size_t len) {
- void* data =
- tensorflow::cpu_allocator()->AllocateRaw(EIGEN_MAX_ALIGN_BYTES, len);
- if (tensorflow::LogMemory::IsEnabled() && data != nullptr) {
- tensorflow::LogMemory::RecordRawAllocation(
- operation, tensorflow::LogMemory::EXTERNAL_TENSOR_ALLOCATION_STEP_ID,
- len, data, tensorflow::cpu_allocator());
- }
- return data;
-}
-
-void deallocate_buffer(void* data, size_t len, void* arg) {
- if (tensorflow::LogMemory::IsEnabled() && data != nullptr) {
- tensorflow::LogMemory::RecordRawDeallocation(
- "TensorFlow C Api",
- tensorflow::LogMemory::EXTERNAL_TENSOR_ALLOCATION_STEP_ID, data,
- tensorflow::cpu_allocator(), false);
- }
- tensorflow::cpu_allocator()->DeallocateRaw(data);
-}
-
} // namespace
-TF_Tensor::~TF_Tensor() { buffer->Unref(); }
-
TF_Tensor* TF_AllocateTensor(TF_DataType dtype, const int64_t* dims,
int num_dims, size_t len) {
- void* data = allocate_tensor("TF_AllocateTensor", len);
- return TF_NewTensor(dtype, dims, num_dims, data, len, deallocate_buffer,
- nullptr);
+ void* data = tensorflow::allocate_tensor("TF_AllocateTensor", len,
+ tensorflow::cpu_allocator());
+ return TF_NewTensor(dtype, dims, num_dims, data, len,
+ tensorflow::deallocate_buffer,
+ tensorflow::cpu_allocator());
}
TF_Tensor* TF_NewTensor(TF_DataType dtype, const int64_t* dims, int num_dims,
@@ -117,8 +127,8 @@ TF_Tensor* TF_NewTensor(TF_DataType dtype, const int64_t* dims, int num_dims,
//
// Other types have the same representation, so copy only if it is safe to
// do so.
- buf = new TF_ManagedBuffer(allocate_tensor("TF_NewTensor", len), len,
- deallocate_buffer, nullptr);
+ buf = new TF_ManagedBuffer(tensorflow::allocate_tensor("TF_NewTensor", len),
+ len, tensorflow::deallocate_buffer, nullptr);
std::memcpy(buf->data(), data, len);
// Free the original buffer.
deallocator(data, len, deallocator_arg);
@@ -126,9 +136,12 @@ TF_Tensor* TF_NewTensor(TF_DataType dtype, const int64_t* dims, int num_dims,
buf = new TF_ManagedBuffer(data, len, deallocator, deallocator_arg);
}
- TF_Tensor* ret = new TF_Tensor{dtype, tensorflow::TensorShape(dimvec), buf};
+ TF_Tensor* ret =
+      new TF_Tensor{Tensor(static_cast<tensorflow::DataType>(dtype),
+ tensorflow::TensorShape(dimvec), buf)};
+ buf->Unref();
size_t elem_size = TF_DataTypeSize(dtype);
- if (elem_size > 0 && len < (elem_size * ret->shape.num_elements())) {
+ if (elem_size > 0 && len < (elem_size * ret->tensor.NumElements())) {
delete ret;
return nullptr;
}
@@ -139,7 +152,7 @@ TF_Tensor* TF_TensorMaybeMove(TF_Tensor* tensor) {
// It is safe to move the Tensor if and only if we own the unique reference to
// it. In that case, we might as well not delete and reallocate, but a future
// implementation might need to do so.
- TensorBuffer* buf = tensor->buffer;
+ TensorBuffer* buf = tensorflow::TensorCApi::Buffer(tensor->tensor);
if (buf->RefCountIsOne() && buf->root_buffer()->RefCountIsOne() &&
buf->OwnsMemory()) {
return tensor;
@@ -149,13 +162,23 @@ TF_Tensor* TF_TensorMaybeMove(TF_Tensor* tensor) {
void TF_DeleteTensor(TF_Tensor* t) { delete t; }
-TF_DataType TF_TensorType(const TF_Tensor* t) { return t->dtype; }
-int TF_NumDims(const TF_Tensor* t) { return t->shape.dims(); }
-int64_t TF_Dim(const TF_Tensor* t, int dim_index) {
-  return static_cast<int64_t>(t->shape.dim_size(dim_index));
+TF_DataType TF_TensorType(const TF_Tensor* t) {
+  return static_cast<TF_DataType>(t->tensor.dtype());
+}
+
+int TF_NumDims(const TF_Tensor* t) { return t->tensor.dims(); }
+
+int64_t TF_Dim(const TF_Tensor* t, int dim_index) {
+  return static_cast<int64_t>(t->tensor.dim_size(dim_index));
+}
+
+size_t TF_TensorByteSize(const TF_Tensor* t) {
+ return tensorflow::TensorCApi::Buffer(t->tensor)->size();
+}
+
+void* TF_TensorData(const TF_Tensor* t) {
+ return tensorflow::TensorCApi::Buffer(t->tensor)->data();
}
-size_t TF_TensorByteSize(const TF_Tensor* t) { return t->buffer->size(); }
-void* TF_TensorData(const TF_Tensor* t) { return t->buffer->data(); }
int64_t TF_TensorElementCount(const TF_Tensor* t) {
int64_t result = 1;
@@ -166,63 +189,17 @@ int64_t TF_TensorElementCount(const TF_Tensor* t) {
return result;
}
-// Returns the number of elements that would be present in a tensor with the
-// given shape.
-static int64_t ShapeNumElements(const int64_t* dims, int num_dims) {
- int64_t result = 1;
- for (int dim = 0; dim < num_dims; ++dim) {
- result *= dims[dim];
- }
- return result;
-}
-
-static void UnrefIfNonNull(::tensorflow::TensorBuffer* buf) {
- if (buf != nullptr) {
- buf->Unref();
- }
-}
-
-static void RefIfNonNull(::tensorflow::TensorBuffer* buf) {
- if (buf != nullptr) {
- buf->Ref();
- }
-}
-
void TF_TensorBitcastFrom(const TF_Tensor* from, TF_DataType type,
TF_Tensor* to, const int64_t* new_dims,
int num_new_dims, TF_Status* status) {
TF_SetStatus(status, TF_OK, "");
- size_t in_size = TF_DataTypeSize(TF_TensorType(from));
- if (in_size == 0) {
- TF_SetStatus(status, TF_INVALID_ARGUMENT,
- "input tensor has a zero-sized data type");
- return;
- }
- size_t out_size = TF_DataTypeSize(type);
- if (out_size == 0) {
- TF_SetStatus(status, TF_INVALID_ARGUMENT,
- "output tensor has a zero-sized data type");
- return;
- }
-
- if (ShapeNumElements(new_dims, num_new_dims) * out_size !=
- TF_TensorElementCount(from) * in_size) {
- TF_SetStatus(status, TF_INVALID_ARGUMENT,
- "input tensor is not compatible with output shape");
- return;
- }
-
- tensorflow::TensorShapeProto p;
+ tensorflow::TensorShape s;
for (int i = 0; i < num_new_dims; ++i) {
- p.add_dim()->set_size(new_dims[i]);
- }
- to->shape = tensorflow::TensorShape(p);
- to->dtype = type;
- if (to->buffer != from->buffer) {
- UnrefIfNonNull(to->buffer);
- to->buffer = from->buffer;
- RefIfNonNull(to->buffer);
+ s.AddDim(new_dims[i]);
}
+ Status cc_status(to->tensor.BitcastFrom(
+      from->tensor, static_cast<tensorflow::DataType>(type), s));
+ Set_TF_Status_from_Status(status, cc_status);
}
// --------------------------------------------------------------------------
@@ -332,17 +309,19 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
return t;
}
if (src.dtype() != tensorflow::DT_STRING) {
- TensorBuffer* buf = tensorflow::TensorCApi::Buffer(src);
- buf->Ref();
-    return new TF_Tensor{static_cast<TF_DataType>(src.dtype()), src.shape(),
- buf};
+ auto* result = new TF_Tensor();
+ if (!result->tensor.CopyFrom(src, src.shape())) {
+ delete result;
+ return nullptr;
+ }
+ return result;
}
// DT_STRING tensors require a copying since TF_Tensor.buffer expects a flatly
// encoded sequence of strings.
// Compute bytes needed for encoding.
size_t size = 0;
-  const auto& srcarray = src.flat<string>();
+  const auto& srcarray = src.flat<tstring>();
for (int i = 0; i < srcarray.size(); ++i) {
const string& s = srcarray(i);
// uint64 starting_offset, TF_StringEncode-d string.
@@ -393,14 +372,14 @@ TF_Tensor* TF_TensorFromTensor(const tensorflow::Tensor& src,
}
Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
- if (src->dtype == TF_RESOURCE) {
- if (src->shape.dims() != 0) {
+ if (src->tensor.dtype() == DT_RESOURCE) {
+ if (src->tensor.dims() != 0) {
return InvalidArgument(
"Malformed TF_RESOURCE tensor: expected a scalar, got a tensor with "
"shape ",
- src->shape.DebugString());
+ src->tensor.shape().DebugString());
}
- *dst = Tensor(tensorflow::DT_RESOURCE, src->shape);
+ *dst = Tensor(tensorflow::DT_RESOURCE, src->tensor.shape());
    if (!dst->scalar<tensorflow::ResourceHandle>()().ParseFromString(
            string(static_cast<const char*>(TF_TensorData(src)),
TF_TensorByteSize(src)))) {
@@ -409,14 +388,13 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
}
return Status::OK();
}
- if (src->dtype != TF_STRING) {
- *dst =
- tensorflow::TensorCApi::MakeTensor(src->dtype, src->shape, src->buffer);
+ if (src->tensor.dtype() != DT_STRING) {
+ *dst = src->tensor;
return Status::OK();
}
// TF_STRING tensors require copying since Tensor class expects a sequence of
// string objects.
- const tensorflow::int64 num_elements = src->shape.num_elements();
+ const tensorflow::int64 num_elements = src->tensor.NumElements();
  const char* input = reinterpret_cast<const char*>(TF_TensorData(src));
const size_t src_size = TF_TensorByteSize(src);
  if (static_cast<tensorflow::int64>(src_size / sizeof(tensorflow::uint64)) <
@@ -427,8 +405,8 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
const char* data_start = input + sizeof(tensorflow::uint64) * num_elements;
const char* limit = input + src_size;
-  *dst = Tensor(static_cast<tensorflow::DataType>(src->dtype), src->shape);
-  auto dstarray = dst->flat<string>();
+ *dst = Tensor(src->tensor.dtype(), src->tensor.shape());
+  auto dstarray = dst->flat<tstring>();
for (tensorflow::int64 i = 0; i < num_elements; ++i) {
tensorflow::uint64 offset =
        reinterpret_cast<const tensorflow::uint64*>(input)[i];
@@ -447,3 +425,7 @@ Status TF_TensorToTensor(const TF_Tensor* src, Tensor* dst) {
}
} // namespace tensorflow
+
+bool TF_TensorIsAligned(const TF_Tensor* tensor) {
+ return tensor->tensor.IsAligned();
+}
diff --git a/tensorflow/c/tf_tensor.h b/tensorflow/c/tf_tensor.h
index 5d4f70c1b6b..462fdc8b497 100644
--- a/tensorflow/c/tf_tensor.h
+++ b/tensorflow/c/tf_tensor.h
@@ -16,6 +16,7 @@ limitations under the License.
#ifndef TENSORFLOW_C_TF_TENSOR_H_
#define TENSORFLOW_C_TF_TENSOR_H_
+#include <stdbool.h>
 #include <stdint.h>
#include "tensorflow/c/tf_datatype.h"
@@ -175,6 +176,9 @@ TF_CAPI_EXPORT extern size_t TF_StringDecode(const char* src, size_t src_len,
// TF_STRING tensor.
TF_CAPI_EXPORT extern size_t TF_StringEncodedSize(size_t len);
+// Returns bool iff this tensor is aligned.
+TF_CAPI_EXPORT extern bool TF_TensorIsAligned(const TF_Tensor*);
+
#ifdef __cplusplus
} /* end extern "C" */
#endif
diff --git a/tensorflow/c/tf_tensor_internal.h b/tensorflow/c/tf_tensor_internal.h
index 6def66c9412..ea7d49b5966 100644
--- a/tensorflow/c/tf_tensor_internal.h
+++ b/tensorflow/c/tf_tensor_internal.h
@@ -23,13 +23,12 @@ limitations under the License.
// Internal structures used by the C API. These are likely to change and should
// not be depended on.
-struct TF_Tensor {
- ~TF_Tensor();
-
- TF_DataType dtype;
- tensorflow::TensorShape shape;
- tensorflow::TensorBuffer* buffer;
-};
+// This struct forms part of the C API's public interface. It must strictly be
+// passed to or returned from C functions *by pointer*. Otherwise, changes to
+// its internal structure will break the C API's binary interface.
+typedef struct TF_Tensor {
+ ::tensorflow::Tensor tensor;
+} TF_Tensor;
namespace tensorflow {
@@ -42,5 +41,13 @@ class TensorCApi {
}
};
+// Allocates tensor data buffer using specified allocator.
+// `operation` is a name for this operation.
+void* allocate_tensor(const char* operation, size_t len, Allocator* allocator);
+
+// Deallocates tensor data buffer.
+// Defaults to deallocating using CPU allocator. You can pass pointer to
+// a different Allocator as `arg`.
+void deallocate_buffer(void* data, size_t len, void* arg);
} // namespace tensorflow
#endif // TENSORFLOW_C_TF_TENSOR_INTERNAL_H_
diff --git a/tensorflow/cc/BUILD b/tensorflow/cc/BUILD
index 07de89f997e..40b182c8acf 100644
--- a/tensorflow/cc/BUILD
+++ b/tensorflow/cc/BUILD
@@ -649,7 +649,6 @@ cc_library(
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:op_gen_lib",
- "//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"@com_google_absl//absl/strings",
],
@@ -667,7 +666,6 @@ tf_cc_test(
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:op_gen_lib",
- "//tensorflow/core:proto_text",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc
index a0353bf17a6..919e2dfc638 100644
--- a/tensorflow/cc/framework/cc_op_gen.cc
+++ b/tensorflow/cc/framework/cc_op_gen.cc
@@ -27,7 +27,7 @@ limitations under the License.
#include "tensorflow/core/framework/op_gen_lib.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
-#include "tensorflow/core/framework/types.pb_text.h"
+#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/lib/gtl/map_util.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/hash/hash.h"
@@ -193,12 +193,12 @@ string PrintTensor(const TensorProto& tensor_proto) {
string ret;
for (int64 i = 0; i < num_elts; ++i) {
if (i > 0) strings::StrAppend(&ret, " ");
-      strings::StrAppend(&ret, absl::CEscape(t.flat<string>()(i)));
+      strings::StrAppend(&ret, absl::CEscape(t.flat<tstring>()(i)));
}
return ret;
}
default: {
- LOG(FATAL) << "Not handling type " << EnumName_DataType(t.dtype());
+ LOG(FATAL) << "Not handling type " << DataType_Name(t.dtype());
return string();
}
}
@@ -223,7 +223,7 @@ string PrintAttrValue(const string& op, const AttrValue& attr_value) {
case AttrValue::kB:
return attr_value.b() ? "true" : "false";
case AttrValue::kType:
- return EnumName_DataType(attr_value.type());
+ return DataType_Name(attr_value.type());
case AttrValue::kShape:
return PrintTensorShape(attr_value.shape());
case AttrValue::kTensor:
@@ -254,8 +254,7 @@ string PrintAttrValue(const string& op, const AttrValue& attr_value) {
} else if (attr_value.list().type_size() > 0) {
for (int i = 0; i < attr_value.list().type_size(); ++i) {
if (i > 0) strings::StrAppend(&ret, ", ");
- strings::StrAppend(&ret,
- EnumName_DataType(attr_value.list().type(i)));
+ strings::StrAppend(&ret, DataType_Name(attr_value.list().type(i)));
}
} else if (attr_value.list().shape_size() > 0) {
for (int i = 0; i < attr_value.list().shape_size(); ++i) {
diff --git a/tensorflow/cc/framework/cc_ops_test.cc b/tensorflow/cc/framework/cc_ops_test.cc
index ac05e3cf95b..178b4da972a 100644
--- a/tensorflow/cc/framework/cc_ops_test.cc
+++ b/tensorflow/cc/framework/cc_ops_test.cc
@@ -200,10 +200,10 @@ TEST(CCOpTest, TemplatedConst) {
  test::ExpectTensorEqual<float>(
      out, test::AsTensor<float>({3.f, 2.f, -1.f, 0.f}, {2, 2}));
-  auto c2 = ops::Const<string>(root, {{"this"}, {"is"}, {"a"}, {"constant"}});
+  auto c2 = ops::Const<tstring>(root, {{"this"}, {"is"}, {"a"}, {"constant"}});
test::GetTensor(root, c2, &out);
-  test::ExpectTensorEqual<string>(
-      out, test::AsTensor<string>({"this", "is", "a", "constant"}, {4, 1}));
+  test::ExpectTensorEqual<tstring>(
+      out, test::AsTensor<tstring>({"this", "is", "a", "constant"}, {4, 1}));
}
TEST(CCOpTest, EmptyConst) {
diff --git a/tensorflow/cc/framework/ops.cc b/tensorflow/cc/framework/ops.cc
index 920a8e79556..8516dfd7a29 100644
--- a/tensorflow/cc/framework/ops.cc
+++ b/tensorflow/cc/framework/ops.cc
@@ -97,7 +97,7 @@ Input::Initializer::Initializer(
Tensor elem = e.tensor;
if (first.tensor.dtype() == DT_STRING) {
for (int i = 0; i < elem.NumElements(); ++i) {
-        t.flat<string>()(offset + i) = elem.flat<string>()(i);
+        t.flat<tstring>()(offset + i) = elem.flat<tstring>()(i);
}
offset += elem.NumElements();
} else {
diff --git a/tensorflow/cc/framework/ops.h b/tensorflow/cc/framework/ops.h
index 0717e7dd4b3..1414e861002 100644
--- a/tensorflow/cc/framework/ops.h
+++ b/tensorflow/cc/framework/ops.h
@@ -111,7 +111,7 @@ class Input {
Initializer(const T& v) { // NOLINT(runtime/explicit)
    typedef typename RealType<T>::type RealT;
    Tensor t(DataTypeToEnum<RealT>::v(), TensorShape());
-    t.flat<T>()(0) = RealT(v);
+    t.flat<RealT>()(0) = RealT(v);
tensor = t;
}
@@ -125,7 +125,7 @@ class Input {
    typedef typename RealType<T>::type RealT;
    Tensor t(DataTypeToEnum<RealT>::v(), shape);
for (int64 i = 0; i < t.NumElements(); ++i) {
-      t.flat<T>()(i) = RealT(v);
+      t.flat<RealT>()(i) = RealT(v);
}
tensor = t;
}
@@ -170,7 +170,7 @@ class Input {
// START_SKIP_DOXYGEN
  template <typename T, bool = std::is_convertible<T, string>::value>
struct RealType {
- typedef string type;
+ typedef tstring type;
};
  template <typename T>
diff --git a/tensorflow/cc/framework/scope.cc b/tensorflow/cc/framework/scope.cc
index e93ca8633e6..b5cac5fec28 100644
--- a/tensorflow/cc/framework/scope.cc
+++ b/tensorflow/cc/framework/scope.cc
@@ -272,7 +272,7 @@ std::unordered_set<string> Scope::Impl::GetColocationConstraints(
   std::unordered_set<string> current_constraints(colocation_constraints_);
   const AttrSlice attrs = colocate_with_op.node()->attrs();
   std::vector<string> node_constraints;
- if (GetNodeAttr(attrs, kColocationAttrName, &node_constraints).ok()) {
+ if (TryGetNodeAttr(attrs, kColocationAttrName, &node_constraints)) {
for (const string& entry : node_constraints) {
StringPiece s(entry);
if (absl::ConsumePrefix(&s, kColocationGroupPrefix)) {
@@ -299,7 +299,7 @@ const std::vector<Operation>& Scope::control_deps() const {
return impl()->control_deps_;
}
-void Scope::UpdateStatus(const Status s) const {
+void Scope::UpdateStatus(const Status& s) const {
impl()->status_->Update(s);
if (impl()->exit_on_error_ && !ok()) {
LOG(FATAL) << *impl()->status_;
@@ -318,7 +318,7 @@ Status Scope::ToGraph(Graph* g, GraphConstructorOptions opts) const {
if (ok()) {
GraphDef graph_def;
graph()->ToGraphDef(&graph_def);
- UpdateStatus(ConvertGraphDefToGraph(opts, graph_def, g));
+ UpdateStatus(ConvertGraphDefToGraph(opts, std::move(graph_def), g));
}
return *impl()->status_;
}
diff --git a/tensorflow/cc/framework/scope.h b/tensorflow/cc/framework/scope.h
index ef2daff1357..63a555b7217 100644
--- a/tensorflow/cc/framework/scope.h
+++ b/tensorflow/cc/framework/scope.h
@@ -177,7 +177,7 @@ class Scope {
/// Note: The status object is shared between all children of this scope.
/// If the resulting status is not Status::OK() and exit_on_error_ is set on
/// this scope, this function exits by calling LOG(FATAL).
- void UpdateStatus(const Status s) const;
+ void UpdateStatus(const Status& s) const;
// START_SKIP_DOXYGEN
diff --git a/tensorflow/cc/ops/const_op_test.cc b/tensorflow/cc/ops/const_op_test.cc
index 69b5d7fd47c..345cd23b9ec 100644
--- a/tensorflow/cc/ops/const_op_test.cc
+++ b/tensorflow/cc/ops/const_op_test.cc
@@ -97,7 +97,7 @@ TEST(ConstOpTest, WithExplicitShape) {
auto d = ops::Const(root, {"1", "2", "3", "4", "5", "6"}, {2, 3});
TF_CHECK_OK(root.status());
EXPECT_EQ(d.op().output_type(0), DT_STRING);
-  ExpectNodeEqual<string>(d.node(), {"1", "2", "3", "4", "5", "6"}, {2, 3});
+  ExpectNodeEqual<tstring>(d.node(), {"1", "2", "3", "4", "5", "6"}, {2, 3});
}
TEST(ConstOpTest, FromProto) {
@@ -144,7 +144,7 @@ TEST(ConstOpTest, TemplatedConst) {
auto c1 = ops::Const(root, {1, 2});
ExpectTypeAndShape(c1.node(), DT_INT32, {2});
-  auto c2 = ops::Const<string>(root, {{"this"}, {"is"}, {"a"}, {"constant"}});
+  auto c2 = ops::Const<tstring>(root, {{"this"}, {"is"}, {"a"}, {"constant"}});
ExpectTypeAndShape(c2.node(), DT_STRING, {4, 1});
}
diff --git a/tensorflow/cc/profiler/BUILD b/tensorflow/cc/profiler/BUILD
index d18a0bcab0c..5b4a105eb28 100644
--- a/tensorflow/cc/profiler/BUILD
+++ b/tensorflow/cc/profiler/BUILD
@@ -9,6 +9,7 @@ tf_cuda_cc_test(
name = "profiler_test",
srcs = ["profiler_test.cc"],
tags = [
+ "no_rocm", # stream level tracing not supported on ROCm
"nogpu", # b/77649654
],
deps = [
diff --git a/tensorflow/cc/saved_model/BUILD b/tensorflow/cc/saved_model/BUILD
index 01752b65f2f..39b84922d13 100644
--- a/tensorflow/cc/saved_model/BUILD
+++ b/tensorflow/cc/saved_model/BUILD
@@ -10,7 +10,7 @@ load(
"tf_cc_test",
)
load(
- "//tensorflow/core:platform/default/build_config_root.bzl",
+ "//tensorflow/core/platform:default/build_config_root.bzl",
"if_static",
"if_static_and_not_mobile",
)
diff --git a/tensorflow/cc/saved_model/loader.cc b/tensorflow/cc/saved_model/loader.cc
index dfc7ccd9542..a3b80fbdba5 100644
--- a/tensorflow/cc/saved_model/loader.cc
+++ b/tensorflow/cc/saved_model/loader.cc
@@ -75,7 +75,7 @@ Status LoadMetaGraphIntoSession(const MetaGraphDef& meta_graph_def,
Tensor CreateStringTensor(const string& value) {
Tensor tensor(DT_STRING, TensorShape({}));
-  tensor.scalar<string>()() = value;
+  tensor.scalar<tstring>()() = value;
return tensor;
}
@@ -219,7 +219,7 @@ Status RunRestore(const RunOptions& run_options, const string& export_dir,
// Add variables to the graph.
Tensor variables_path_tensor(DT_STRING, TensorShape({}));
-  variables_path_tensor.scalar<string>()() = variables_path;
+  variables_path_tensor.scalar<tstring>()() = variables_path;
  std::vector<std::pair<string, Tensor>> inputs = {
{string(variable_filename_const_op_name), variables_path_tensor}};
diff --git a/tensorflow/cc/saved_model/loader_test.cc b/tensorflow/cc/saved_model/loader_test.cc
index 422994ba07c..aa2031d17d2 100644
--- a/tensorflow/cc/saved_model/loader_test.cc
+++ b/tensorflow/cc/saved_model/loader_test.cc
@@ -63,8 +63,8 @@ class LoaderTest : public ::testing::Test {
bundle.session->Run({}, {"filename_tensor:0"}, {}, &path_outputs));
ASSERT_EQ(1, path_outputs.size());
-    test::ExpectTensorEqual<string>(
-        test::AsTensor<string>({"foo.txt"}, TensorShape({})), path_outputs[0]);
+    test::ExpectTensorEqual<tstring>(
+        test::AsTensor<tstring>({"foo.txt"}, TensorShape({})), path_outputs[0]);
}
void CheckSavedModelBundle(const string& export_dir,
@@ -78,14 +78,14 @@ class LoaderTest : public ::testing::Test {
const string output_name =
signature_def.outputs().at(kRegressOutputs).name();
-    std::vector<string> serialized_examples;
+    std::vector<tstring> serialized_examples;
for (float x : {0, 1, 2, 3}) {
serialized_examples.push_back(MakeSerializedExample(x));
}
// Validate the half plus two behavior.
Tensor input =
-        test::AsTensor<string>(serialized_examples, TensorShape({4}));
+        test::AsTensor<tstring>(serialized_examples, TensorShape({4}));
    std::vector<Tensor> outputs;
TF_ASSERT_OK(bundle.session->Run({{input_name, input}}, {output_name}, {},
&outputs));
diff --git a/tensorflow/cc/saved_model/python/BUILD b/tensorflow/cc/saved_model/python/BUILD
index fca45c869fd..b1440655c72 100644
--- a/tensorflow/cc/saved_model/python/BUILD
+++ b/tensorflow/cc/saved_model/python/BUILD
@@ -1,7 +1,7 @@
# Description:
# CLIF wrappers for TensorFlow SavedModels.
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_py_clif_cc")
+load("//tensorflow/core/platform:default/build_config.bzl", "tf_py_clif_cc")
package(
default_visibility = ["//visibility:public"],
diff --git a/tensorflow/cc/saved_model/reader.cc b/tensorflow/cc/saved_model/reader.cc
index 799856f7fd4..d6d99229372 100644
--- a/tensorflow/cc/saved_model/reader.cc
+++ b/tensorflow/cc/saved_model/reader.cc
@@ -48,12 +48,12 @@ Status ReadSavedModel(const string& export_dir, SavedModel* saved_model_proto) {
export_dir);
}
-Status FindMetaGraphDef(const SavedModel& saved_model_proto,
-                        const std::unordered_set<string>& tags,
+Status FindMetaGraphDef(const std::unordered_set<string>& tags,
+ SavedModel* saved_model_proto,
MetaGraphDef* meta_graph_def) {
LOG(INFO) << "Reading meta graph with tags { " << absl::StrJoin(tags, " ")
<< " }";
- for (const MetaGraphDef& graph_def : saved_model_proto.meta_graphs()) {
+ for (MetaGraphDef& graph_def : *saved_model_proto->mutable_meta_graphs()) {
// Get tags from the graph_def.
    std::unordered_set<string> graph_tags;
for (const string& tag : graph_def.meta_info_def().tags()) {
@@ -61,7 +61,7 @@ Status FindMetaGraphDef(const SavedModel& saved_model_proto,
}
// Match with the set of tags provided.
if (graph_tags == tags) {
- *meta_graph_def = graph_def;
+ *meta_graph_def = std::move(graph_def);
return Status::OK();
}
}
@@ -81,7 +81,8 @@ Status ReadMetaGraphDefFromSavedModel(const string& export_dir,
MetaGraphDef* const meta_graph_def) {
SavedModel saved_model_proto;
TF_RETURN_IF_ERROR(ReadSavedModel(export_dir, &saved_model_proto));
- TF_RETURN_IF_ERROR(FindMetaGraphDef(saved_model_proto, tags, meta_graph_def));
+ TF_RETURN_IF_ERROR(
+ FindMetaGraphDef(tags, &saved_model_proto, meta_graph_def));
return Status::OK();
}
diff --git a/tensorflow/cc/tools/freeze_saved_model.cc b/tensorflow/cc/tools/freeze_saved_model.cc
index eeb91017890..0ec48ec9357 100644
--- a/tensorflow/cc/tools/freeze_saved_model.cc
+++ b/tensorflow/cc/tools/freeze_saved_model.cc
@@ -42,6 +42,10 @@ void GetTensorNamesFromTensorInfo(const TensorInfo& tensor_info,
tensor_names->insert(coo_sparse.values_tensor_name());
tensor_names->insert(coo_sparse.indices_tensor_name());
tensor_names->insert(coo_sparse.dense_shape_tensor_name());
+ } else if (tensor_info.has_composite_tensor()) {
+ for (const auto& component : tensor_info.composite_tensor().components()) {
+ tensor_names->insert(component.name());
+ }
} else {
tensor_names->insert(tensor_info.name());
}
diff --git a/tensorflow/cc/tools/freeze_saved_model_test.cc b/tensorflow/cc/tools/freeze_saved_model_test.cc
index 979b23c3fc5..274a1630a05 100644
--- a/tensorflow/cc/tools/freeze_saved_model_test.cc
+++ b/tensorflow/cc/tools/freeze_saved_model_test.cc
@@ -425,5 +425,63 @@ TEST_F(FreezeTest, GraphDefWithAndWithoutDependentResourceVariables) {
TestFreezeGraphWithAndWithoutDependentVariables(true);
}
+TEST_F(FreezeTest, InputsAndOutputsCompositeTensorSignatureDef) {
+ // Test that inputs and outputs get correctly populated for a
+ // SignatureDef containing composite tensor inputs and outputs.
+ SavedModelBundle saved_model_bundle;
+ SignatureDef signature_def;
+
+ TensorInfo& in = (*signature_def.mutable_inputs())["input_arg"];
+ in.mutable_composite_tensor()->add_components()->set_name("input1:0");
+ in.mutable_composite_tensor()->add_components()->set_name("input2:0");
+
+ TensorInfo& out = (*signature_def.mutable_outputs())["output_arg"];
+ out.mutable_composite_tensor()->add_components()->set_name("output2:0");
+ out.mutable_composite_tensor()->add_components()->set_name("output1:0");
+
+ AddSignatureDefToSavedModelBundle(signature_def, "signature_def",
+ &saved_model_bundle);
+ GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+ TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+ &outputs));
+  std::unordered_set<string> expected_inputs = {"input1:0", "input2:0"};
+  std::unordered_set<string> expected_outputs = {"output1:0", "output2:0"};
+ EXPECT_EQ(expected_inputs, inputs);
+ EXPECT_EQ(expected_outputs, outputs);
+}
+
+TEST_F(FreezeTest, InputsAndOutputsSparseCooSignatureDef) {
+ // Test that inputs and outputs get correctly populated for a
+  // SignatureDef containing COO sparse tensor inputs and outputs.
+ SavedModelBundle saved_model_bundle;
+ SignatureDef signature_def;
+
+ TensorInfo& in = (*signature_def.mutable_inputs())["input_arg"];
+ in.mutable_coo_sparse()->set_values_tensor_name("input1:0");
+ in.mutable_coo_sparse()->set_indices_tensor_name("input2:0");
+ in.mutable_coo_sparse()->set_dense_shape_tensor_name("input3:0");
+
+ TensorInfo& out = (*signature_def.mutable_outputs())["output_arg"];
+ out.mutable_coo_sparse()->set_values_tensor_name("output1:0");
+ out.mutable_coo_sparse()->set_indices_tensor_name("output2:0");
+ out.mutable_coo_sparse()->set_dense_shape_tensor_name("output3:0");
+
+ AddSignatureDefToSavedModelBundle(signature_def, "signature_def",
+ &saved_model_bundle);
+ GraphDef frozen_graph_def;
+  std::unordered_set<string> inputs;
+  std::unordered_set<string> outputs;
+ TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs,
+ &outputs));
+  std::unordered_set<string> expected_inputs = {"input1:0", "input2:0",
+                                                "input3:0"};
+  std::unordered_set<string> expected_outputs = {"output1:0", "output2:0",
+                                                 "output3:0"};
+ EXPECT_EQ(expected_inputs, inputs);
+ EXPECT_EQ(expected_outputs, outputs);
+}
+
} // namespace
} // namespace tensorflow
diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD
index 88b00cb2eea..bff56bdda89 100644
--- a/tensorflow/compiler/jit/BUILD
+++ b/tensorflow/compiler/jit/BUILD
@@ -1,7 +1,7 @@
load("//tensorflow:tensorflow.bzl", "tf_cc_test", "cc_header_only_library")
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library", "tf_jit_compilation_passes_extra_deps")
-load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos", "tf_proto_library")
+load("//tensorflow/core/platform:default/build_config.bzl", "tf_additional_all_protos", "tf_proto_library")
package(
default_visibility = [
@@ -144,8 +144,57 @@ cc_library(
],
)
+XLA_DEVICE_DEPS = [
+ ":common",
+ ":xla_launch_util",
+ ":xla_tensor",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/types:optional",
+ "//tensorflow/compiler/jit/ops:xla_ops",
+ "//tensorflow/compiler/tf2xla:common",
+ "//tensorflow/compiler/tf2xla:tf2xla_util",
+ "//tensorflow/compiler/tf2xla:xla_compiler",
+ "//tensorflow/compiler/tf2xla/kernels:xla_ops",
+ "//tensorflow/compiler/xla:util",
+ "//tensorflow/compiler/xla/client:client_library",
+ "//tensorflow/compiler/xla/client:global_data",
+ "//tensorflow/compiler/xla/client:local_client",
+ "//tensorflow/compiler/xla/service:stream_pool",
+ "//tensorflow/core:array_ops_op_lib",
+ "//tensorflow/core:control_flow_ops_op_lib",
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:core_cpu_internal",
+ "//tensorflow/core:dataset_ops_op_lib",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:functional_ops_op_lib",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:lib_internal",
+ "//tensorflow/core:math_ops_op_lib",
+ "//tensorflow/core:nn_ops_op_lib",
+ "//tensorflow/core:no_op_op_lib",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:resource_variable_ops_op_lib",
+ "//tensorflow/core:sendrecv_ops_op_lib",
+ "//tensorflow/core:state_ops_op_lib",
+ "//tensorflow/core:stream_executor_no_cuda",
+ "//tensorflow/core/kernels:constant_op",
+ "//tensorflow/core/kernels:fifo_queue",
+ "//tensorflow/core/kernels:function_ops",
+ "//tensorflow/core/kernels:identity_op",
+ "//tensorflow/core/kernels:resource_variable_ops",
+ "//tensorflow/core/kernels:shape_ops",
+ "//tensorflow/core/kernels:variable_ops",
+ "//tensorflow/core/kernels/data:generator_dataset_op",
+ "//tensorflow/core/kernels/data:iterator_ops",
+ "//tensorflow/core/kernels/data:optional_ops",
+ "//tensorflow/core/kernels/data:prefetch_dataset_op",
+ "//tensorflow/core/profiler/lib:traceme",
+ "//tensorflow/stream_executor/platform",
+]
+
cc_library(
- name = "xla_device",
+ name = "xla_device_no_jit_rewrite_registration",
srcs = [
"xla_compile_on_demand_op.cc",
"xla_device.cc",
@@ -158,56 +207,22 @@ cc_library(
"xla_device_context.h",
"xla_device_ops.h",
],
+ deps = XLA_DEVICE_DEPS,
+)
+
+cc_library(
+ name = "xla_device",
+ hdrs = [
+ "xla_compile_on_demand_op.h",
+ "xla_device.h",
+ "xla_device_context.h",
+ "xla_device_ops.h",
+ ],
# Public visibility is needed for external TF/XLA backends.
visibility = ["//visibility:public"],
- deps = [
- ":common",
+ deps = XLA_DEVICE_DEPS + [
":jit_compilation_passes",
- ":xla_launch_util",
- ":xla_tensor",
- "//tensorflow/compiler/jit/ops:xla_ops",
- "//tensorflow/compiler/tf2xla:common",
- "//tensorflow/compiler/tf2xla:tf2xla_util",
- "//tensorflow/compiler/tf2xla:xla_compiler",
- "//tensorflow/compiler/tf2xla/kernels:xla_ops",
- "//tensorflow/compiler/xla:util",
- "//tensorflow/compiler/xla/client:client_library",
- "//tensorflow/compiler/xla/client:global_data",
- "//tensorflow/compiler/xla/client:local_client",
- "//tensorflow/compiler/xla/service:stream_pool",
- "//tensorflow/core:array_ops_op_lib",
- "//tensorflow/core:control_flow_ops_op_lib",
- "//tensorflow/core:core_cpu",
- "//tensorflow/core:core_cpu_internal",
- "//tensorflow/core:dataset_ops_op_lib",
- "//tensorflow/core:framework",
- "//tensorflow/core:functional_ops_op_lib",
- "//tensorflow/core:lib",
- "//tensorflow/core:lib_internal",
- "//tensorflow/core:math_ops_op_lib",
- "//tensorflow/core:nn_ops_op_lib",
- "//tensorflow/core:no_op_op_lib",
- "//tensorflow/core:protos_all_cc",
- "//tensorflow/core:resource_variable_ops_op_lib",
- "//tensorflow/core:sendrecv_ops_op_lib",
- "//tensorflow/core:state_ops_op_lib",
- "//tensorflow/core:stream_executor_no_cuda",
- "//tensorflow/core/kernels:constant_op",
- "//tensorflow/core/kernels:fifo_queue",
- "//tensorflow/core/kernels:function_ops",
- "//tensorflow/core/kernels:identity_op",
- "//tensorflow/core/kernels:resource_variable_ops",
- "//tensorflow/core/kernels:shape_ops",
- "//tensorflow/core/kernels:variable_ops",
- "//tensorflow/core/kernels/data:generator_dataset_op",
- "//tensorflow/core/kernels/data:iterator_ops",
- "//tensorflow/core/kernels/data:optional_ops",
- "//tensorflow/core/kernels/data:prefetch_dataset_op",
- "//tensorflow/core/profiler/lib:traceme",
- "//tensorflow/stream_executor/platform",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/types:optional",
+ ":xla_device_no_jit_rewrite_registration",
],
)
@@ -281,6 +296,7 @@ cc_library(
hdrs = ["xla_compilation_cache.h"],
deps = [
":xla_activity_listener",
+ ":xla_activity_proto_cc",
"//tensorflow/compiler/tf2xla:common",
"//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/compiler/xla:statusor",
@@ -292,6 +308,8 @@ cc_library(
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/core/platform:logging",
+ "@com_google_absl//absl/base",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/types:optional",
@@ -324,17 +342,21 @@ cc_library(
alwayslink = 1,
)
+# Linked by tensorflow core, without registration of jit compilation passes
+# which is not necessary to create and run a XlaLocalLaunchBase kernel.
+# Linking jit compilation passes could cause programs stuck right now (b/140069592).
cc_library(
- name = "xla_kernel_creator",
+ name = "xla_kernel_creator_util",
srcs = [
- "xla_kernel_creator.cc",
- "xla_kernel_creator.h",
+ "xla_kernel_creator_util.cc",
],
+ hdrs = ["xla_kernel_creator_util.h"],
+ visibility = ["//tensorflow/core/common_runtime/eager:__pkg__"],
deps = [
":common",
":compilability_check_util",
":compilation_passes",
- "//tensorflow/compiler/jit/kernels:xla_ops",
+ "//tensorflow/compiler/jit/kernels:xla_ops_no_jit_rewrite_registration",
"//tensorflow/compiler/tf2xla:xla_compiler",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
@@ -347,6 +369,23 @@ cc_library(
alwayslink = 1,
)
+cc_library(
+ name = "xla_kernel_creator",
+ srcs = [
+ "xla_kernel_creator.cc",
+ "xla_kernel_creator.h",
+ ],
+ deps = [
+ ":jit_compilation_passes",
+ ":xla_kernel_creator_util",
+ "//tensorflow/core:core_cpu_internal",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:protos_all_cc",
+ ],
+ alwayslink = 1,
+)
+
tf_cc_test(
name = "xla_kernel_creator_test",
srcs = [
@@ -498,6 +537,7 @@ cc_library(
srcs = [
"build_xla_ops_pass.cc",
"clone_constants_for_better_clustering.cc",
+ "cluster_scoping_pass.cc",
"deadness_analysis.cc",
"deadness_analysis_internal.h",
"encapsulate_subgraphs_pass.cc",
@@ -513,6 +553,7 @@ cc_library(
hdrs = [
"build_xla_ops_pass.h",
"clone_constants_for_better_clustering.h",
+ "cluster_scoping_pass.h",
"deadness_analysis.h",
"encapsulate_subgraphs_pass.h",
"encapsulate_xla_computations_pass.h",
@@ -677,6 +718,7 @@ tf_cc_test(
srcs = [
"build_xla_ops_pass_test.cc",
"clone_constants_for_better_clustering_test.cc",
+ "cluster_scoping_pass_test.cc",
"encapsulate_subgraphs_pass_test.cc",
"encapsulate_xla_computations_pass_test.cc",
"extract_outside_compilation_pass_test.cc",
@@ -800,6 +842,8 @@ cc_library(
":flags",
":resource_operation_safety_analysis",
":union_find",
+ ":xla_activity_listener",
+ ":xla_activity_proto_cc",
":xla_cluster_util",
"//tensorflow/compiler/jit/graphcycles",
"//tensorflow/compiler/tf2xla:resource_operation_table",
@@ -837,6 +881,7 @@ tf_cc_test(
"//tensorflow/core:core_cpu",
"//tensorflow/core:framework",
"//tensorflow/core:ops",
+ "//tensorflow/core:protos_all_proto_cc",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"@com_google_absl//absl/memory",
@@ -901,6 +946,7 @@ cc_library(
srcs = ["xla_activity_logging_listener.cc"],
deps = [
":xla_activity_listener",
+ ":xla_activity_proto_cc",
"//tensorflow/core:logger",
"@com_google_absl//absl/memory",
],
diff --git a/tensorflow/compiler/jit/build_xla_ops_pass.cc b/tensorflow/compiler/jit/build_xla_ops_pass.cc
index 1265ff9138a..61695d532d1 100644
--- a/tensorflow/compiler/jit/build_xla_ops_pass.cc
+++ b/tensorflow/compiler/jit/build_xla_ops_pass.cc
@@ -48,6 +48,19 @@ limitations under the License.
namespace tensorflow {
namespace {
+struct DebuggingOpts {
+ // If true, insert Print nodes to print every output from an XLA cluster.
+ bool print_outputs;
+
+ // If true, insert CheckNumerics nodes for every floating point typed input to
+ // an XLA cluster.
+ bool check_input_numerics;
+
+ // If true, insert CheckNumerics nodes for every floating point typed output
+ // from an XLA cluster.
+ bool check_output_numerics;
+};
+
void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
  std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
old_node->out_edges().end());
@@ -78,7 +91,8 @@ Operation DataToControl(const Scope& scope, Output data) {
// Replaces each outgoing edge from `old_node` with a merge node that merges in
// the corresponding output from `new_node`.
void MergeOutgoingDataEdges(const Scope& s, Node* old_node, Node* new_node,
- bool insert_print_nodes) {
+ absl::string_view cluster_name,
+ const DebuggingOpts& debugging_opts) {
if (!s.status().ok()) {
return;
}
@@ -93,23 +107,36 @@ void MergeOutgoingDataEdges(const Scope& s, Node* old_node, Node* new_node,
int oidx = e->src_output();
Output merged_output = merged_outputs[oidx];
if (merged_output.node() == nullptr) {
- ops::Merge merge_op(s.WithOpName(absl::StrCat("merge_oidx_", oidx)),
- {Output(old_node, oidx), Output(new_node, oidx)});
- if (insert_print_nodes) {
+ Output new_output(new_node, oidx);
+ if (debugging_opts.print_outputs) {
string cpu_device = "/job:localhost/replica:0/task:0/device:CPU:0";
- ops::Print print_op(s.WithOpName(absl::StrCat("print_", oidx))
+ ops::Print print_op(s.WithOpName("print_", oidx)
.WithDevice(cpu_device)
.WithAssignedDevice(cpu_device),
- merge_op.output, {merge_op.output},
+ new_output, {new_output},
ops::Print::Attrs{}
.Message(absl::StrCat("output ", oidx, " from ",
old_node->name(), " is "))
.FirstN(1000)
.Summarize(-1));
- merged_output = merged_outputs[oidx] = print_op;
- } else {
- merged_output = merged_outputs[oidx] = merge_op.output;
+ new_output = print_op;
}
+
+ if (debugging_opts.check_output_numerics &&
+ DataTypeIsFloating(new_output.type())) {
+ ops::CheckNumerics check_numerics_op(
+ s.WithOpName("check_output_", oidx)
+ .WithDevice(new_node->requested_device())
+ .WithAssignedDevice(new_node->assigned_device_name()),
+ new_output,
+ absl::StrCat("CheckNumerics failed for output ", oidx, "(",
+ new_output.name(), ") from cluster ", cluster_name));
+ new_output = check_numerics_op;
+ }
+
+ ops::Merge merge_op(s.WithOpName("merge_oidx_", oidx),
+ {Output(old_node, oidx), new_output});
+ merged_output = merged_outputs[oidx] = merge_op.output;
}
Node* dst = e->dst();
@@ -324,11 +351,34 @@ xla::StatusOr<jit::DeviceId> InferDeviceForCluster(
return result;
}
+std::vector<Output>