diff --git a/.bazelrc b/.bazelrc
index 1dd928acdb4..1b9f5e87c6b 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -18,8 +18,10 @@
#
# Compiler options:
# cuda_clang: Use clang when building CUDA code.
-# c++17: Build with C++17 options
-# c++1z: Build with C++17 options
+# c++17: Build with C++17 options (links with libc++)
+# c++1z: Build with C++17 options (links with libc++)
+# c++17_gcc: Build with C++17 options (links with stdlibc++)
+# c++1z_gcc: Build with C++17 options (links with stdlibc++)
# avx_linux: Build with avx instruction set on linux.
# avx2_linux: Build with avx2 instruction set on linux.
# native_arch_linux: Build with instruction sets available to the host machine on linux
@@ -28,6 +30,7 @@
#
# Other build options:
# short_logs: Only log errors during build, skip warnings.
+# verbose_logs: Show all compiler warnings during build.
# monolithic: Build all TF C++ code into a single shared object.
# dynamic_kernels: Try to link all kernels dynamically (experimental).
# libc++: Link against libc++ instead of stdlibc++
@@ -78,7 +81,16 @@
# elinux: General Embedded Linux options shared by all flavors.
# elinux_aarch64: Embedded Linux options for aarch64 (ARM64) CPU support.
# elinux_armhf: Embedded Linux options for armhf (ARMv7) CPU support.
-
+#
+# Release build options (for all operating systems)
+# release_common: Common options for all builds on all operating systems.
+# release_windows_common: Common options for all builds on Windows.
+# release_gpu_common: Common options for GPU builds on Linux and Windows.
+# release_cpu_linux: Toolchain and CUDA options for Linux CPU builds.
+# release_cpu_macos: Toolchain and CUDA options for MacOS CPU builds.
+# release_gpu_linux: Toolchain and CUDA options for Linux GPU builds.
+# release_cpu_windows: Toolchain and CUDA options for Windows CPU builds.
+# release_gpu_windows: Toolchain and CUDA options for Windows GPU builds.
# Allow builds using libc++ as a linker library
# This is mostly for OSSFuzz, so we also pass in the flags from environment to clean build file
@@ -155,14 +167,29 @@ build:mkl -c opt
# config to build OneDNN backend with a user specified threadpool.
build:mkl_threadpool --define=build_with_mkl=true --define=enable_mkl=true
build:mkl_threadpool --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl_threadpool --define=build_with_mkl_dnn_v1_only=true
+build:mkl_threadpool --define=build_with_mkl_opensource=true
build:mkl_threadpool --define=build_with_mkldnn_threadpool=true
build:mkl_threadpool -c opt
+
+# Config setting to build with oneDNN and without the binary blob
+build:mkl_opensource_only --define=build_with_mkl=true --define=enable_mkl=true
+build:mkl_opensource_only --define=tensorflow_mkldnn_contraction_kernel=0
+build:mkl_opensource_only --define=build_with_mkl_dnn_v1_only=true
+build:mkl_opensource_only --define=build_with_mkl_opensource=true
+build:mkl_opensource_only -c opt
+
# This config refers to building with CUDA available. It does not necessarily
# mean that we build CUDA op kernels.
build:using_cuda --define=using_cuda=true
build:using_cuda --action_env TF_NEED_CUDA=1
build:using_cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
+# Enable the mlir generated GPU kernels only for cuda builds.
+build --define=tensorflow_enable_mlir_generated_gpu_kernels=0
+# This is a more specific option, so it takes precedence over the line above for cuda builds.
+build:using_cuda --define=tensorflow_enable_mlir_generated_gpu_kernels=1
+
# This config refers to building CUDA op kernels with nvcc.
build:cuda --config=using_cuda
build:cuda --define=using_cuda_nvcc=true
@@ -253,6 +280,8 @@ build:dynamic_kernels --copt=-DAUTOLOAD_DYNAMIC_KERNELS
build:c++17 --cxxopt=-std=c++1z
build:c++17 --cxxopt=-stdlib=libc++
build:c++1z --config=c++17
+build:c++17_gcc --cxxopt=-std=c++1z
+build:c++1z_gcc --config=c++17_gcc
# Enable using platform specific build settings, except when cross-compiling for
# mobile platforms.
@@ -322,6 +351,8 @@ build:windows --distinct_host_configuration=false
# Suppress all warning messages.
build:short_logs --output_filter=DONT_MATCH_ANYTHING
+build:verbose_logs --output_filter=
+build --config=short_logs
# Instruction set optimizations
# TODO(gunan): Create a feature in toolchains for avx/avx2 to
@@ -341,7 +372,6 @@ build --config=v2
test --config=v2
# Enable XLA
-build:xla --action_env=TF_ENABLE_XLA=1
build:xla --define=with_xla_support=true
# BEGIN TF REMOTE BUILD EXECUTION OPTIONS
@@ -534,3 +564,43 @@ try-import %workspace%/.tf_configure.bazelrc
# Put user-specific options in .bazelrc.user
try-import %workspace%/.bazelrc.user
+
+# Here are bazelrc configs for release builds
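+# Example usage (the target below is illustrative; any buildable target works):
+#   bazel build --config=release_cpu_linux //tensorflow/tools/pip_package:build_pip_package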
+build:release_common --config=opt
+build:release_common --config=v2
+build:release_common --distinct_host_configuration=false
+build:release_common --action_env TF_CONFIGURE_IOS="0"
+
+build:release_cpu_linux --config=release_common
+build:release_cpu_linux --config=avx_linux
+# We use the same toolchain for CPU/GPU packages.
+# Did not add this to the defaults in case this changes.
+build:release_cpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain
+
+build:release_cpu_macos --config=release_common
+build:release_cpu_macos --config=avx_linux
+
+build:release_gpu_common --config=release_common
+build:release_gpu_common --config=cuda
+build:release_gpu_common --config=tensorrt
+build:release_gpu_common --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-10.1"
+build:release_gpu_common --action_env=TF_CUDA_VERSION="10"
+build:release_gpu_common --action_env=TF_CUDNN_VERSION="7"
+build:release_gpu_common --action_env=TF_NEED_TENSORRT="1"
+build:release_gpu_common --action_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_37,sm_52,sm_60,sm_61,compute_70"
+build:release_gpu_common --action_env=TENSORRT_INSTALL_PATH="/usr/local/tensorrt"
+build:release_gpu_common --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/tensorrt/lib"
+build:release_gpu_common --action_env=GCC_HOST_COMPILER_PATH="/usr/bin/gcc-5"
+
+
+build:release_gpu_linux --config=release_gpu_common
+build:release_gpu_linux --config=avx_linux
+build:release_gpu_linux --crosstool_top=//third_party/toolchains/preconfig/ubuntu16.04/gcc7_manylinux2010-nvcc-cuda10.1:toolchain
+
+build:release_windows_common --config=release_common
+build:release_windows_common --define=no_tensorflow_py_deps=true
+build:release_windows_common --announce_rc
+
+build:release_cpu_windows --config=release_windows_common
+
+build:release_gpu_windows --config=release_windows_common
diff --git a/README.md b/README.md
index 9cf595bbf61..6398e8e27a1 100644
--- a/README.md
+++ b/README.md
@@ -123,20 +123,21 @@ Build Type | Status
### Community Supported Builds
-Build Type | Status | Artifacts
------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
-**Linux AMD ROCm GPU** Nightly | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/)
-**Linux AMD ROCm GPU** Stable Release | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | Release [1.15](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/) / [2.x](http://ml-ci.amd.com:21096/job/tensorflow-rocm-v2-release/lastSuccessfulBuild/)
-**Linux s390x** Nightly | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/)
-**Linux s390x CPU** Stable Release | [](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/)
-**Linux ppc64le CPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/)
-**Linux ppc64le CPU** Stable Release | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_CPU_Release_Build/)
-**Linux ppc64le GPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
-**Linux ppc64le GPU** Stable Release | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_GPU_Release_Build/)
-**Linux aarch64 CPU** Nightly<br>Python 3.6 | [](https://status.openlabtesting.org/builds/builds?project=tensorflow%2Ftensorflow) | [Nightly](https://status.openlabtesting.org/builds/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-arm64-build-daily-master)
-**Linux CPU with Intel oneAPI Deep Neural Network Library (oneDNN)** Nightly | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/)
-**Linux CPU with Intel oneAPI Deep Neural Network Library (oneDNN)** Stable Release |  | Release [1.15](https://pypi.org/project/intel-tensorflow/1.15.0/) / [2.x](https://pypi.org/project/intel-tensorflow/)
-**Red Hat® Enterprise Linux® 7.6 CPU & GPU**<br>Python 2.7, 3.6 | [](https://jenkins-tensorflow.apps.ci.centos.org/job/tensorflow-rhel7-3.6/2/) | [1.13.1 PyPI](https://tensorflow.pypi.thoth-station.ninja/index/)
+Build Type | Status | Artifacts
+----------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------
+**Linux AMD ROCm GPU** Nightly | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly) | [Nightly](http://ml-ci.amd.com:21096/job/tensorflow-rocm-nightly/lastSuccessfulBuild/)
+**Linux AMD ROCm GPU** Stable Release | [](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/) | Release [1.15](http://ml-ci.amd.com:21096/job/tensorflow-rocm-release/lastSuccessfulBuild/) / [2.x](http://ml-ci.amd.com:21096/job/tensorflow-rocm-v2-release/lastSuccessfulBuild/)
+**Linux s390x** Nightly | [](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/) | [Nightly](http://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_CI/)
+**Linux s390x CPU** Stable Release | [](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/) | [Release](https://ibmz-ci.osuosl.org/job/TensorFlow_IBMZ_Release_Build/)
+**Linux ppc64le CPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Nightly_Artifact/)
+**Linux ppc64le CPU** Stable Release | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_CPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_CPU_Release_Build/)
+**Linux ppc64le GPU** Nightly | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Build/) | [Nightly](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Nightly_Artifact/)
+**Linux ppc64le GPU** Stable Release | [](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) | Release [1.15](https://powerci.osuosl.org/job/TensorFlow_PPC64LE_GPU_Release_Build/) / [2.x](https://powerci.osuosl.org/job/TensorFlow2_PPC64LE_GPU_Release_Build/)
+**Linux aarch64 CPU** Nightly<br>Python 3.6 | [](https://status.openlabtesting.org/builds/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-arm64-build-daily-master) | [Nightly](https://status.openlabtesting.org/builds/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-arm64-build-daily-master)
+**Linux aarch64 CPU** Stable Release | [](http://status.openlabtesting.org/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-v2.1.0-cpu-arm64-release-build-show&job_name=tensorflow-v1.15.3-cpu-arm64-release-build-show) | Release [1.15](http://status.openlabtesting.org/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-v1.15.3-cpu-arm64-release-build-show) / [2.x](http://status.openlabtesting.org/builds?project=tensorflow%2Ftensorflow&job_name=tensorflow-v2.1.0-cpu-arm64-release-build-show)
+**Linux CPU with Intel oneAPI Deep Neural Network Library (oneDNN)** Nightly | [](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/) | [Nightly](https://tensorflow-ci.intel.com/job/tensorflow-mkl-build-whl-nightly/)
+**Linux CPU with Intel oneAPI Deep Neural Network Library (oneDNN)** Stable Release |  | Release [1.15](https://pypi.org/project/intel-tensorflow/1.15.0/) / [2.x](https://pypi.org/project/intel-tensorflow/)
+**Red Hat® Enterprise Linux® 7.6 CPU & GPU**<br>Python 2.7, 3.6 | [](https://jenkins-tensorflow.apps.ci.centos.org/job/tensorflow-rhel7-3.6/2/) | [1.13.1 PyPI](https://tensorflow.pypi.thoth-station.ninja/index/)
## Resources
diff --git a/RELEASE.md b/RELEASE.md
index 69eca82c5f2..430e1b83885 100644
--- a/RELEASE.md
+++ b/RELEASE.md
@@ -11,10 +11,28 @@
* C-API functions `TF_StringDecode`, `TF_StringEncode`, and
`TF_StringEncodedSize` are no longer relevant and have been removed; see
core/platform/ctstring.h for string access/modification in C.
-* In batching library, rename parameter
- SharedBatchScheduler::QueueOptions::max_batch_size to a more accurate name
- (input_batch_size_limit) for a recent feature to enable split of large batch
- sizes.
+* Removed `tf.distribute.Strategy.experimental_run_v2` method, which was deprecated in TF 2.2.
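+
+  A minimal migration sketch (hedged; `Strategy.run` is the renamed
+  replacement, and the step function here is illustrative):
+
+  ```
+  import tensorflow as tf
+
+  strategy = tf.distribute.MirroredStrategy()
+
+  def train_step(x):
+    return x * 2.0
+
+  # Before TF 2.4: strategy.experimental_run_v2(train_step, args=(...,))
+  per_replica = strategy.run(train_step, args=(tf.constant(1.0),))
+  ```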
+* `tensorflow.python`, `tensorflow.core` and `tensorflow.compiler` modules are
+  now hidden. These modules are not part of the TensorFlow public API.
+* A major refactoring of the internals of the Keras Functional API may affect code that relies on certain internal details:
+ * Code that uses `isinstance(x, tf.Tensor)` instead of `tf.is_tensor` when checking Keras symbolic inputs/outputs should switch to using `tf.is_tensor`.
+ * Code that is overly dependent on the exact names attached to symbolic tensors (e.g. assumes there will be ":0" at the end of the inputs, treats names as unique identifiers instead of using `tensor.ref()`, etc.)
+ * Code that uses `get_concrete_function` to trace Keras symbolic inputs directly should switch to building matching `tf.TensorSpec`s directly and tracing the `TensorSpec` objects.
+ * Code that relies on the exact number and names of the op layers that TensorFlow operations were converted into. These may have changed.
+ * Code that uses `tf.map_fn`/`tf.cond`/`tf.while_loop`/control flow as op layers and happens to work before TF 2.4. These will explicitly be unsupported now. Converting these ops to Functional API op layers was unreliable before TF 2.4, and prone to erroring incomprehensibly or being silently buggy.
+ * Code that directly asserts on a Keras symbolic value in cases where ops like `tf.rank` used to return a static or symbolic value depending on if the input had a fully static shape or not. Now these ops always return symbolic values.
+ * Code already susceptible to leaking tensors outside of graphs becomes slightly more likely to do so now.
+ * Code that requires very tricky shape manipulation via converted op layers in order to work, where the Keras symbolic shape inference proves insufficient.
+  * Code that tries manually walking a `tf.keras.Model` layer by layer and assumes layers only ever have one positional argument. This assumption doesn't hold true before TF 2.4 either, but is more likely to cause issues now.
+ * Code that manually enters `keras.backend.get_graph()` before building a functional model. This is no longer needed.
+* Start enforcing input shape assumptions when calling Functional API Keras
+  models. This may break code where there is a mismatch between the shape used
+  when creating `Input` objects in a Functional model and the shape of the
+  data passed to that model. You can fix this mismatch by either calling the
+  model with correctly-shaped data, or by relaxing `Input` shape assumptions
+  (note that you can pass shapes with `None` entries for axes that are meant
+  to be dynamic), as in the sketch below. You can also disable the input
+  checking entirely by setting `model.input_spec = None`.
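+
+  A minimal sketch of relaxing the shape assumption (layer choices and sizes
+  are illustrative):
+
+  ```
+  import tensorflow as tf
+
+  # Accept any sequence length instead of a fixed one.
+  inputs = tf.keras.Input(shape=(None,), dtype=tf.int32)
+  outputs = tf.keras.layers.Embedding(1000, 8)(inputs)
+  model = tf.keras.Model(inputs, outputs)
+
+  # Or opt out of the input checking entirely:
+  model.input_spec = None
+  ```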
## Known Caveats
@@ -24,6 +42,8 @@
*
*
+* A new module named `tf.experimental.numpy` is added, which is a NumPy-compatible API for writing TF programs. This module provides class `ndarray`, which mimics the `ndarray` class in NumPy, and wraps an immutable `tf.Tensor` under the hood. A subset of NumPy functions (e.g. `numpy.add`) is provided, and their inter-operation with TF facilities is seamless in most cases (see the sketch below). See tensorflow/python/ops/numpy_ops/README.md for details on what is supported and how this differs from NumPy.
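+
+  A minimal interop sketch (the `tnp` alias is illustrative):
+
+  ```
+  import tensorflow as tf
+  import tensorflow.experimental.numpy as tnp
+
+  x = tnp.asarray([[1., 2.], [3., 4.]])  # ndarray backed by a tf.Tensor
+  y = tnp.add(x, 1.)                     # NumPy-style op
+  z = tf.reduce_sum(y)                   # seamless hand-off to TF ops
+  ```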
+* A major refactoring of the internals of the Keras Functional API has been completed, that should improve the reliability, stability, and performance of constructing Functional models.
## Bug Fixes and Other Changes
@@ -31,36 +51,106 @@
*
*
* TF Core:
- *
- * `tf.Tensor` is now a subclass of `typing.Generic`, allowing type annotations
- to be parameterized by dtype: `tf.Tensor[tf.Int32]`. This requires Python 3,
- and will become fully compatible with static type checkers in the future.
-
+  * `tf.types.experimental.TensorLike` is a new `Union` type that can be used
+    as a type annotation for variables representing a Tensor or a value that
+    can be converted to a Tensor by `tf.convert_to_tensor`.
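+
+    A sketch of the annotation in use (the function itself is illustrative):
+
+    ```
+    import tensorflow as tf
+
+    def scale(x: tf.types.experimental.TensorLike) -> tf.Tensor:
+      return tf.convert_to_tensor(x) * 2.0
+
+    scale([1.0, 2.0])        # a list is convertible to a Tensor
+    scale(tf.constant(3.0))  # already a Tensor
+    ```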
+  * Calling ops with Python constants or NumPy values is now consistent with
+    `tf.convert_to_tensor` behavior. This avoids operations like `tf.reshape`
+    truncating inputs such as from int64 to int32.
+  * Added `tf.sparse.map_values` to apply a function to the `.values` of `SparseTensor` arguments.
+  * The Python bitwise operators for `Tensor` (`__and__`, `__or__`, `__xor__`
+    and `__invert__`) now support non-`bool` arguments and apply the
+    corresponding bitwise ops. `bool` arguments continue to be supported and
+    dispatch to logical ops. This brings them more in line with Python and
+    NumPy behavior.
+ * Added `tf.SparseTensor.with_values`. This returns a new SparseTensor with
+ the same sparsity pattern, but with new provided values. It is similar to
+ the `with_values` function of `RaggedTensor`.
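+
+    A combined sketch of the two `SparseTensor` additions above (values and
+    the exact call forms are illustrative):
+
+    ```
+    import tensorflow as tf
+
+    st = tf.SparseTensor(indices=[[0, 0], [1, 1]],
+                         values=[10, 20], dense_shape=[2, 2])
+    doubled = tf.sparse.map_values(tf.multiply, st, 2)  # values become [20, 40]
+    swapped = st.with_values(tf.constant([7, 8]))       # same sparsity pattern
+    ```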
+  * Added the `StatelessCase` op, and use it if none of the case branches has stateful ops.
* `tf.data`:
+ * Added new `tf.data.experimental.service.register_dataset` and
+ `tf.data.experimental.service.from_dataset_id` APIs to enable one process
+ to register a dataset with the tf.data service, and another process to
+ consume data from the dataset.
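+
+    A sketch of the two-process flow (the dispatcher address and
+    `processing_mode` value are illustrative):
+
+    ```
+    import tensorflow as tf
+
+    # Process A: register a dataset with the tf.data service.
+    dataset = tf.data.Dataset.range(10)
+    dataset_id = tf.data.experimental.service.register_dataset(
+        service="grpc://dispatcher:5000", dataset=dataset)
+
+    # Process B: consume it by id.
+    ds = tf.data.experimental.service.from_dataset_id(
+        processing_mode="parallel_epochs",
+        service="grpc://dispatcher:5000",
+        dataset_id=dataset_id,
+        element_spec=dataset.element_spec)
+    ```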
+  * Added support for tf.data service dispatcher fault tolerance. To enable
+    fault tolerance, configure a `work_dir` when running your dispatcher
+    server and set `dispatcher_fault_tolerance=True`. The dispatcher will
+    store its state to `work_dir`, so that on restart it can continue from
+    its previous state.
* Added optional `exclude_cols` parameter to CsvDataset. This parameter is
- the complement of `select_cols`; at most one of these should be specified.
+ the complement of `select_cols`; at most one of these should be specified.
+ * We have implemented an optimization which reorders data-discarding
+ transformations such as `take` and `shard` to happen earlier in the
+ dataset when it is safe to do so. The optimization can be disabled via
+ the `experimental_optimization.reorder_data_discarding_ops` dataset
+ option.
+* `tf.image`:
+  * Added deterministic `tf.image.stateless_random_*` functions for each
+    `tf.image.random_*` function. Added a new op
+    `stateless_sample_distorted_bounding_box` which is a deterministic
+    version of the `sample_distorted_bounding_box` op. Given the same seed,
+    these stateless functions/ops produce the same results independent of how
+    many times the function is called, and independent of global seed
+    settings.
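+
+    For example (the seed pair is arbitrary; identical seeds give identical
+    results):
+
+    ```
+    import tensorflow as tf
+
+    image = tf.zeros([64, 64, 3])
+    seed = (1, 2)
+    flipped = tf.image.stateless_random_flip_left_right(image, seed=seed)
+    brightened = tf.image.stateless_random_brightness(
+        image, max_delta=0.2, seed=seed)
+    ```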
* `tf.distribute`:
*
-* `tf.keras`:
- *
-* `tf.function`/AutoGraph:
- *
+* `tf.keras`:
+ * Improvements from the functional API refactoring:
+ * Functional model construction does not need to maintain a global workspace graph, removing memory leaks especially when building many models or very large models.
+ * Functional model construction should be ~8-10% faster on average.
+ * Functional models can now contain non-symbolic values in their call inputs inside of the first positional argument.
+    * Several classes of TF ops that were not reliably converted to Keras layers during functional API construction should now work, e.g. `tf.image.ssim_multiscale`.
+ * Error messages when Functional API construction goes wrong (and when ops cannot be converted to Keras layers automatically) should be clearer and easier to understand.
+ * `Optimizer.minimize` can now accept a loss `Tensor` and a `GradientTape`
+ as an alternative to accepting a `callable` loss.
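+
+    A sketch of the new calling convention (the variable and loss are
+    illustrative):
+
+    ```
+    import tensorflow as tf
+
+    opt = tf.keras.optimizers.SGD(learning_rate=0.1)
+    var = tf.Variable(2.0)
+    with tf.GradientTape() as tape:
+      loss = var ** 2  # a loss Tensor rather than a callable
+    opt.minimize(loss, var_list=[var], tape=tape)
+    ```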
+ * Added `beta` parameter to FTRL optimizer to match paper.
+  * Added `mobilenet_v3` to Keras application models.
+* `tf.function` / AutoGraph:
+ * Added `experimental_follow_type_hints` argument for `tf.function`. When
+ True, the function may use type annotations to optimize the tracing
+ performance.
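+
+    For example (a sketch; annotated arguments are converted to Tensors, so
+    new Python scalar values can reuse an existing trace):
+
+    ```
+    import tensorflow as tf
+
+    @tf.function(experimental_follow_type_hints=True)
+    def add(x: tf.Tensor, y: tf.Tensor):
+      return x + y
+
+    add(1, 2)  # traced once; inputs are converted to Tensors
+    add(3, 4)  # reuses the trace instead of retracing
+    ```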
+ * Added support for `iter(DistributedDataset)` in AutoGraph `for` loops.
+  * AutoGraph now allows creating new symbols inside a TensorFlow loop, if
+    the values of these symbols at an iteration do not depend on the previous
+    iteration. These types of loops must run at least one iteration, and will
+    raise a runtime error otherwise.
+
+ Example:
+
+    ```
+    for batch in data:
+      outputs = train_step(batch)
+    tf.print('final outputs', outputs)
+    ```
+ See tensorflow/python/autograph/g3doc/reference/limitations.md for more
+ info.
* `tf.lite`:
+ * `DynamicBuffer::AddJoinedString()` will now add a separator if the first
+ string to be joined is empty.
+ * `TFLiteConverter`:
+ * Support optional flags `inference_input_type` and `inference_output_type` for full integer quantized models. This allows users to modify the model input and output type to integer types (`tf.int8`, `tf.uint8`) instead of defaulting to float type (`tf.float32`).
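+
+      A conversion sketch (assumes a post-training full-integer quantization
+      setup; the model path and `representative_dataset` are your own):
+
+      ```
+      import tensorflow as tf
+
+      converter = tf.lite.TFLiteConverter.from_saved_model("/tmp/model")
+      converter.optimizations = [tf.lite.Optimize.DEFAULT]
+      converter.representative_dataset = representative_dataset
+      converter.inference_input_type = tf.int8   # instead of tf.float32
+      converter.inference_output_type = tf.int8
+      tflite_model = converter.convert()
+      ```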
+ * Deprecate `Interpreter::UseNNAPI(bool)` C++ API
+ * Prefer using `NnApiDelegate()` and related delegate configuration methods directly.
+ * Add NNAPI Delegation support for requantization use cases by converting the operation into a dequantize-quantize pair.
*
* `tf.random`:
*
* Math and Linear Algebra:
*
* TPU Enhancements:
+ * Added support for the `beta` parameter of the FTRL optimizer for TPU
+ embeddings. Users of other TensorFlow platforms can implement equivalent
+ behavior by adjusting the `l2` parameter.
*
* XLA Support:
+  * `xla.experimental.compile` is deprecated; use
+    `tf.function(experimental_compile=True)` instead.
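+
+    For example, the replacement usage:
+
+    ```
+    import tensorflow as tf
+
+    @tf.function(experimental_compile=True)  # compile with XLA
+    def dense(x, w, b):
+      return tf.nn.relu(tf.matmul(x, w) + b)
+    ```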
*
* Tracing and Debugging:
*
* Other:
- * We have replaced uses of "whitelist" with "allowlist" where possible.
- Please see https://developers.google.com/style/word-list#blacklist for more
- context.
+ * We have replaced uses of "whitelist" and "blacklist" with "allowlist"
+ and "denylist" where possible. Please see
+ https://developers.google.com/style/word-list#blacklist for more context.
*
## Thanks to our Contributors
@@ -71,19 +161,206 @@ stjohnso98, , , , ,
# Release 2.3.0
-## Breaking Changes
+## Major Features and Improvements
+ * `tf.data` adds two new mechanisms to solve input pipeline bottlenecks and save resources:
+ * [snapshot](https://www.tensorflow.org/api_docs/python/tf/data/experimental/snapshot)
+ * [tf.data service](https://www.tensorflow.org/api_docs/python/tf/data/experimental/service).
-* `tf.image.extract_glimpse` has been updated to correctly process the case
- where `centered=False` and `normalized=False`. This is a breaking change as
- the output is different from (incorrect) previous versions. Note this
- breaking change only impacts `tf.image.extract_glimpse` and
- `tf.compat.v2.image.extract_glimpse` API endpoints. The behavior of
- `tf.compat.v1.image.extract_glimpse` does not change. The behavior of
- exsiting C++ kernel `ExtractGlimpse` does not change as well, so saved
- models will not be impacted.
+  In addition, check out the detailed [guide](https://www.tensorflow.org/guide/data_performance_analysis) for analyzing input pipeline performance with TF Profiler.
+
+  * [`tf.distribute.TPUStrategy`](https://www.tensorflow.org/api_docs/python/tf/distribute/TPUStrategy) is now a stable API and no longer considered experimental (previously `tf.distribute.experimental.TPUStrategy`).
+
+ * [TF Profiler](https://www.tensorflow.org/guide/profiler) introduces two new tools: a memory profiler to visualize your model’s memory usage over time and a [python tracer](https://www.tensorflow.org/guide/profiler#events) which allows you to trace python function calls in your model. Usability improvements include better diagnostic messages and [profile options](https://tensorflow.org/guide/profiler#collect_performance_data) to customize the host and device trace verbosity level.
+
+ * Introduces experimental support for Keras Preprocessing Layers API ([`tf.keras.layers.experimental.preprocessing.*`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing?version=nightly)) to handle data preprocessing operations, with support for composite tensor inputs. Please see below for additional details on these layers.
+
+ * TFLite now properly supports dynamic shapes during conversion and inference. We’ve also added opt-in support on Android and iOS for [XNNPACK](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/delegates/xnnpack), a highly optimized set of CPU kernels, as well as opt-in support for [executing quantized models on the GPU](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/gpu_advanced.md#running-quantized-models-experimental).
+
+ * Libtensorflow packages are available in GCS starting this release. We have also started to [release a nightly version of these packages](https://github.com/tensorflow/tensorflow#official-builds).
+
+  * The experimental Python API [`tf.debugging.experimental.enable_dump_debug_info()`](https://www.tensorflow.org/api_docs/python/tf/debugging/experimental/enable_dump_debug_info) now allows you to instrument a TensorFlow program and dump debugging information to a directory on the file system. The directory can be read and visualized by a new interactive dashboard in TensorBoard 2.3 called [Debugger V2](https://www.tensorflow.org/tensorboard/debugger_v2), which reveals the details of the TensorFlow program including graph structures, history of op executions at the Python (eager) and intra-graph levels, the runtime dtype, shape, and numerical composition of tensors, as well as their code locations.
+
+## Breaking Changes
+* Increases the **minimum bazel version** required to build TF to **3.1.0**.
+* `tf.data`
+  * Makes the following (breaking) changes to `tf.data`:
+    * C++ API: `IteratorBase::RestoreInternal`, `IteratorBase::SaveInternal`, and `DatasetBase::CheckExternalState` become pure-virtual and subclasses are now expected to provide an implementation.
+ * The deprecated `DatasetBase::IsStateful` method is removed in favor of `DatasetBase::CheckExternalState`.
+ * Deprecated overrides of `DatasetBase::MakeIterator` and `MakeIteratorFromInputElement` are removed.
+  * The signature of `tensorflow::data::IteratorBase::SaveInternal` and `tensorflow::data::IteratorBase::SaveInput` has been extended with a `SerializationContext` argument to enable overriding the default policy for handling external state during iterator checkpointing. This is not a backwards compatible change and all subclasses of `IteratorBase` *need to be updated* accordingly.
+* `tf.keras`
+ * Add a new `BackupAndRestore` callback for handling distributed training failures & restarts. Please take a look at this [tutorial](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for details on how to use the callback.
+* `tf.image.extract_glimpse` has been updated to correctly process the case
+ where `centered=False` and `normalized=False`. This is a breaking change as
+ the output is different from (incorrect) previous versions. Note this
+ breaking change only impacts `tf.image.extract_glimpse` and
+ `tf.compat.v2.image.extract_glimpse` API endpoints. The behavior of
+ `tf.compat.v1.image.extract_glimpse` does not change. The behavior of
+  existing C++ kernel `ExtractGlimpse` does not change either, so saved
+ models using `tf.raw_ops.ExtractGlimpse` will not be impacted.
+
+## Known Caveats
+ * `tf.lite`
+ * Keras-based LSTM models must be converted with an explicit batch size in the input layer.
## Bug Fixes and Other Changes
-* Mutable tables now restore checkpointed values when loaded from SavedModel.
+
+### TF Core:
+ * Set `tf2_behavior` to 1 to enable V2 for early loading cases.
+  * Add `execute_fn_for_device` function to dynamically choose the implementation based on underlying device placement.
+ * Eager:
+    * Add `reduce_logsumexp` benchmark with experimental compile.
+ * Give `EagerTensor`s a meaningful `__array__` implementation.
+ * Add another version of defun matmul for performance analysis.
+ * `tf.function`/AutoGraph:
+ * `AutoGraph` now includes into TensorFlow loops any variables that are closed over by local functions. Previously, such variables were sometimes incorrectly ignored.
+    * Functions returned by the `get_concrete_function` method of `tf.function` objects can now be called with arguments consistent with the original arguments or type specs passed to `get_concrete_function`. This calling convention is now the preferred way to use concrete functions with nested values and composite tensors. Please check the [guide](https://www.tensorflow.org/guide/concrete_function) for more details on `concrete_function`.
+ * Update `tf.function`'s `experimental_relax_shapes` to handle composite tensors appropriately.
+ * Optimize `tf.function` invocation, by removing redundant list converter.
+ * `tf.function` will retrace when called with a different variable instead of simply using the `dtype` & `shape`.
+ * [Improve support](https://github.com/tensorflow/tensorflow/issues/33862) for dynamically-sized TensorArray inside `tf.function`.
+ * `tf.math`:
+ * Narrow down `argmin`/`argmax` contract to always return the smallest index for ties.
+ * `tf.math.reduce_variance` and `tf.math.reduce_std` return correct computation for complex types and no longer support integer types.
+    * Add Bessel functions of order 0 and 1 to `tf.math.special`.
+ * `tf.divide` now always returns a tensor to be consistent with documentation and other APIs.
+ * `tf.image`:
+ * Replaced [`tf.image.non_max_suppression_padded`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/image/non_max_suppression_padded?hl=en) with a new implementation that supports batched inputs, which is considerably faster on TPUs and GPUs. Boxes with area=0 will be ignored. Existing usage with single inputs should still work as before.
+ * `tf.linalg`
+ * Add `tf.linalg.banded_triangular_solve`.
+ * `tf.random`:
+ * Add `tf.random.stateless_parameterized_truncated_normal`.
+ * `tf.ragged`:
+ * Add `tf.ragged.cross` and `tf.ragged.cross_hashed` operations.
+ * `tf.RaggedTensor`:
+ * `RaggedTensor.to_tensor()` now preserves static shape.
+ * Add `tf.strings.format()` and `tf.print()` to support RaggedTensors.
+ * `tf.saved_model`:
+ * `@tf.function` from SavedModel no longer ignores args after a `RaggedTensor` when selecting the concrete function to run.
+ * Fix save model issue for ops with a list of functions.
+ * Add `tf.saved_model.LoadOptions` with [`experimental_io_device`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/saved_model/LoadOptions?hl=en) as arg with default value `None` to choose the I/O device for loading models and weights.
+ * Update `tf.saved_model.SaveOptions` with [`experimental_io_device`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/saved_model/SaveOptions?hl=en) as arg with default value `None` to choose the I/O device for saving models and weights.
+ * Mutable tables now restore checkpointed values when loaded from SavedModel.
+ * GPU
+ * TF 2.3 includes PTX kernels only for [compute capability](https://developer.nvidia.com/cuda-gpus) 7.0 to reduce the TF pip binary size. Earlier releases included PTX for a variety of older compute capabilities.
+ * Others
+ * Retain parent namescope for ops added inside `tf.while_loop`/`tf.cond`/`tf.switch_case`.
+ * Update `tf.vectorized_map` to support vectorizing `tf.while_loop` and TensorList operations.
+ * `tf.custom_gradient` can now be applied to functions that accept nested structures of `tensors` as inputs (instead of just a list of tensors). Note that Python structures such as tuples and lists now won't be treated as tensors, so if you still want them to be treated that way, you need to wrap them with `tf.convert_to_tensor`.
+ * No lowering on gradient case op when input is `DeviceIndex` op.
+ * Extend the ragged version of `tf.gather` to support `batch_dims` and `axis` args.
+ * Update `tf.map_fn` to support RaggedTensors and SparseTensors.
+ * Deprecate `tf.group`. It is not useful in eager mode.
+  * Add CPU and GPU implementation of a modified variation of [`FTRL`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/raw_ops/ApplyFtrl)/[`FTRLV2`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/raw_ops/ApplyFtrlV2) that can be triggered by `multiply_linear_by_lr`, allowing a learning rate of zero.
+
+### `tf.data`:
+ * `tf.data.experimental.dense_to_ragged_batch` works correctly with tuples.
+  * `tf.data.experimental.dense_to_ragged_batch` can now output a variable ragged rank.
+ * `tf.data.experimental.cardinality` is now a method on `tf.data.Dataset`.
+ * `tf.data.Dataset` now supports `len(Dataset)` when the cardinality is finite.
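+
+    For example (`len` only works when the cardinality is finite):
+
+    ```
+    import tensorflow as tf
+
+    ds = tf.data.Dataset.range(42)
+    assert ds.cardinality().numpy() == 42
+    assert len(ds) == 42
+    ```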
+
+### `tf.distribute`:
+ * Expose experimental [`tf.distribute.DistributedDataset`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedDataset?hl=en) and [`tf.distribute.DistributedIterator`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator) to distribute input data when using `tf.distribute` to scale training on multiple devices.
+ * Added a [`get_next_as_optional`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator?hl=en#get_next_as_optional) method for [`tf.distribute.DistributedIterator`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/distribute/DistributedIterator?hl=en) class to return a `tf.experimental.Optional` instance that contains the next value for all replicas or none instead of raising an out of range error. Also see *new* [guide on input distribution](https://www.tensorflow.org/tutorials/distribute/input).
+ * Allow var.assign on MirroredVariables with aggregation=NONE in replica context. Previously this would raise an error. We now allow this because many users and library writers find using `.assign` in replica context to be more convenient, instead of having to use `Strategy.extended.update` which was the previous way of updating variables in this situation.
+ * `tf.distribute.experimental.MultiWorkerMirroredStrategy` adds support for partial batches. Workers running out of data now continue to participate in the training with empty inputs, instead of raising an error. Learn more about [partial batches here](https://www.tensorflow.org/tutorials/distribute/input#partial_batches).
+ * Improve the performance of reading metrics eagerly under `tf.distribute.experimental.MultiWorkerMirroredStrategy`.
+ * Fix the issue that `strategy.reduce()` inside `tf.function` may raise exceptions when the values to reduce are from loops or if-clauses.
+ * Fix the issue that `tf.distribute.MirroredStrategy` cannot be used together with `tf.distribute.experimental.MultiWorkerMirroredStrategy`.
+ * Add a `tf.distribute.cluster_resolver.TPUClusterResolver.connect` API to simplify TPU initialization.
+
+### `tf.keras`:
+ * Introduces experimental preprocessing layers API (`tf.keras.layers.experimental.preprocessing`) to handle data preprocessing operations such as categorical feature encoding, text vectorization, data normalization, and data discretization (binning). The newly added layers provide a replacement for the legacy feature column API, and support composite tensor inputs.
+ * Added **categorical data** processing layers:
+ * `IntegerLookup` & `StringLookup`: build an index of categorical feature values
+ * `CategoryEncoding`: turn integer-encoded categories into one-hot, multi-hot, or tf-idf encoded representations
+ * `CategoryCrossing`: create new categorical features representing co-occurrences of previous categorical feature values
+ * `Hashing`: the hashing trick, for large-vocabulary categorical features
+ * `Discretization`: turn continuous numerical features into categorical features by binning their values
+ * Improved **image preprocessing** layers: `CenterCrop`, `Rescaling`
+ * Improved **image augmentation** layers: `RandomCrop`, `RandomFlip`, `RandomTranslation`, `RandomRotation`, `RandomHeight`, `RandomWidth`, `RandomZoom`, `RandomContrast`
+ * Improved **`TextVectorization`** layer, which handles string tokenization, n-gram generation, and token encoding
+    * The `TextVectorization` layer now accounts for the `mask_token` as part of the vocabulary size when `output_mode='int'`. This means that, if you have a `max_tokens` value of 5000, your output will have 5000 unique values (not 5001 as before).
+    * Change the return value of `TextVectorization.get_vocabulary()` from `byte` to `string`. Users who previously were calling `decode` on the output of this method should no longer need to do so.
+  * Introduce new Keras dataset generation utilities:
+ * **[`image_dataset_from_directory`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image_dataset_from_directory)** is a utility based on `tf.data.Dataset`, meant to replace the legacy `ImageDataGenerator`. It takes you from a structured directory of images to a labeled dataset, in one function call. Note that it doesn't perform image data augmentation (which is meant to be done using preprocessing layers).
+ * **[`text_dataset_from_directory`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/text_dataset_from_directory)** takes you from a structured directory of text files to a labeled dataset, in one function call.
+ * **[`timeseries_dataset_from_array`](https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/timeseries_dataset_from_array)** is a `tf.data.Dataset`-based replacement of the legacy `TimeseriesGenerator`. It takes you from an array of timeseries data to a dataset of shifting windows with their targets.
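+
+    A sketch of the image utility (assumes a directory with one subfolder per
+    class; sizes are illustrative):
+
+    ```
+    import tensorflow as tf
+
+    train_ds = tf.keras.preprocessing.image_dataset_from_directory(
+        "data/train", image_size=(180, 180), batch_size=32)
+    for images, labels in train_ds.take(1):
+      print(images.shape, labels.shape)  # (32, 180, 180, 3) (32,)
+    ```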
+ * Added [`experimental_steps_per_execution`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/Model?hl=en#compile)
+ arg to `model.compile` to indicate the number of batches to run per `tf.function` call. This can speed up Keras Models on TPUs up to 3x.
+ * Extends `tf.keras.layers.Lambda` layers to support multi-argument lambdas, and keyword arguments when calling the layer.
+ * Functional models now get constructed if *any* tensor in a layer call's arguments/keyword arguments comes from a keras input. Previously the functional api would only work if all of the elements in the first argument to the layer came from a keras input.
+ * Clean up `BatchNormalization` layer's `trainable` property to act like standard python state when it's used inside `tf.functions` (frozen at tracing time), instead of acting like a pseudo-variable whose updates *kind of sometimes* get reflected in already-traced `tf.function` traces.
+ * Add the `Conv1DTranspose` layer.
+  * Refine the semantics of `SensitivitySpecificityBase` derived metrics. See the updated API docstrings for [`tf.keras.metrics.SensitivityAtSpecificity`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/metrics/SensitivityAtSpecificity) and [`tf.keras.metrics.SpecificityAtSensitivity`](https://www.tensorflow.org/versions/r2.3/api_docs/python/tf/keras/metrics/SpecificityAtSensitivity).
+
+### `tf.lite`:
+ * Converter
+ * Restored `inference_input_type` and `inference_output_type` flags in TF 2.x TFLiteConverter (backward compatible with TF 1.x) to support integer (tf.int8, tf.uint8) input and output types in post training full integer quantized models.
+ * Added support for converting and resizing models with dynamic (placeholder) dimensions. Previously, there was only limited support for dynamic batch size, and even that did not guarantee that the model could be properly resized at runtime.
+ * Enabled experimental support for a new quantization mode with 16-bit activations and 8-bit weights. See `lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8`.
+ * CPU
+    * Fix an issue with dynamic weights and `Conv2D` on x86.
+ * Add a runtime Android flag for enabling `XNNPACK` for optimized CPU performance.
+ * Add a runtime iOS flag for enabling `XNNPACK` for optimized CPU performance.
+ * Add a compiler flag to enable building a TFLite library that applies `XNNPACK` delegate automatically when the model has a `fp32` operation.
+ * GPU
+ * Allow GPU acceleration starting with internal graph nodes
+ * Experimental support for quantized models with the Android GPU delegate
+ * Add GPU delegate whitelist.
+ * Rename GPU whitelist -> compatibility (list).
+ * Improve GPU compatibility list entries from crash reports.
+ * NNAPI
+ * Set default value for `StatefulNnApiDelegate::Options::max_number_delegated_partitions` to 3.
+ * Add capability to disable `NNAPI` CPU and check `NNAPI` Errno.
+ * Fix crashes when using `NNAPI` with target accelerator specified with model containing Conv2d or FullyConnected or LSTM nodes with quantized weights.
+ * Fix `ANEURALNETWORKS_BAD_DATA` execution failures with `sum`/`max`/`min`/`reduce` operations with `scalar` inputs.
+ * Hexagon
+ * TFLite Hexagon Delegate out of experimental.
+ * Experimental `int8` support for most hexagon ops.
+ * Experimental per-channel quant support for `conv` in Hexagon delegate.
+ * Support dynamic batch size in C++ API.
+ * CoreML
+ * Opensource CoreML delegate
+ * Misc
+ * Enable building Android TFLite targets on Windows
+ * Add support for `BatchMatMul`.
+ * Add support for `half_pixel_centers` with `ResizeNearestNeighbor`.
+ * Add 3D support for `BatchToSpaceND`.
+ * Add 5D support for `BroadcastSub`, `Maximum`, `Minimum`, `Transpose` and `BroadcastDiv`.
+ * Rename `kTfLiteActRelu1` to `kTfLiteActReluN1To1`.
+ * Enable flex delegate on tensorflow.lite.Interpreter Python package.
+ * Add `Buckettize`, `SparseCross` and `BoostedTreesBucketize` to the flex whitelist.
+ * Add support for selective registration of flex ops.
+ * Add missing kernels for flex delegate whitelisted ops.
+ * Fix issue when using direct `ByteBuffer` inputs with graphs that have dynamic shapes.
+ * Fix error checking supported operations in a model containing `HardSwish`.
+
+### Packaging Support
+ * Added `tf.sysconfig.get_build_info()`. Returns a dict that describes the build environment of the currently installed TensorFlow package, e.g. the NVIDIA CUDA and NVIDIA CuDNN versions used when TensorFlow was built.
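+
+    A usage sketch (exact keys vary with how the package was built; the keys
+    below are assumptions for a CUDA build):
+
+    ```
+    import tensorflow as tf
+
+    info = tf.sysconfig.get_build_info()
+    print(info.get("cuda_version"), info.get("cudnn_version"))
+    ```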
+
+### Profiler
+ * Fix a subtle use-after-free issue in `XStatVisitor::RefValue()`.
+
+### TPU Enhancements
+ * Adds 3D mesh support in TPU configurations ops.
+ * Added TPU code for `FTRL` with `multiply_linear_by_lr`.
+ * Silently adds a new file system registry at `gstpu`.
+ * Support `restartType` in cloud tpu client.
+ * Depend on a specific version of google-api-python-client.
+ * Fixes apiclient import.
+
+### Tracing and Debugging
+ * Add a `TFE_Py_Execute` traceme.
+
+### XLA Support
+ * Implement stable `argmin` and `argmax`
+
+## Thanks to our Contributors
+
+This release contains contributions from many people at Google, as well as:
+
+902449@58880@bigcat_chen@ASIC, Abdul Baseer Khan, Abhineet Choudhary, Abolfazl Shahbazi, Adam Hillier, ag.ramesh, Agoniii, Ajay P, Alex Hoffman, Alexander Bayandin, Alexander Grund, Alexandre Abadie, Alexey Rogachevskiy, amoitra, Andrew Stevens, Angus-Luo, Anshuman Tripathy, Anush Elangovan, Artem Mavrin, Ashutosh Hathidara, autoih, Ayushman Kumar, ayushmankumar7, Bairen Yi, Bas Aarts, Bastian Eichenberger, Ben Barsdell, bhack, Bharat Raghunathan, Biagio Montaruli, Bigcat-Himax, blueyi, Bryan Cutler, Byambaa, Carlos Hernandez-Vaquero, Chen Lei, Chris Knorowski, Christian Clauss, chuanqiw, CuiYifeng, Daniel Situnayake, Daria Zhuravleva, Dayananda-V, Deven Desai, Devi Sandeep Endluri, Dmitry Zakharov, Dominic Jack, Duncan Riach, Edgar Liberis, Ehsan Toosi, ekuznetsov139, Elena Zhelezina, Eugene Kuznetsov, Eugene Mikhantiev, Evgenii Zheltonozhskii, Fabio Di Domenico, Fausto Morales, Fei Sun, feihugis, Felix E. Klee, flyingcat, Frederic Bastien, Fredrik Knutsson, frreiss, fsx950223, ganler, Gaurav Singh, Georgios Pinitas, Gian Marco Iodice, Giorgio Arena, Giuseppe Rossini, Gregory Keith, Guozhong Zhuang, gurushantj, Hahn Anselm, Harald Husum, Harjyot Bagga, Hristo Vrigazov, Ilya Persky, Ir1d, Itamar Turner-Trauring, jacco, Jake Tae, Janosh Riebesell, Jason Zaman, jayanth, Jeff Daily, Jens Elofsson, Jinzhe Zeng, JLZ, Jonas Skog, Jonathan Dekhtiar, Josh Meyer, Joshua Chia, Judd, justkw, Kaixi Hou, Kam D Kasravi, Kamil Rakoczy, Karol Gugala, Kayou, Kazuaki Ishizaki, Keith Smiley, Khaled Besrour, Kilaru Yasaswi Sri Chandra Gandhi, Kim, Young Soo, Kristian Hartikainen, Kwabena W. Agyeman, Leslie-Fang, Leslie-Fang-Intel, Li, Guizi, Lukas Geiger, Lutz Roeder, M\U00E5Ns Nilsson, Mahmoud Abuzaina, Manish, Marcel Koester, Marcin Sielski, marload, Martin Jul, Matt Conley, mdfaijul, Meng, Peng, Meteorix, Michael Käufl, Michael137, Milan Straka, Mitchell Vitez, Ml-0, Mokke Meguru, Mshr-H, nammbash, Nathan Luehr, naumkin, Neeraj Bhadani, ngc92, Nick Morgan, nihui, Niranjan Hasabnis, Niranjan Yadla, Nishidha Panpaliya, Oceania2018, oclyke, Ouyang Jin, OverLordGoldDragon, Owen Lyke, Patrick Hemmer, Paul Andrey, Peng Sun, periannath, Phil Pearl, Prashant Dandriyal, Prashant Kumar, Rahul Huilgol, Rajan Singh, Rajeshwar Reddy T, rangjiaheng, Rishit Dagli, Rohan Reddy, rpalakkal, rposts, Ruan Kunliang, Rushabh Vasani, Ryohei Ikegami, Semun Lee, Seo-Inyoung, Sergey Mironov, Sharada Shiddibhavi, ShengYang1, Shraiysh Vaishay, Shunya Ueta, shwetaoj, Siyavash Najafzade, Srinivasan Narayanamoorthy, Stephan Uphoff, storypku, sunchenggen, sunway513, Sven-Hendrik Haase, Swapnil Parekh, Tamas Bela Feher, Teng Lu, tigertang, tomas, Tomohiro Ubukata, tongxuan.ltx, Tony Tonev, Tzu-Wei Huang, Téo Bouvard, Uday Bondhugula, Vaibhav Jade, Vijay Tadikamalla, Vikram Dattu, Vincent Abriou, Vishnuvardhan Janapati, Vo Van Nghia, VoVAllen, Will Battel, William D. Irons, wyzhao, Xiaoming (Jason) Cui, Xiaoquan Kong, Xinan Jiang, xutianming, Yair Ehrenwald, Yasir Modak, Yasuhiro Matsumoto, Yixing Fu, Yong Tang, Yuan Tang, zhaozheng09, Zilin Zhu, zilinzhu, 张志豪
# Release 2.1.1
@@ -210,7 +487,7 @@ Coinciding with this change, new releases of [TensorFlow's Docker images](https:
`Strategy.extended.update` and `Strategy.extended.update_non_slot`.
* Experimental support for shape invariants has been enabled in
`tf.function`. See the API docs for
- `tf.autograph.experimental.set_loop_options` for additonal info.
+ `tf.autograph.experimental.set_loop_options` for additional info.
* AutoGraph error messages now exclude frames corresponding to APIs
internal to AutoGraph.
* Improve shape inference for `tf.function` input arguments to unlock more
@@ -293,7 +570,7 @@ Coinciding with this change, new releases of [TensorFlow's Docker images](https:
also deterministic back-prop of bias-addition in Keras layers) to
include when XLA JIT compilation is enabled.
* Fix problem, when running on a CUDA GPU and when either environment
- variable `TF_DETERMINSTIC_OPS` or environment variable
+ variable `TF_DETERMINISTIC_OPS` or environment variable
`TF_CUDNN_DETERMINISTIC` is set to "true" or "1", in which some layer
configurations led to an exception with the message "No algorithm
worked!"
@@ -336,32 +613,86 @@ This release contains contributions from many people at Google, as well as:
TensorFlow 2.1 will be the last TF release supporting Python 2. Python 2 support [officially ends an January 1, 2020](https://www.python.org/dev/peps/pep-0373/#update). [As announced earlier](https://groups.google.com/a/tensorflow.org/d/msg/announce/gVwS5RC8mds/dCt1ka2XAAAJ), TensorFlow will also stop supporting Python 2 starting January 1, 2020, and no more releases are expected in 2019.
## Major Features and Improvements
-* The `tensorflow` pip package now includes GPU support by default (same as `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only packages can be downloaded at `tensorflow-cpu` for users who are concerned about package size.
-* **Windows users:** Officially-released `tensorflow` Pip packages are now built with Visual Studio 2019 version 16.4 in order to take advantage of the new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new packages, you must install "Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019", available from Microsoft's website [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads).
- * This does not change the minimum required version for building TensorFlow from source on Windows, but builds enabling `EIGEN_STRONG_INLINE` can take over 48 hours to compile without this flag. Refer to `configure.py` for more information about `EIGEN_STRONG_INLINE` and `/d2ReducedOptimizeHugeFunctions`.
- * If either of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll` (new), are missing on your machine, `import tensorflow` will print a warning message.
-* The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6.
-* `tf.keras`
- * Experimental support for mixed precision is available on GPUs and Cloud TPUs. See [usage guide](https://www.tensorflow.org/guide/keras/mixed_precision).
- * Introduced the `TextVectorization` layer, which takes as input raw strings and takes care of text standardization, tokenization, n-gram generation, and vocabulary indexing. See this [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3).
- * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be outside of the DistributionStrategy scope, as long as the model was constructed inside of a scope.
- * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and `.predict` is available for Cloud TPUs, Cloud TPU, for all types of Keras models (sequential, functional and subclassing models).
- * Automatic outside compilation is now enabled for Cloud TPUs. This allows `tf.summary` to be used more conveniently with Cloud TPUs.
- * Dynamic batch sizes with DistributionStrategy and Keras are supported on Cloud TPUs.
- * Support for `.fit`, `.evaluate`, `.predict` on TPU using numpy data, in addition to `tf.data.Dataset`.
- * Keras reference implementations for many popular models are available in the TensorFlow [Model Garden](https://github.com/tensorflow/models/tree/master/official).
-* `tf.data`
- * Changes rebatching for `tf.data datasets` + DistributionStrategy for better performance. Note that the dataset also behaves slightly differently, in that the rebatched dataset cardinality will always be a multiple of the number of replicas.
- * `tf.data.Dataset` now supports automatic data distribution and sharding in distributed environments, including on TPU pods.
- * Distribution policies for `tf.data.Dataset` can now be tuned with 1. `tf.data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA)` 2. `tf.data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)`
-* `tf.debugging`
- * Add `tf.debugging.enable_check_numerics()` and `tf.debugging.disable_check_numerics()` to help debugging the root causes of issues involving infinities and `NaN`s.
-* `tf.distribute`
- * Custom training loop support on TPUs and TPU pods is avaiable through `strategy.experimental_distribute_dataset`, `strategy.experimental_distribute_datasets_from_function`, `strategy.experimental_run_v2`, `strategy.reduce`.
- * Support for a global distribution strategy through `tf.distribute.experimental_set_strategy(),` in addition to `strategy.scope()`.
-* `TensorRT`
- * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new) is now supported and enabled by default. This adds support for more TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D, MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the TensorFlow-TensorRT python conversion API is exported as `tf.experimental.tensorrt.Converter`.
-* Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to "true" or "1", this environment variable makes `tf.nn.bias_add` operate deterministically (i.e. reproducibly), but currently only when XLA JIT compilation is *not* enabled. Setting `TF_DETERMINISTIC_OPS` to "true" or "1" also makes cuDNN convolution and max-pooling operate deterministically. This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in both the forward and backward directions when running on a CUDA-enabled GPU.
+
+* The `tensorflow` pip package now includes GPU support by default (same as
+ `tensorflow-gpu`) for both Linux and Windows. This runs on machines with and
+ without NVIDIA GPUs. `tensorflow-gpu` is still available, and CPU-only
+ packages can be downloaded at `tensorflow-cpu` for users who are concerned
+ about package size.
+* **Windows users:** Officially-released `tensorflow` Pip packages are now
+ built with Visual Studio 2019 version 16.4 in order to take advantage of the
+ new `/d2ReducedOptimizeHugeFunctions` compiler flag. To use these new
+ packages, you must install "Microsoft Visual C++ Redistributable for Visual
+ Studio 2015, 2017 and 2019", available from Microsoft's website
+ [here](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads).
+ * This does not change the minimum required version for building
+ TensorFlow from source on Windows, but builds enabling
+ `EIGEN_STRONG_INLINE` can take over 48 hours to compile without this
+ flag. Refer to `configure.py` for more information about
+ `EIGEN_STRONG_INLINE` and `/d2ReducedOptimizeHugeFunctions`.
+ * If either of the required DLLs, `msvcp140.dll` (old) or `msvcp140_1.dll`
+ (new), are missing on your machine, `import tensorflow` will print a
+ warning message.
+* The `tensorflow` pip package is built with CUDA 10.1 and cuDNN 7.6.
+* `tf.keras`
+ * Experimental support for mixed precision is available on GPUs and Cloud
+ TPUs. See
+ [usage guide](https://www.tensorflow.org/guide/keras/mixed_precision).
+ * Introduced the `TextVectorization` layer, which takes as input raw
+ strings and takes care of text standardization, tokenization, n-gram
+ generation, and vocabulary indexing. See this
+ [end-to-end text classification example](https://colab.research.google.com/drive/1RvCnR7h0_l4Ekn5vINWToI9TNJdpUZB3).
+ * Keras `.compile` `.fit` `.evaluate` and `.predict` are allowed to be
+ outside of the DistributionStrategy scope, as long as the model was
+ constructed inside of a scope.
+    * Experimental support for Keras `.compile`, `.fit`, `.evaluate`, and
+      `.predict` is available for Cloud TPUs, for all types of Keras models
+      (sequential, functional and subclassing models).
+ * Automatic outside compilation is now enabled for Cloud TPUs. This allows
+ `tf.summary` to be used more conveniently with Cloud TPUs.
+ * Dynamic batch sizes with DistributionStrategy and Keras are supported on
+ Cloud TPUs.
+ * Support for `.fit`, `.evaluate`, `.predict` on TPU using numpy data, in
+ addition to `tf.data.Dataset`.
+ * Keras reference implementations for many popular models are available in
+ the TensorFlow
+ [Model Garden](https://github.com/tensorflow/models/tree/master/official).
+* `tf.data`
+ * Changes rebatching for `tf.data datasets` + DistributionStrategy for
+ better performance. Note that the dataset also behaves slightly
+ differently, in that the rebatched dataset cardinality will always be a
+ multiple of the number of replicas.
+ * `tf.data.Dataset` now supports automatic data distribution and sharding
+ in distributed environments, including on TPU pods.
+    * Distribution policies for `tf.data.Dataset` can now be tuned with the
+      following (see the sketch below):
+      1. `tf.data.experimental.AutoShardPolicy(OFF, AUTO, FILE, DATA)`
+      2. `tf.data.experimental.ExternalStatePolicy(WARN, IGNORE, FAIL)`
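+
+      A usage sketch for these policies via `tf.data.Options` (the
+      `experimental_external_state_policy` attribute path is an assumption;
+      some releases configure that policy elsewhere):
+
+      ```python
+      import tensorflow as tf
+
+      dataset = tf.data.Dataset.range(8).batch(2)
+      options = tf.data.Options()
+      # DATA sharding: every worker reads the input but keeps only its share.
+      options.experimental_distribute.auto_shard_policy = (
+          tf.data.experimental.AutoShardPolicy.DATA)
+      # Warn, rather than fail, when external state prevents checkpointing.
+      options.experimental_external_state_policy = (
+          tf.data.experimental.ExternalStatePolicy.WARN)
+      dataset = dataset.with_options(options)
+      ```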
+* `tf.debugging`
+    * Add `tf.debugging.enable_check_numerics()` and
+      `tf.debugging.disable_check_numerics()` to help debug the root causes
+      of issues involving infinities and `NaN`s (see the sketch below).
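+
+      A minimal sketch (the two calls are as named above; the exact error
+      type raised is an assumption):
+
+      ```python
+      import tensorflow as tf
+
+      tf.debugging.enable_check_numerics()
+      try:
+          # 1/0 yields an inf, which check-numerics reports at the bad op.
+          print(tf.constant(1.0) / tf.constant([1.0, 0.0]))
+      except tf.errors.InvalidArgumentError as err:
+          print("check-numerics caught:", type(err).__name__)
+      tf.debugging.disable_check_numerics()
+      ```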
+* `tf.distribute`
+    * Custom training loop support on TPUs and TPU pods is available through
+      `strategy.experimental_distribute_dataset`,
+      `strategy.experimental_distribute_datasets_from_function`,
+      `strategy.experimental_run_v2`, and `strategy.reduce` (a sketch follows
+      this list).
+    * Support for a global distribution strategy through
+      `tf.distribute.experimental_set_strategy()`, in addition to
+      `strategy.scope()`.
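+
+    A custom-training-loop sketch using those APIs, shown with
+    `MirroredStrategy` so it runs without a TPU (the dataset and per-replica
+    loss are invented stand-ins):
+
+    ```python
+    import tensorflow as tf
+
+    strategy = tf.distribute.MirroredStrategy()
+    dataset = tf.data.Dataset.from_tensor_slices(tf.ones([8, 2])).batch(4)
+    dist_dataset = strategy.experimental_distribute_dataset(dataset)
+
+    @tf.function
+    def step(batch):
+      def replica_fn(x):
+        return tf.reduce_sum(x)  # Stand-in for a real per-replica loss.
+      per_replica = strategy.experimental_run_v2(replica_fn, args=(batch,))
+      return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica,
+                             axis=None)
+
+    for batch in dist_dataset:
+      print(step(batch))
+    ```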
+* `TensorRT`
+ * [TensorRT 6.0](https://developer.nvidia.com/tensorrt#tensorrt-whats-new)
+ is now supported and enabled by default. This adds support for more
+ TensorFlow ops including Conv3D, Conv3DBackpropInputV2, AvgPool3D,
+ MaxPool3D, ResizeBilinear, and ResizeNearestNeighbor. In addition, the
+      TensorFlow-TensorRT Python conversion API is exported as
+      `tf.experimental.tensorrt.Converter`; a usage sketch follows.
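+
+      A usage sketch for the exported converter (the SavedModel paths are
+      hypothetical, and a GPU build with TensorRT available is assumed):
+
+      ```python
+      import tensorflow as tf
+
+      converter = tf.experimental.tensorrt.Converter(
+          input_saved_model_dir="/tmp/saved_model")
+      converter.convert()  # Rewrites supported subgraphs into TensorRT ops.
+      converter.save("/tmp/trt_saved_model")
+      ```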
+* Environment variable `TF_DETERMINISTIC_OPS` has been added. When set to
+ "true" or "1", this environment variable makes `tf.nn.bias_add` operate
+ deterministically (i.e. reproducibly), but currently only when XLA JIT
+ compilation is *not* enabled. Setting `TF_DETERMINISTIC_OPS` to "true" or
+ "1" also makes cuDNN convolution and max-pooling operate deterministically.
+ This makes Keras Conv\*D and MaxPool\*D layers operate deterministically in
+ both the forward and backward directions when running on a CUDA-enabled GPU.
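+
+  A minimal sketch of opting in (the variable must be set before TensorFlow
+  initializes its GPU kernels):
+
+  ```python
+  import os
+
+  os.environ["TF_DETERMINISTIC_OPS"] = "1"
+
+  import tensorflow as tf  # Imported after setting the variable.
+
+  # cuDNN convolution and max-pooling now pick deterministic algorithms,
+  # so repeated runs with the same seed produce identical results.
+  tf.random.set_seed(0)
+  ```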
## Breaking Changes
* Deletes `Operation.traceback_with_start_lines` for which we know of no usages.
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index 8a0918b416f..d1c1d7dcdef 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -260,6 +260,36 @@ config_setting(
visibility = ["//visibility:public"],
)
+config_setting(
+ name = "armeabi",
+ values = {"cpu": "armeabi"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "armeabi-v7a",
+ values = {"cpu": "armeabi-v7a"},
+ visibility = ["//visibility:public"],
+)
+
+config_setting(
+ name = "arm64-v8a",
+ values = {"cpu": "arm64-v8a"},
+ visibility = ["//visibility:public"],
+)
+
+selects.config_setting_group(
+ name = "arm_any",
+ match_any = [
+ ":arm",
+ ":armeabi",
+ ":armeabi-v7a",
+ ":arm64-v8a",
+ ":linux_aarch64",
+ ":linux_armhf",
+ ],
+)
+
config_setting(
name = "freebsd",
values = {"cpu": "freebsd"},
@@ -532,16 +562,14 @@ selects.config_setting_group(
package_group(
name = "internal",
packages = [
- # To pass open source testing in the pip Kokoros.
- "//bazel_pip/tensorflow/...",
"//learning/brain/swift/x10/...",
"//perftools/accelerators/xprof/api/...",
- "//third_party/py/autograph/...",
- "//third_party/swift/tensorflow/x10/...",
- "//third_party/swift/tensorflow_apis/...",
"//tensorflow/...",
"//tensorflow_estimator/python/estimator/...",
"//tensorflow_models/official/...",
+ "//third_party/py/autograph/...",
+ "//third_party/swift/tensorflow/x10/...",
+ "//third_party/swift/tensorflow_apis/...",
],
)
diff --git a/tensorflow/api_template.__init__.py b/tensorflow/api_template.__init__.py
index f0f977aa0b5..5932dda514d 100644
--- a/tensorflow/api_template.__init__.py
+++ b/tensorflow/api_template.__init__.py
@@ -137,7 +137,7 @@ if _running_from_pip_package():
# TODO(gunan): Add sanity checks to loaded modules here.
for _s in _site_packages_dirs:
# Load first party dynamic kernels.
- _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels')
+ _main_dir = _os.path.join(_s, 'tensorflow/core/kernels')
if _fi.file_exists(_main_dir):
_ll.load_library(_main_dir)
@@ -158,4 +158,23 @@ if hasattr(_current_module, 'keras'):
setattr(_current_module, "initializers", initializers)
# pylint: enable=undefined-variable
+# Delete modules that should be hidden from dir().
+# Don't fail if these modules are not available.
+# For e.g. this file will be originally placed under tensorflow/_api/v1 which
+# does not have 'python', 'core' directories. Then, it will be copied
+# to tensorflow/ which does have these two directories.
+# pylint: disable=undefined-variable
+try:
+ del python
+except NameError:
+ pass
+try:
+ del core
+except NameError:
+ pass
+try:
+ del compiler
+except NameError:
+ pass
+
# __all__ PLACEHOLDER
diff --git a/tensorflow/api_template_v1.__init__.py b/tensorflow/api_template_v1.__init__.py
index dad91f2d5b2..0d1d2e56fae 100644
--- a/tensorflow/api_template_v1.__init__.py
+++ b/tensorflow/api_template_v1.__init__.py
@@ -147,7 +147,7 @@ if _running_from_pip_package():
# TODO(gunan): Add sanity checks to loaded modules here.
for _s in _site_packages_dirs:
# Load first party dynamic kernels.
- _main_dir = _os.path.join(_s, 'tensorflow_core/core/kernels')
+ _main_dir = _os.path.join(_s, 'tensorflow/core/kernels')
if _fi.file_exists(_main_dir):
_ll.load_library(_main_dir)
@@ -156,4 +156,25 @@ if _running_from_pip_package():
if _fi.file_exists(_plugin_dir):
_ll.load_library(_plugin_dir)
+# Delete modules that should be hidden from dir().
+# Don't fail if these modules are not available.
+# For e.g. this file will be originally placed under tensorflow/_api/v1 which
+# does not have 'python', 'core' directories. Then, it will be copied
+# to tensorflow/ which does have these two directories.
+
+# pylint: disable=undefined-variable
+try:
+ del python
+except NameError:
+ pass
+try:
+ del core
+except NameError:
+ pass
+try:
+ del compiler
+except NameError:
+ pass
+
+
# __all__ PLACEHOLDER
diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD
index 410fc22069f..e5efe323922 100644
--- a/tensorflow/c/BUILD
+++ b/tensorflow/c/BUILD
@@ -213,6 +213,17 @@ tf_cuda_library(
alwayslink = 1,
)
+cc_library(
+ name = "logging",
+ srcs = ["logging.cc"],
+ hdrs = ["logging.h"],
+ deps = [
+ ":c_api_macros",
+ "//tensorflow/core/platform:logging",
+ "//tensorflow/core/platform:stringprintf",
+ ],
+)
+
tf_cuda_library(
name = "tf_status_internal",
hdrs = [
diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc
index 36a08c8cfc9..2e1759ecea0 100644
--- a/tensorflow/c/c_api.cc
+++ b/tensorflow/c/c_api.cc
@@ -213,7 +213,6 @@ void TF_Reset(const TF_SessionOptions* opt, const char** containers,
namespace tensorflow {
-
Status MessageToBuffer(const tensorflow::protobuf::MessageLite& in,
TF_Buffer* out) {
if (out->data != nullptr) {
@@ -306,8 +305,8 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output,
}
// Helpers for loading a TensorFlow plugin (a .so file).
-Status LoadLibrary(const char* library_filename, void** result,
- const void** buf, size_t* len);
+Status LoadDynamicLibrary(const char* library_filename, void** result,
+ const void** buf, size_t* len);
// TODO(josh11b,mrry): Change Session to be able to use a Graph*
// directly, instead of requiring us to serialize to a GraphDef and
@@ -552,7 +551,7 @@ void TF_PRun(TF_DeprecatedSession* s, const char* handle,
TF_Library* TF_LoadLibrary(const char* library_filename, TF_Status* status) {
TF_Library* lib_handle = new TF_Library;
- status->status = tensorflow::LoadLibrary(
+ status->status = tensorflow::LoadDynamicLibrary(
library_filename, &lib_handle->lib_handle, &lib_handle->op_list.data,
&lib_handle->op_list.length);
if (!status->status.ok()) {
diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h
index 808bcf3bd80..0b4d9993e4d 100644
--- a/tensorflow/c/c_api.h
+++ b/tensorflow/c/c_api.h
@@ -125,6 +125,14 @@ TF_CAPI_EXPORT extern void TF_DeleteBuffer(TF_Buffer*);
TF_CAPI_EXPORT extern TF_Buffer TF_GetBuffer(TF_Buffer* buffer);
+// --------------------------------------------------------------------------
+// Used to return strings across the C API. The caller does not take ownership
+// of the underlying data pointer and is not responsible for freeing it.
+typedef struct TF_StringView {
+ const char* data;
+ size_t len;
+} TF_StringView;
+
// --------------------------------------------------------------------------
// TF_SessionOptions holds options that can be passed during session creation.
typedef struct TF_SessionOptions TF_SessionOptions;
diff --git a/tensorflow/c/c_api_experimental.cc b/tensorflow/c/c_api_experimental.cc
index 831c6a0ad40..b4297033b6d 100644
--- a/tensorflow/c/c_api_experimental.cc
+++ b/tensorflow/c/c_api_experimental.cc
@@ -29,6 +29,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/eager/context.h"
#include "tensorflow/core/common_runtime/eager/eager_operation.h"
#include "tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h"
+#include "tensorflow/core/framework/collective.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/shape_inference.h"
#include "tensorflow/core/framework/tensor.pb.h"
@@ -525,12 +526,12 @@ tensorflow::Status EnableCollectiveOps(const tensorflow::ServerDef& server_def,
LOG_AND_RETURN_IF_ERROR(context->StoreCollectiveOpsServer(
std::move(new_server), grpc_server->worker_env()->device_mgr,
- grpc_server->worker_env()->collective_executor_mgr));
+ grpc_server->worker_env()->collective_executor_mgr.get()));
} else {
LOG_AND_RETURN_IF_ERROR(grpc_server->UpdateServerDef(server_def));
LOG_AND_RETURN_IF_ERROR(context->StoreCollectiveOpsServer(
/*new_server=*/nullptr, grpc_server->worker_env()->device_mgr,
- grpc_server->worker_env()->collective_executor_mgr));
+ grpc_server->worker_env()->collective_executor_mgr.get()));
}
return tensorflow::Status::OK();
#undef LOG_AND_RETURN_IF_ERROR
@@ -551,6 +552,14 @@ TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx,
status->status = EnableCollectiveOps(server_def, ctx);
}
+TF_CAPI_EXPORT extern void TFE_AbortCollectiveOps(TFE_Context* ctx,
+ TF_Status* status) {
+ tensorflow::EagerContext* context =
+ tensorflow::ContextFromInterface(tensorflow::unwrap(ctx));
+ auto collective_executor_handle = context->GetCollectiveExecutorHandle();
+ collective_executor_handle->get()->StartAbort(status->status);
+}
+
TF_ShapeAndTypeList* TF_NewShapeAndTypeList(int num_items) {
TF_ShapeAndTypeList* result = new TF_ShapeAndTypeList;
result->num_items = num_items;
diff --git a/tensorflow/c/c_api_experimental.h b/tensorflow/c/c_api_experimental.h
index d0ffbf125fb..ebd14b4b571 100644
--- a/tensorflow/c/c_api_experimental.h
+++ b/tensorflow/c/c_api_experimental.h
@@ -230,6 +230,14 @@ TF_CAPI_EXPORT extern void TFE_EnableCollectiveOps(TFE_Context* ctx,
size_t proto_len,
TF_Status* status);
+// Aborts all ongoing collectives with the specified status. After abortion,
+// subsequent collectives will error with this status immediately.
+//
+// This is intended to be used when a peer failure is detected. There is
+// currently no way to reset the collectives other than restarting the
+// program.
+TF_CAPI_EXPORT extern void TFE_AbortCollectiveOps(TFE_Context* ctx,
+ TF_Status* status);
+
// Information about the shape of a Tensor and its type.
struct TF_ShapeAndType {
// Number of dimensions. -1 indicates unknown rank.
diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD
index a77e76644b8..61701bc8b21 100644
--- a/tensorflow/c/eager/BUILD
+++ b/tensorflow/c/eager/BUILD
@@ -240,6 +240,8 @@ tf_cuda_cc_test(
"//tensorflow/c:c_api",
"//tensorflow/c:c_test_util",
"//tensorflow/c:tf_status_helper",
+ "//tensorflow/c/experimental/gradients:math_grad",
+ "//tensorflow/c/experimental/ops:array_ops",
"//tensorflow/cc/profiler",
"//tensorflow/compiler/mlir/tensorflow/c:mlir_c_api_registration",
"//tensorflow/core:lib",
@@ -260,6 +262,7 @@ cc_library(
],
deps = [
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/core/platform:refcount",
],
)
@@ -308,6 +311,8 @@ cc_library(
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
+ "//tensorflow/core/util:abstract_stack_trace",
+ "@com_google_absl//absl/types:optional",
"@com_google_absl//absl/types:span",
],
)
@@ -514,7 +519,6 @@ tf_cuda_cc_test(
extra_copts = tfe_xla_copts(),
tags = [
"no_windows",
- "noasan", # leaks gRPC server instances
],
deps = [
":c_api",
@@ -581,7 +585,6 @@ tf_cuda_cc_test(
extra_copts = tfe_xla_copts(),
tags = [
"no_windows",
- "noasan", # leaks gRPC server instances
],
deps = [
":c_api",
diff --git a/tensorflow/c/eager/abstract_tensor_handle.h b/tensorflow/c/eager/abstract_tensor_handle.h
index de041690420..37e6d1bf29c 100644
--- a/tensorflow/c/eager/abstract_tensor_handle.h
+++ b/tensorflow/c/eager/abstract_tensor_handle.h
@@ -18,11 +18,12 @@ limitations under the License.
#include <memory>
#include "tensorflow/core/framework/types.pb.h"
+#include "tensorflow/core/platform/refcount.h"
namespace tensorflow {
// Abstract interface to a Tensor handle in either tracing or immediate
// execution mode.
-class AbstractTensorHandle {
+class AbstractTensorHandle : public core::RefCounted {
protected:
enum AbstractTensorHandleKind { kGraph, kMlir, kEager, kTfrt };
explicit AbstractTensorHandle(AbstractTensorHandleKind kind) : kind_(kind) {}
@@ -34,14 +35,6 @@ class AbstractTensorHandle {
AbstractTensorHandleKind getKind() const { return kind_; }
- // Release any underlying resources, including the interface object.
- //
- // WARNING: The destructor of this class is marked as protected to disallow
- // clients from directly destroying this object since it may manage it's own
- // lifetime through ref counting. Thus this must be allocated on the heap and
- // clients MUST call Release() in order to destroy an instance of this class.
- virtual void Release() = 0;
-
private:
const AbstractTensorHandleKind kind_;
};
@@ -50,7 +43,7 @@ namespace internal {
struct AbstractTensorHandleDeleter {
void operator()(AbstractTensorHandle* p) const {
if (p != nullptr) {
- p->Release();
+ p->Unref();
}
}
};
diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc
index 70acd710166..fefa753c608 100644
--- a/tensorflow/c/eager/c_api.cc
+++ b/tensorflow/c/eager/c_api.cc
@@ -94,7 +94,6 @@ limitations under the License.
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/public/version.h"
-using tensorflow::int64;
using tensorflow::string;
namespace {
@@ -725,13 +724,7 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
if (opts->use_tfrt) {
#ifdef PLATFORM_GOOGLE
- tfrt::SmallVector op_handler_chains;
- tfrt::SmallVector device_attributes;
- status->status = tfrt::ListOpHandlerChains(
- opts->session_options.options, &op_handler_chains, &device_attributes);
- if (!status->status.ok()) return nullptr;
- return tensorflow::wrap(new tfrt::ContextInterface(
- op_handler_chains, device_attributes, opts->async));
+ return tensorflow::wrap(new tfrt::tf::ContextInterface(opts->async));
#else
status->status = tensorflow::errors::Unimplemented("TFRT is not supported");
return nullptr;
@@ -974,7 +967,7 @@ int64_t TFE_TensorHandleNumElements(TFE_TensorHandle* h, TF_Status* status) {
return -1;
}
- int64 num_elements = -1;
+ tensorflow::int64 num_elements = -1;
status->status = tensorflow::unwrap(h)->NumElements(&num_elements);
return num_elements;
}
@@ -986,7 +979,7 @@ int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index,
return -1;
}
- int64 dim = -1;
+ tensorflow::int64 dim = -1;
status->status = tensorflow::unwrap(h)->Dim(dim_index, &dim);
return dim;
}
@@ -1079,11 +1072,13 @@ TFE_TensorHandle* TFE_NewTensorHandleFromDeviceMemory(
status->status = context->FindDeviceFromName(device_name, &device);
tensorflow::CustomDevice* custom_device = nullptr;
if (!status->status.ok()) {
- status->status =
- context->FindCustomDeviceFromName(device_name, &custom_device);
- if (!status->status.ok()) {
+ if (!context->FindCustomDeviceFromName(device_name, &custom_device)) {
deallocator(data, len, deallocator_arg);
+ status->status =
+ tensorflow::errors::InvalidArgument(device_name, " unknown device.");
return nullptr;
+ } else {
+ status->status = tensorflow::Status::OK();
}
}
  std::vector<tensorflow::int64> dimvec(num_dims);
diff --git a/tensorflow/c/eager/c_api_debug.cc b/tensorflow/c/eager/c_api_debug.cc
index 6827021455b..dd55f05283b 100644
--- a/tensorflow/c/eager/c_api_debug.cc
+++ b/tensorflow/c/eager/c_api_debug.cc
@@ -26,14 +26,13 @@ limitations under the License.
#include "tensorflow/compiler/jit/xla_device.h"
#endif // TENSORFLOW_EAGER_USE_XLA
-using tensorflow::int64;
using tensorflow::string;
namespace {
-std::vector<int64> TensorShapeAsVector(const tensorflow::TensorHandle& handle,
-                                       tensorflow::Status* status) {
-  std::vector<int64> shape;
+std::vector<tensorflow::int64> TensorShapeAsVector(
+    const tensorflow::TensorHandle& handle, tensorflow::Status* status) {
+  std::vector<tensorflow::int64> shape;
int rank = -1;
*status = handle.NumDims(&rank);
if (!status->ok()) {
@@ -79,7 +78,7 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo(
return nullptr;
}
if (VLOG_IS_ON(3)) {
-    std::vector<int64> shape_to_log =
+    std::vector<tensorflow::int64> shape_to_log =
TensorShapeAsVector(*handle, &status->status);
if (!status->status.ok()) {
// Ignore the status here as we are simply logging.
@@ -128,14 +127,14 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo(
}
int rank = padded_shape.dimensions_size();
-  std::vector<int64> dev_dims;
+  std::vector<tensorflow::int64> dev_dims;
dev_dims.reserve(rank);
if (rank == 1) {
// Rank 1 tensors might not have padded_shape.layout.minor_to_major set,
dev_dims.push_back(padded_shape.dimensions(0));
} else {
for (int i = rank - 1; i >= 0; --i) {
- int64 dim_index = padded_shape.layout().minor_to_major(i);
+ tensorflow::int64 dim_index = padded_shape.layout().minor_to_major(i);
dev_dims.push_back(padded_shape.dimensions(dim_index));
}
}
@@ -146,7 +145,8 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo(
// If the tensor is not an XLA tensor, the device shape is
// the same as regular tensor shape.
-  std::vector<int64> dev_dims = TensorShapeAsVector(*handle, &status->status);
+  std::vector<tensorflow::int64> dev_dims =
+      TensorShapeAsVector(*handle, &status->status);
if (!status->status.ok()) {
return nullptr;
}
diff --git a/tensorflow/c/eager/c_api_distributed_test.cc b/tensorflow/c/eager/c_api_distributed_test.cc
index a6547e23454..3738768cf02 100644
--- a/tensorflow/c/eager/c_api_distributed_test.cc
+++ b/tensorflow/c/eager/c_api_distributed_test.cc
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#include <regex>  // NOLINT
+
#include "tensorflow/c/eager/c_api.h"
#include "tensorflow/c/eager/c_api_experimental.h"
#include "tensorflow/c/eager/c_api_internal.h"
@@ -174,9 +176,9 @@ void TestFunctionWithPackedInput(const bool remote) {
const char task2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0";
// Create one variable per task.
- TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task0_name);
- TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task1_name);
- TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task2_name);
+ TFE_TensorHandle* h0 = TestVariable(ctx, 1.0, task1_name);
+ TFE_TensorHandle* h1 = TestVariable(ctx, 2.0, task2_name);
+ TFE_TensorHandle* h2 = TestVariable(ctx, 3.0, task0_name);
// Add a sync point in order to make sure that variables have been initialized
// before the function execution starts.
@@ -185,6 +187,9 @@ void TestFunctionWithPackedInput(const bool remote) {
VarIsInitialized(ctx, h2);
// Pack 3 variable handles into one TFE_TensorHandle.
+  // When remote is false, the function device is placed on task0; handle types
+  // are REMOTE, REMOTE, LOCAL on task0. When remote is true, the function
+  // device is placed on task1; handle types are LOCAL, REMOTE, LOCAL on task1.
int num_replicas = 3;
  std::vector<TFE_TensorHandle*> handles = {h0, h1, h2};
TFE_TensorHandle* packed_handle =
@@ -259,61 +264,64 @@ TEST(CAPI, TestRemoteFunctionWithPackedInput) {
TestFunctionWithPackedInput(/*remote=*/true);
}
+string VariableAddFunctionSignature() {
+ return " signature {"
+ " name: 'VariableAddFunction'"
+ " input_arg {"
+ " name: 'var0'"
+ " type: DT_RESOURCE"
+ " }"
+ " output_arg {"
+ " name: 'var0_value'"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " node_def {"
+ " name: 'read0'"
+ " op: 'ReadVariableOp'"
+ " input: 'var0'"
+ " attr {"
+ " key: 'dtype'"
+ " value {"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " }"
+ " node_def {"
+ " name: 'add'"
+ " op: 'Add'"
+ " input: 'read0:value:0'"
+ " input: 'read0:value:0'"
+ " device: '/job:localhost/task:1/device:CPU:0'"
+ " attr {"
+ " key: 'T'"
+ " value {"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " }"
+ " node_def {"
+ " name: 'identity'"
+ " op: 'Identity'"
+ " input: 'add:z:0'"
+ " device: '/job:localhost/task:0/device:CPU:0'"
+ " attr {"
+ " key: 'T'"
+ " value {"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " }"
+ " ret {"
+ " key: 'var0_value'"
+ " value: 'identity:output:0'"
+ " }";
+}
+
string VariableAddFunction() {
tensorflow::FunctionDef def;
CHECK(tensorflow::protobuf::TextFormat::ParseFromString(
- " signature {"
- " name: 'VariableAddFunction'"
- " input_arg {"
- " name: 'var0'"
- " type: DT_RESOURCE"
- " }"
- " output_arg {"
- " name: 'var0_value'"
- " type: DT_FLOAT"
- " }"
- " }"
- " node_def {"
- " name: 'read0'"
- " op: 'ReadVariableOp'"
- " input: 'var0'"
- " attr {"
- " key: 'dtype'"
- " value {"
- " type: DT_FLOAT"
- " }"
- " }"
- " }"
- " node_def {"
- " name: 'add'"
- " op: 'Add'"
- " input: 'read0:value:0'"
- " input: 'read0:value:0'"
- " device: '/job:localhost/task:1/device:CPU:0'"
- " attr {"
- " key: 'T'"
- " value {"
- " type: DT_FLOAT"
- " }"
- " }"
- " }"
- " node_def {"
- " name: 'identity'"
- " op: 'Identity'"
- " input: 'add:z:0'"
- " device: '/job:localhost/task:0/device:CPU:0'"
- " attr {"
- " key: 'T'"
- " value {"
- " type: DT_FLOAT"
- " }"
- " }"
- " }"
- " ret {"
- " key: 'var0_value'"
- " value: 'identity:output:0'"
- " }",
- &def));
+ VariableAddFunctionSignature(), &def));
return def.SerializeAsString();
}
@@ -425,6 +433,17 @@ TEST(CAPI, DistributedFunctionGraphPassOnlyOnce) {
GraphErrorInjectionPass::enabled_ = false;
}
+string VariableAddFunctionWithGraphError() {
+ string signature = VariableAddFunctionSignature();
+ // Replace the node 'read0' with 'read0_maybe_with_graph_error', so that the
+ // error injecting pass can identify and introduce graph pass errors.
+ signature = std::regex_replace(signature, std::regex("read0"),
+ "read0_maybe_with_graph_error");
+ tensorflow::FunctionDef def;
+ CHECK(tensorflow::protobuf::TextFormat::ParseFromString(signature, &def));
+ return def.SerializeAsString();
+}
+
class FunctionErrorInjectionPass : public tensorflow::FunctionOptimizationPass {
public:
FunctionErrorInjectionPass(string error_node, string error_device)
@@ -471,16 +490,19 @@ void TestDistributedFunctionCancellation(bool inject_error) {
const char dev2_name[] = "/job:localhost/replica:0/task:2/device:CPU:0";
if (inject_error) {
- // Inject a function optimization pass failure when it sees the 'read0' op
- // having a requested device `dev2_name`. During execution:
- // * task:0 processes the main function `VariableAddFunction` and places
- // the read0 op on task:2
- // * task:0 partitions the main function with a subgraph containing read0
- // sent to task:2
- // * task:2 graph pass reports an error when it sees read0 with dev2_name
+ // Inject a function optimization pass failure when it sees the
+ // 'read0_maybe_with_graph_error' op having a requested device `dev2_name`.
+ // During execution:
+ // * task:0 processes main function `VariableAddFunctionWithGraphError`
+ // and places the 'read0_maybe_with_graph_error' op on task:2
+ // * task:0 partitions the main function with a subgraph containing
+ // 'read0_maybe_with_graph_error' sent to task:2
+ // * task:2 graph pass reports an error when it sees
+ // 'read0_maybe_with_graph_error' with dev2_name
tensorflow::function_optimization_registration::
FunctionOptimizationPassRegistration register_test_pass(
-        std::make_unique<FunctionErrorInjectionPass>("read0", dev2_name));
+        std::make_unique<FunctionErrorInjectionPass>(
+            "read0_maybe_with_graph_error", dev2_name));
}
TF_Status* status = TF_NewStatus();
@@ -496,7 +518,7 @@ void TestDistributedFunctionCancellation(bool inject_error) {
TFE_TensorHandle* var_handle = TestVariable(ctx, 2.0, dev2_name);
EXPECT_NE(var_handle, nullptr);
- const string function_def = VariableAddFunction();
+ const string function_def = VariableAddFunctionWithGraphError();
TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(),
status);
ASSERT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
diff --git a/tensorflow/c/eager/c_api_remote_test.cc b/tensorflow/c/eager/c_api_remote_test.cc
index 94c32cf3f30..e99f6d6e170 100644
--- a/tensorflow/c/eager/c_api_remote_test.cc
+++ b/tensorflow/c/eager/c_api_remote_test.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
+#include "absl/strings/str_cat.h"
#include "tensorflow/c/eager/c_api.h"
#include "tensorflow/c/eager/c_api_experimental.h"
#include "tensorflow/c/eager/c_api_internal.h"
@@ -115,40 +116,42 @@ void TestRemoteExecute(bool async) {
TEST(CAPI, RemoteExecute) { TestRemoteExecute(false); }
TEST(CAPI, RemoteExecuteAsync) { TestRemoteExecute(true); }
-string MatMulFunction() {
+string MatMulFunction(const string& matmul_device) {
tensorflow::FunctionDef def;
CHECK(tensorflow::protobuf::TextFormat::ParseFromString(
- " signature {"
- " name: 'MatMulFunction'"
- " input_arg {"
- " name: 'a'"
- " type: DT_FLOAT"
- " }"
- " input_arg {"
- " name: 'b'"
- " type: DT_FLOAT"
- " }"
- " output_arg {"
- " name: 'm'"
- " type: DT_FLOAT"
- " }"
- " }"
- " node_def {"
- " name: 'matmul'"
- " op: 'MatMul'"
- " input: 'a'"
- " input: 'b'"
- " attr {"
- " key: 'T'"
- " value {"
- " type: DT_FLOAT"
- " }"
- " }"
- " }"
- " ret {"
- " key: 'm'"
- " value: 'matmul:product'"
- " }",
+ absl::StrCat(" signature {"
+ " name: 'MatMulFunction'"
+ " input_arg {"
+ " name: 'a'"
+ " type: DT_FLOAT"
+ " }"
+ " input_arg {"
+ " name: 'b'"
+ " type: DT_FLOAT"
+ " }"
+ " output_arg {"
+ " name: 'm'"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " node_def {"
+ " name: 'matmul'"
+ " op: 'MatMul'"
+ " input: 'a'"
+ " input: 'b'"
+ " device: '",
+ matmul_device, "'",
+ " attr {"
+ " key: 'T'"
+ " value {"
+ " type: DT_FLOAT"
+ " }"
+ " }"
+ " }"
+ " ret {"
+ " key: 'm'"
+ " value: 'matmul:product'"
+ " }"),
&def));
return def.SerializeAsString();
}
@@ -157,7 +160,8 @@ string MatMulFunction() {
// which creates a remote input, to simulate a scenario where the remote
// input is not ready when we start running an op or a function.
void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func,
- bool heavy_load_on_streaming_rpc) {
+ bool heavy_load_on_streaming_rpc,
+ bool remote_func_outputs = false) {
tensorflow::ServerDef server_def = GetServerDef(3);
// This server def has the task index set to 0.
@@ -214,7 +218,8 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func,
TFE_Op* matmul = nullptr;
if (func) {
- string function_def = MatMulFunction();
+ const string matmul_device = remote_func_outputs ? task2_name : "";
+ string function_def = MatMulFunction(matmul_device);
TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(),
status);
CHECK_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
@@ -250,7 +255,7 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func,
EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
// TODO(gjn): Add support for waiting on async local mirrors
- if (!remote && !async) {
+ if (!remote && !async && !remote_func_outputs) {
auto remote_arg =
tensorflow::TensorHandleFromInterface(tensorflow::unwrap(h1_task2));
// The input handles should never change since they have been mirrored.
@@ -329,6 +334,19 @@ TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFunc) {
TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true,
/*heavy_load_on_streaming_rpc=*/false);
}
+// TODO(b/162618595): Enable this test once we remove the check of remote
+// outputs in ProcessFunctionLibraryRuntime.
+TEST(CAPI, DISABLED_RemoteExecuteSilentCopiesLocalFuncRemoteOutputs) {
+ TestRemoteExecuteSilentCopies(/*async=*/false, /*remote=*/false,
+ /*func=*/true,
+ /*heavy_load_on_streaming_rpc=*/false,
+ /*remote_func_outputs=*/true);
+}
+TEST(CAPI, DISABLED_RemoteExecuteSilentCopiesLocalAsyncFuncRemoteOutputs) {
+ TestRemoteExecuteSilentCopies(/*async=*/true, /*remote=*/false, /*func=*/true,
+ /*heavy_load_on_streaming_rpc=*/false,
+ /*remote_func_outputs=*/true);
+}
TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFuncOrdering) {
// A remote input may be not ready when we start running a function. Test that
// the function execution should wait until the remote input is ready.
diff --git a/tensorflow/c/eager/c_api_test_util.cc b/tensorflow/c/eager/c_api_test_util.cc
index 4b5ad8f50f7..192f10533a6 100644
--- a/tensorflow/c/eager/c_api_test_util.cc
+++ b/tensorflow/c/eager/c_api_test_util.cc
@@ -88,6 +88,20 @@ TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx) {
return th;
}
+TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx,
+ float data[], int64_t dims[],
+ int num_dims) {
+ TF_Status* status = TF_NewStatus();
+ TF_Tensor* t =
+ TFE_AllocateHostTensor(ctx, TF_FLOAT, &dims[0], num_dims, status);
+ memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t));
+ TFE_TensorHandle* th = TFE_NewTensorHandleFromTensor(ctx, t, status);
+ CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+ TF_DeleteTensor(t);
+ TF_DeleteStatus(status);
+ return th;
+}
+
TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx) {
constexpr int64_t dims[] = {100, 100};
constexpr int num_elements = dims[0] * dims[1];
@@ -143,7 +157,7 @@ TFE_TensorHandle* TestVariable(TFE_Context* ctx, float value,
if (TF_GetCode(status) != TF_OK) return nullptr;
TFE_OpSetAttrType(op, "dtype", TF_FLOAT);
TFE_OpSetAttrShape(op, "shape", {}, 0, status);
- TFE_OpSetAttrString(op, "container", "", 0);
+ TFE_OpSetAttrString(op, "container", "localhost", 0);
TFE_OpSetAttrString(op, "shared_name", "", 0);
if (!device_name.empty()) {
TFE_OpSetDevice(op, device_name.c_str(), status);
diff --git a/tensorflow/c/eager/c_api_test_util.h b/tensorflow/c/eager/c_api_test_util.h
index fcf62223f14..fcf407aa9c3 100644
--- a/tensorflow/c/eager/c_api_test_util.h
+++ b/tensorflow/c/eager/c_api_test_util.h
@@ -34,6 +34,12 @@ TFE_TensorHandle* DoubleTestMatrixTensorHandle(TFE_Context* ctx);
// Return a tensor handle containing a 2x2 matrix of floats
TFE_TensorHandle* TestMatrixTensorHandle(TFE_Context* ctx);
+// Return a tensor handle containing a 2D matrix with the given data and
+// dimensions
+TFE_TensorHandle* TestMatrixTensorHandleWithInput(TFE_Context* ctx,
+ float data[], int64_t dims[],
+ int num_dims);
+
// Return a tensor handle containing a 100x100 matrix of floats
TFE_TensorHandle* TestMatrixTensorHandle100x100(TFE_Context* ctx);
diff --git a/tensorflow/c/eager/c_api_unified_experimental.cc b/tensorflow/c/eager/c_api_unified_experimental.cc
index 605a60c186c..8408f7ef60f 100644
--- a/tensorflow/c/eager/c_api_unified_experimental.cc
+++ b/tensorflow/c/eager/c_api_unified_experimental.cc
@@ -147,7 +147,7 @@ TF_AbstractOp* TF_NewAbstractOp(TF_ExecutionContext* c) {
void TF_DeleteAbstractOp(TF_AbstractOp* op) { unwrap(op)->Release(); }
-void TF_DeleteAbstractTensor(TF_AbstractTensor* t) { unwrap(t)->Release(); }
+void TF_DeleteAbstractTensor(TF_AbstractTensor* t) { unwrap(t)->Unref(); }
TF_OutputList* TF_NewOutputList() { return wrap(new OutputList); }
void TF_DeleteOutputList(TF_OutputList* o) { delete unwrap(o); }
diff --git a/tensorflow/c/eager/c_api_unified_experimental_graph.cc b/tensorflow/c/eager/c_api_unified_experimental_graph.cc
index 6165a7d14a3..7bda3aed76d 100644
--- a/tensorflow/c/eager/c_api_unified_experimental_graph.cc
+++ b/tensorflow/c/eager/c_api_unified_experimental_graph.cc
@@ -33,6 +33,7 @@ limitations under the License.
using tensorflow::dyn_cast;
using tensorflow::string;
+using tensorflow::gtl::ArraySlice;
namespace tensorflow {
namespace tracing {
@@ -48,7 +49,6 @@ class GraphTensor : public TracingTensorHandle {
public:
explicit GraphTensor(TF_Output output)
: TracingTensorHandle(kGraph), output_(output) {}
- void Release() override { delete this; }
tensorflow::DataType DataType() const override {
    return static_cast<tensorflow::DataType>(TF_OperationOutputType(output_));
@@ -138,20 +138,23 @@ class GraphOperation : public TracingOperation {
Status SetAttrString(const char* attr_name, const char* data,
size_t length) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrString has not been implemented yet.");
+ tensorflow::StringPiece s(data, length);
+ op_->node_builder.Attr(attr_name, s);
+ return Status::OK();
}
Status SetAttrInt(const char* attr_name, int64_t value) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrInt has not been implemented yet.");
+ static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
+ "64-bit int types should match in size");
+    op_->node_builder.Attr(attr_name, static_cast<tensorflow::int64>(value));
+ return Status::OK();
}
Status SetAttrFloat(const char* attr_name, float value) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrFloat has not been implemented yet.");
+ op_->node_builder.Attr(attr_name, value);
+ return Status::OK();
}
Status SetAttrBool(const char* attr_name, bool value) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrBool has not been implemented yet.");
+ op_->node_builder.Attr(attr_name, value);
+ return Status::OK();
}
Status SetAttrType(const char* const attr_name, DataType value) override {
if (!op_) {
@@ -164,8 +167,15 @@ class GraphOperation : public TracingOperation {
}
Status SetAttrShape(const char* attr_name, const int64_t* dims,
const int num_dims) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrShape has not been implemented yet.");
+ PartialTensorShape shape;
+ if (num_dims >= 0) {
+ static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
+ "64-bit int types should match in size");
+      shape = PartialTensorShape(ArraySlice<tensorflow::int64>(
+          reinterpret_cast<const tensorflow::int64*>(dims), num_dims));
+ }
+ op_->node_builder.Attr(attr_name, shape);
+ return Status::OK();
}
Status SetAttrFunction(const char* attr_name,
const AbstractOperation* value) override {
@@ -174,8 +184,10 @@ class GraphOperation : public TracingOperation {
}
Status SetAttrFunctionName(const char* attr_name, const char* value,
size_t length) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrFunctionName has not been implemented yet.");
+ tensorflow::NameAttrList func_name;
+ func_name.set_name(string(value, value + length));
+ op_->node_builder.Attr(attr_name, func_name);
+ return Status::OK();
}
Status SetAttrTensor(const char* attr_name,
AbstractTensorInterface* tensor) override {
@@ -184,33 +196,71 @@ class GraphOperation : public TracingOperation {
}
Status SetAttrStringList(const char* attr_name, const void* const* values,
const size_t* lengths, int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrStringList has not been implemented yet.");
+ if (strcmp(attr_name, tensorflow::kColocationAttrName) == 0) {
+ op_->colocation_constraints.clear();
+ for (int i = 0; i < num_values; ++i) {
+        op_->colocation_constraints.emplace(
+            static_cast<const char*>(values[i]), lengths[i]);
+ }
+ } else {
+      std::vector<tensorflow::StringPiece> v;
+ v.reserve(num_values);
+ for (int i = 0; i < num_values; ++i) {
+        v.emplace_back(static_cast<const char*>(values[i]), lengths[i]);
+ }
+ op_->node_builder.Attr(attr_name, v);
+ }
+ return Status::OK();
}
Status SetAttrFloatList(const char* attr_name, const float* values,
int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrFloatList has not been implemented yet.");
+ op_->node_builder.Attr(attr_name,
+                           ArraySlice<const float>(values, num_values));
+ return Status::OK();
}
Status SetAttrIntList(const char* attr_name, const int64_t* values,
int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrIntList has not been implemented yet.");
+ static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
+ "64-bit int types should match in size");
+ op_->node_builder.Attr(
+ attr_name,
+        ArraySlice<const tensorflow::int64>(
+            reinterpret_cast<const tensorflow::int64*>(values), num_values));
+ return Status::OK();
}
Status SetAttrTypeList(const char* attr_name, const DataType* values,
int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrTypeList has not been implemented yet.");
+ op_->node_builder.Attr(attr_name,
+                           ArraySlice<const DataType>(values, num_values));
+ return Status::OK();
}
Status SetAttrBoolList(const char* attr_name, const unsigned char* values,
int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrBoolList has not been implemented yet.");
+    std::unique_ptr<bool[]> b(new bool[num_values]);
+ for (int i = 0; i < num_values; ++i) {
+ b[i] = values[i];
+ }
+ op_->node_builder.Attr(attr_name,
+                           ArraySlice<const bool>(b.get(), num_values));
+
+ return Status::OK();
}
Status SetAttrShapeList(const char* attr_name, const int64_t** dims,
const int* num_dims, int num_values) override {
- return tensorflow::errors::Unimplemented(
- "SetAttrShapeList has not been implemented yet.");
+    std::vector<PartialTensorShape> shapes;
+ shapes.reserve(num_values);
+ for (int i = 0; i < num_values; ++i) {
+ if (num_dims[i] < 0) {
+ shapes.emplace_back();
+ } else {
+ static_assert(sizeof(int64_t) == sizeof(tensorflow::int64),
+ "64-bit int types should match in size");
+        shapes.emplace_back(ArraySlice<tensorflow::int64>(
+            reinterpret_cast<const tensorflow::int64*>(dims[i]), num_dims[i]));
+ }
+ }
+ op_->node_builder.Attr(attr_name, shapes);
+ return Status::OK();
}
Status SetAttrFunctionList(
const char* attr_name,
diff --git a/tensorflow/c/eager/c_api_unified_experimental_test.cc b/tensorflow/c/eager/c_api_unified_experimental_test.cc
index a25dccc4638..c56e8ab05fc 100644
--- a/tensorflow/c/eager/c_api_unified_experimental_test.cc
+++ b/tensorflow/c/eager/c_api_unified_experimental_test.cc
@@ -92,9 +92,255 @@ TEST_P(UnifiedCAPI, TestBasicEager) {
TF_DeleteExecutionContext(ctx);
}
+// MatMul Test
+TEST_P(UnifiedCAPI, TestBasicEagerMatMul) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteContextOptions(opts);
+
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ /* Want to test simple MatMul example:
+ [[0,0], * [[0,0], = [[0,0],
+ [0,0]] [0,0]] [0,0]]
+ */
+
+ // Build an abstract input tensor.
+ int64_t dims[] = {2, 2}; // Matrices will be 2 x 2
+ int num_dims = sizeof(dims) / sizeof(dims[0]);
+
+ float vals[] = {0.0f, 0.0f, 0.0f, 0.0f};
+ TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get());
+ TFE_TensorHandle* t =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims);
+
+ TF_AbstractTensor* at = TF_CreateAbstractTensorFromEagerTensor(
+ t, status.get()); // get abstract tensor
+
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build an abstract operation.
+ auto* op = TF_NewAbstractOp(ctx);
+ TF_AbstractOpSetOpType(op, "MatMul", status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build inputs and outputs.
+ TF_AbstractTensor* inputs[2] = {at, at};
+ TF_OutputList* o = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(o, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Execute.
+ TF_ExecuteOperation(op, 2, inputs, o, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Clean up operation and inputs.
+ TF_DeleteAbstractOp(op);
+ TF_DeleteAbstractTensor(at);
+
+ // Verify the results.
+ ASSERT_EQ(1, TF_OutputListNumOutputs(o));
+ TF_AbstractTensor* result = TF_OutputListGet(o, 0);
+ TFE_TensorHandle* result_t =
+ TF_AbstractTensorGetEagerTensor(result, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get());
+
+ // Copy Tensor data into an array.
+ float result_data[4] = {0};
+ memcpy(&result_data[0], TF_TensorData(result_tensor),
+ TF_TensorByteSize(result_tensor));
+
+ int data_len = 4; // length of result_data
+ for (int i = 0; i < data_len; i++) {
+ EXPECT_EQ(result_data[i], 0);
+ }
+
+ TF_DeleteTensor(result_tensor);
+ TF_DeleteAbstractTensor(result);
+ TF_DeleteOutputList(o);
+ TF_DeleteExecutionContext(ctx);
+}
+
+// MatMul Test 2
+TEST_P(UnifiedCAPI, TestBasicEagerMatMul2) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteContextOptions(opts);
+
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ /* Want to test simple MatMul example with abstract tensors:
+ [[1,2], * [[5,6], = [[19,22],
+ [3,4]] [7,8]] [43,50]]
+ */
+
+ // Build 1st Matrix.
+ int64_t dims[] = {2, 2}; // Matrices will be 2 x 2
+ int num_dims = sizeof(dims) / sizeof(dims[0]);
+
+ float vals1[] = {1.0f, 2.0f, 3.0f, 4.0f};
+ TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get());
+ TFE_TensorHandle* t1 =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims);
+
+ TF_AbstractTensor* at1 = TF_CreateAbstractTensorFromEagerTensor(
+ t1, status.get()); // get abstract tensor
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build 2nd Matrix.
+ float vals2[] = {5.0f, 6.0f, 7.0f, 8.0f};
+ TFE_TensorHandle* t2 =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims);
+
+ TF_AbstractTensor* at2 = TF_CreateAbstractTensorFromEagerTensor(
+ t2, status.get()); // get abstract tensor
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build an abstract operation.
+ auto* op = TF_NewAbstractOp(ctx);
+ TF_AbstractOpSetOpType(op, "MatMul", status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build inputs and outputs.
+ TF_AbstractTensor* inputs[2] = {at1, at2};
+ TF_OutputList* o = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(o, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Execute.
+ TF_ExecuteOperation(op, 2, inputs, o, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Clean up operation and inputs.
+ TF_DeleteAbstractOp(op);
+ TF_DeleteAbstractTensor(at1);
+ TF_DeleteAbstractTensor(at2);
+
+ // Verify the results.
+ ASSERT_EQ(1, TF_OutputListNumOutputs(o));
+ TF_AbstractTensor* result = TF_OutputListGet(o, 0);
+ TFE_TensorHandle* result_t =
+ TF_AbstractTensorGetEagerTensor(result, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get());
+
+ // Copy Tensor data into array.
+ float result_data[4] = {0};
+ memcpy(&result_data[0], TF_TensorData(result_tensor),
+ TF_TensorByteSize(result_tensor));
+
+ // Build expected result & verify.
+ float e_vals[] = {19.0f, 22.0f, 43.0f, 50.0f};
+
+ int data_len = 4; // length of e_vals
+ for (int i = 0; i < data_len; i++) {
+ EXPECT_EQ(result_data[i], e_vals[i]);
+ }
+
+ TF_DeleteTensor(result_tensor);
+ TF_DeleteAbstractTensor(result);
+ TF_DeleteOutputList(o);
+ TF_DeleteExecutionContext(ctx);
+}
+
+// MatAdd
+TEST_P(UnifiedCAPI, TestBasicEagerMatAdd) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TF_ExecutionContext* ctx = TF_NewEagerExecutionContext(opts, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteContextOptions(opts);
+
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ /* Want to test simple MatAdd example with abstract tensors:
+ [[1,2] , + [[5,6], = [[6,8],
+ [3,4] ] [7,8] ] [10,12]]
+ */
+
+ // Build 1st Matrix.
+ int64_t dims[] = {2, 2}; // Matrices will be 2 x 2
+ int num_dims = sizeof(dims) / sizeof(dims[0]);
+
+ float vals1[] = {1.0f, 2.0f, 3.0f, 4.0f};
+ TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx, status.get());
+ TFE_TensorHandle* t1 =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims);
+
+ TF_AbstractTensor* at1 = TF_CreateAbstractTensorFromEagerTensor(
+ t1, status.get()); // get abstract tensor
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build 2nd Matrix.
+ float vals2[] = {5.0f, 6.0f, 7.0f, 8.0f};
+ TFE_TensorHandle* t2 =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims);
+
+ TF_AbstractTensor* at2 = TF_CreateAbstractTensorFromEagerTensor(
+ t2, status.get()); // get abstract tensor
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build an abstract operation.
+ auto* op = TF_NewAbstractOp(ctx);
+ TF_AbstractOpSetOpType(op, "Add", status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build inputs and outputs.
+ TF_AbstractTensor* inputs[2] = {at1, at2};
+ TF_OutputList* o = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(o, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Execute.
+ TF_ExecuteOperation(op, 2, inputs, o, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Clean up operation and inputs.
+ TF_DeleteAbstractOp(op);
+ TF_DeleteAbstractTensor(at1);
+ TF_DeleteAbstractTensor(at2);
+
+ // Verify the results.
+ ASSERT_EQ(1, TF_OutputListNumOutputs(o));
+ TF_AbstractTensor* result = TF_OutputListGet(o, 0);
+ TFE_TensorHandle* result_t =
+ TF_AbstractTensorGetEagerTensor(result, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ TF_Tensor* result_tensor = TFE_TensorHandleResolve(result_t, status.get());
+
+ // Copy Tensor data into array.
+ float result_data[4] = {0};
+ memcpy(&result_data[0], TF_TensorData(result_tensor),
+ TF_TensorByteSize(result_tensor));
+
+ // Build expected result & verify.
+ float e_vals[] = {6.0f, 8.0f, 10.0f, 12.0f};
+
+ int data_len = 4; // length of e_vals
+ for (int i = 0; i < data_len; i++) {
+ EXPECT_EQ(result_data[i], e_vals[i]);
+ }
+
+ TF_DeleteTensor(result_tensor);
+ TF_DeleteAbstractTensor(result);
+ TF_DeleteOutputList(o);
+ TF_DeleteExecutionContext(ctx);
+}
+
TEST_P(UnifiedCAPI, TestBasicGraph) {
  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
+
// Start a new function / execution context.
string fn_name = "double";
TF_ExecutionContext* graph_ctx =
@@ -142,6 +388,7 @@ TEST_P(UnifiedCAPI, TestBasicGraph) {
TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get());
ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
// Build the abstract op to run the function.
TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx);
TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get());
@@ -180,6 +427,111 @@ TEST_P(UnifiedCAPI, TestBasicGraph) {
TF_DeleteExecutionContext(eager_execution_ctx);
}
+// Graph Tracing for MatMul
+TEST_P(UnifiedCAPI, TestBasicGraphMatMul) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+
+ // Start a new function / execution context.
+ string fn_name = "matrix_multiply";
+ TF_ExecutionContext* graph_ctx =
+ TF_CreateFunction(fn_name.c_str(), status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ auto* placeholder_t =
+ TF_AddFunctionParameter(graph_ctx, TF_FLOAT, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build an abstract operation.
+ auto* matmul_op = TF_NewAbstractOp(graph_ctx);
+ TF_AbstractOpSetOpType(matmul_op, "MatMul", status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TF_AbstractOpSetOpName(matmul_op, "my_matmul", status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build inputs and outputs.
+ TF_AbstractTensor* inputs[2] = {placeholder_t, placeholder_t};
+ TF_OutputList* mm_outputs = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(mm_outputs, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Execute.
+ TF_ExecuteOperation(matmul_op, 2, inputs, mm_outputs, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Clean up operation and inputs.
+ TF_DeleteAbstractOp(matmul_op);
+
+ TF_AbstractFunction* func =
+ TF_FinalizeFunction(graph_ctx, mm_outputs, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ /* Now that the graph is built, test graph implementation on matmul example:
+ [[1,1] , * [[1,1] , = [[2,2],
+ [1,1]] [1,1]] [2,2]]
+ */
+
+ // Build eager context.
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TF_ExecutionContext* eager_execution_ctx =
+ TF_NewEagerExecutionContext(opts, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TFE_DeleteContextOptions(opts);
+
+ TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build the abstract op to run the function.
+ TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx);
+ TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Build an abstract input tensor.
+ TFE_Context* eager_ctx =
+ TF_ExecutionContextGetTFEContext(eager_execution_ctx, status.get());
+
+ float vals[] = {1.0f, 1.0f, 1.0f, 1.0f};
+ int64_t dims[] = {2, 2}; // Matrices will be 2 x 2
+ int num_dims = sizeof(dims) / sizeof(dims[0]);
+
+ TFE_TensorHandle* input_eager =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals, dims, num_dims);
+ TF_AbstractTensor* input_t =
+ TF_CreateAbstractTensorFromEagerTensor(input_eager, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ TF_OutputListSetNumOutputs(mm_outputs, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TF_ExecuteOperation(fn_op, 1, &input_t, mm_outputs, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ ASSERT_EQ(1, TF_OutputListNumOutputs(mm_outputs));
+ TF_AbstractTensor* final_result = TF_OutputListGet(mm_outputs, 0);
+ TFE_TensorHandle* final =
+ TF_AbstractTensorGetEagerTensor(final_result, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+ TF_Tensor* f_t = TFE_TensorHandleResolve(final, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ float result_data[4] = {0};
+ memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t));
+
+ int data_len = 4;
+ for (int i = 0; i < data_len; i++) {
+ ASSERT_EQ(result_data[i], 2.0f);
+ }
+
+ TF_DeleteAbstractTensor(final_result);
+ TF_DeleteOutputList(mm_outputs);
+ TF_DeleteAbstractTensor(placeholder_t);
+ TF_DeleteAbstractOp(fn_op);
+ TF_DeleteAbstractTensor(input_t);
+ TF_DeleteTensor(f_t);
+ TF_DeleteAbstractFunction(func);
+
+ TF_DeleteExecutionContext(eager_execution_ctx);
+}
+
TEST_P(UnifiedCAPI, TestMultiOutputGraph) {
  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
@@ -336,6 +688,217 @@ TEST_P(UnifiedCAPI, TestMultiOutputGraph) {
TF_DeleteAbstractFunction(func);
}
+TEST_P(UnifiedCAPI, TestMultiOutputGraphMatMul) {
+  std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+ TF_Status* s = status.get();
+
+ // Start a new function / execution context.
+ string fn_name = "two_adds_and_matmul";
+ TF_ExecutionContext* graph_ctx = TF_CreateFunction(fn_name.c_str(), s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ auto* arg0 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ auto* arg1 = TF_AddFunctionParameter(graph_ctx, TF_FLOAT, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ // Create a first "Add" computing `arg0 + arg1`.
+ TF_AbstractTensor* add_output1;
+ {
+ // Build an abstract operation, inputs and output.
+ auto* add_op = TF_NewAbstractOp(graph_ctx);
+ TF_AbstractOpSetOpType(add_op, "Add", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractOpSetOpName(add_op, "my_add1", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractTensor* inputs[2] = {arg0, arg1};
+ TF_OutputList* add_outputs = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(add_outputs, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Trace the operation now (create a node in the graph).
+ TF_ExecuteOperation(add_op, 2, inputs, add_outputs, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_DeleteAbstractOp(add_op);
+
+ // Extract the resulting tensor.
+ add_output1 = TF_OutputListGet(add_outputs, 0);
+ TF_DeleteOutputList(add_outputs);
+ }
+
+ // Same with a second "Add" computing `arg1 + arg1`.
+ TF_AbstractTensor* add_output2;
+ {
+ // Build an abstract operation, inputs and output.
+ auto* add_op = TF_NewAbstractOp(graph_ctx);
+ TF_AbstractOpSetOpType(add_op, "Add", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractOpSetOpName(add_op, "my_add2", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractTensor* inputs[2] = {arg1, arg1};
+ TF_OutputList* add_outputs = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(add_outputs, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Trace the operation now (create a node in the graph).
+ TF_ExecuteOperation(add_op, 2, inputs, add_outputs, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_DeleteAbstractOp(add_op);
+
+ // Extract the resulting tensor.
+ add_output2 = TF_OutputListGet(add_outputs, 0);
+ TF_DeleteOutputList(add_outputs);
+ }
+
+ // 3rd Output will be Matrix Multiplication of add_output1 and add_output2
+ TF_AbstractTensor* mm_output;
+ {
+ // Build an abstract operation, inputs and output.
+ auto* mm_op = TF_NewAbstractOp(graph_ctx);
+ TF_AbstractOpSetOpType(mm_op, "MatMul", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractOpSetOpName(mm_op, "mm", s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_AbstractTensor* inputs[2] = {add_output1, add_output2};
+ TF_OutputList* mm_outputs = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(mm_outputs, 1, status.get());
+ ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get());
+
+ // Trace the operation now (create a node in the graph).
+ TF_ExecuteOperation(mm_op, 2, inputs, mm_outputs, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_DeleteAbstractOp(mm_op);
+
+ // Extract the resulting tensor.
+ mm_output = TF_OutputListGet(mm_outputs, 0);
+ TF_DeleteOutputList(mm_outputs);
+ }
+
+ // Finalize the function by providing the returned values.
+ TF_AbstractFunction* func;
+ {
+    // We want to return the outputs of both add operations and the MatMul
+    // operation, so create a new list and populate it.
+ TF_OutputList* func_outputs = TF_NewOutputList();
+ TF_OutputListPushBack(func_outputs, add_output1, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_OutputListPushBack(func_outputs, add_output2, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_OutputListPushBack(func_outputs, mm_output, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ func = TF_FinalizeFunction(graph_ctx, func_outputs, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_DeleteOutputList(func_outputs);
+ }
+
+ /**
+ * We traced so far this function:
+ *
+ * def two_adds_and_mm(A, B):
+ * my_add1 = A + B
+ * my_add2 = B + B
+ * mm = tf.MatMul(my_add1,my_add2)
+ * return my_add1, my_add2, mm
+ *
+ * Now we will execute this function with an eager context:
+ *
+ * A =[[0, 1],[1, 0]]
+ * B =[[1, 0],[0, 1]]
+ *
+ * output1, output2, output3 = two_adds_and_mm(A, B)
+ *
+ * We expect outputs:
+ *
+ * output1 = [[1, 1],[1, 1]]
+ * output2 = [[2, 0],[0, 2]]
+ * output3 = [[2, 2],[2, 2]]
+ *
+ */
+
+ // Build eager context.
+ TFE_ContextOptions* opts = TFE_NewContextOptions();
+ TF_ExecutionContext* eager_execution_ctx =
+ TF_NewEagerExecutionContext(opts, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TFE_DeleteContextOptions(opts);
+
+ TF_ExecutionContextRegisterFunction(eager_execution_ctx, func, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ // Build the abstract op to run the function.
+ TF_AbstractOp* fn_op = TF_NewAbstractOp(eager_execution_ctx);
+ TF_AbstractOpSetOpType(fn_op, fn_name.c_str(), s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ // Build two abstract input tensors as function arguments.
+ std::vector<TF_AbstractTensor*> func_args;
+ {
+ TFE_Context* eager_ctx =
+ TF_ExecutionContextGetTFEContext(eager_execution_ctx, s);
+
+ // 1st Arg
+ float vals1[] = {0.0f, 1.0f, 1.0f, 0.0f};
+ int64_t dims[] = {2, 2}; // Matrices will be 2 x 2
+ int num_dims = sizeof(dims) / sizeof(dims[0]);
+
+ TFE_TensorHandle* input_eager =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals1, dims, num_dims);
+ func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s));
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ // 2nd Arg
+ float vals2[] = {1.0f, 0.0f, 0.0f, 1.0f};
+ input_eager =
+ TestMatrixTensorHandleWithInput(eager_ctx, vals2, dims, num_dims);
+ func_args.push_back(TF_CreateAbstractTensorFromEagerTensor(input_eager, s));
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ }
+
+ TF_OutputList* func_outputs = TF_NewOutputList();
+ TF_OutputListSetNumOutputs(func_outputs, 3, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_ExecuteOperation(fn_op, func_args.size(), func_args.data(), func_outputs,
+ s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_DeleteAbstractOp(fn_op);
+ for (TF_AbstractTensor* t : func_args) TF_DeleteAbstractTensor(t);
+
+ ASSERT_EQ(3, TF_OutputListNumOutputs(func_outputs));
+
+ float expected_outputs[3][4] = {{1.0f, 1.0f, 1.0f, 1.0f},
+ {2.0f, 0.0f, 0.0f, 2.0f},
+ {2.0f, 2.0f, 2.0f, 2.0f}};
+
+ float result_data[4];
+ for (int idx = 0; idx < 3; ++idx) {
+ TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx);
+ TFE_TensorHandle* handle = TF_AbstractTensorGetEagerTensor(result, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+ TF_Tensor* f_t = TFE_TensorHandleResolve(handle, s);
+ ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s);
+
+ memcpy(&result_data[0], TF_TensorData(f_t), TF_TensorByteSize(f_t));
+
+ // Verify results for each output
+ for (int j = 0; j < 4; j++) {
+ ASSERT_EQ(result_data[j], expected_outputs[idx][j]);
+ }
+
+ TF_DeleteTensor(f_t);
+ }
+
+ // Free memory associated with add and MatMul outputs
+ for (int idx = 0; idx < 3; ++idx) {
+ TF_AbstractTensor* result = TF_OutputListGet(func_outputs, idx);
+ TF_DeleteAbstractTensor(result);
+ }
+
+ TF_DeleteOutputList(func_outputs);
+ TF_DeleteExecutionContext(eager_execution_ctx);
+ TF_DeleteAbstractFunction(func);
+}
+
TEST_P(UnifiedCAPI, TF_ExecutionContextToFunctionWithEagerContextRaises) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
diff --git a/tensorflow/c/eager/gradients.cc b/tensorflow/c/eager/gradients.cc
index 3a7a6282192..39cadd421e2 100644
--- a/tensorflow/c/eager/gradients.cc
+++ b/tensorflow/c/eager/gradients.cc
@@ -51,25 +51,14 @@ int64 ToId(AbstractTensorHandle* t) {
TapeTensor::TapeTensor(AbstractTensorHandle* handle, AbstractContext* ctx)
: handle_(handle), ctx_(ctx) {
- // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely
- // on the client to keep this tensor live for the duration of the gradient
- // computation.
- // handle_->Ref();
+ handle_->Ref();
}
TapeTensor::TapeTensor(const TapeTensor& other) {
handle_ = other.handle_;
- // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely
- // on the client to keep this tensor live for the duration of the gradient
- // computation.
- // handle_->Ref();
+ handle_->Ref();
ctx_ = other.ctx_;
}
-TapeTensor::~TapeTensor() {
- // TODO(b/160888114): Make AbstractTensorHandle RefCounted. Right now we rely
- // on the client to keep this tensor live for the duration of the gradient
- // computation.
- // handle_->Unref();
-}
+TapeTensor::~TapeTensor() { handle_->Unref(); }
tensorflow::int64 TapeTensor::GetID() const { return ToId(handle_); }
@@ -112,7 +101,7 @@ AbstractTensorHandle* TapeTensor::ZerosLike() const {
}
if (isa<tracing::TracingOperation>(op.get())) {
s = dyn_cast<tracing::TracingOperation>(op.get())->SetOpName(
- absl::StrCat("OnesLike", ToId(handle_)).c_str());
+ absl::StrCat("ZerosLike", ToId(handle_)).c_str());
if (!s.ok()) {
return nullptr;
}
@@ -175,7 +164,8 @@ Status TapeVSpace::CallBackwardFunction(
gtl::ArraySlice<AbstractTensorHandle*> output_gradients,
std::vector<AbstractTensorHandle*>* result) const {
if (backward_function == nullptr) return Status::OK();
- return backward_function->Compute(output_gradients, result);
+ Context ctx = {ctx_};
+ return backward_function->Compute(&ctx, output_gradients, result);
}
// Looks up the ID of a Gradient.
@@ -191,7 +181,7 @@ TapeTensor TapeVSpace::TapeTensorFromGradient(AbstractTensorHandle* g) const {
void TapeVSpace::MarkAsResult(AbstractTensorHandle* gradient) const {}
void TapeVSpace::DeleteGradient(AbstractTensorHandle* gradient) const {
- gradient->Release();
+ gradient->Unref();
}
// Helper functions which delegate to `AbstractOperation`, update
@@ -373,6 +363,10 @@ Status Execute(AbstractOperation* op_, AbstractContext* ctx,
input_ids[i] = ToId(forward_op_->inputs[i]);
input_dtypes[i] = forward_op_->inputs[i]->DataType();
}
+ for (int i = 0; i < *num_retvals; i++) {
+ // TODO(srbs): Manage refcount of ForwardOperation's inputs/outputs.
+ forward_op_->outputs.push_back(retvals[i]);
+ }
std::vector<TapeTensor> tape_tensors;
for (auto t : retvals) {
tape_tensors.push_back(TapeTensor(t, ctx));
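
The net effect of the gradients.cc hunks above is that TapeTensor now owns a reference to its handle for its entire lifetime, instead of relying on the client to keep the tensor alive during gradient computation. The following minimal, self-contained sketch illustrates the ref-counting contract this relies on; Handle and TapeTensorLike are illustrative stand-ins, not the real TensorFlow classes.

#include <cassert>

class Handle {
 public:
  void Ref() { ++refs_; }
  void Unref() {
    if (--refs_ == 0) delete this;
  }
  int refs() const { return refs_; }

 private:
  ~Handle() = default;  // heap-only, like AbstractTensorHandle
  int refs_ = 1;        // the creator owns the initial reference
};

class TapeTensorLike {
 public:
  explicit TapeTensorLike(Handle* h) : h_(h) { h_->Ref(); }
  TapeTensorLike(const TapeTensorLike& other) : h_(other.h_) { h_->Ref(); }
  ~TapeTensorLike() { h_->Unref(); }

 private:
  Handle* h_;
};

int main() {
  Handle* h = new Handle();  // refcount 1, owned by the caller
  {
    TapeTensorLike t1(h);    // constructor Ref(): refcount 2
    TapeTensorLike t2(t1);   // copy constructor Ref(): refcount 3
    assert(h->refs() == 3);
  }                          // both destructors Unref(): refcount 1
  h->Unref();                // caller drops its reference; handle is freed
}
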
diff --git a/tensorflow/c/eager/gradients.h b/tensorflow/c/eager/gradients.h
index e09b6ff8613..267ee5b7ab2 100644
--- a/tensorflow/c/eager/gradients.h
+++ b/tensorflow/c/eager/gradients.h
@@ -31,7 +31,8 @@ namespace gradients {
//
// class AddGradientFunction : public GradientFunction {
// public:
-// Status Compute(absl::Span<AbstractTensorHandle* const> grad_inputs,
+// Status Compute(Context* ctx,
+// absl::Span<AbstractTensorHandle* const> grad_inputs,
// std::vector<AbstractTensorHandle*>* grad_outputs) override {
// grad_outputs->resize(2);
// (*grad_outputs)[0] = grad_inputs[0];
@@ -50,11 +51,16 @@ namespace gradients {
// Status RegisterGradients(GradientRegistry* registry) {
// return registry->Register("Add", AddRegisterer);
// }
+struct Context {
+ public:
+ AbstractContext* ctx;
+};
class GradientFunction {
public:
// TODO(srbs): How we support CompositeTensors e.g. IndexedSlices in
// `grad_inputs`.
- virtual Status Compute(absl::Span<AbstractTensorHandle* const> grad_inputs,
+ virtual Status Compute(Context* ctx,
+ absl::Span<AbstractTensorHandle* const> grad_inputs,
std::vector<AbstractTensorHandle*>* grad_outputs) = 0;
virtual ~GradientFunction() {}
};
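
Pulling the pieces of this header together, the shape of the new calling convention is easiest to see in isolation: the runtime hands a per-call Context* to Compute(), so gradient functions no longer capture an AbstractContext* at construction time the way the AddGradientFunction deleted from gradients_test.cc below did. This is a self-contained sketch with simplified stand-in types, not the real API.

#include <iostream>
#include <vector>

struct Context { void* ctx; };  // stand-in for the struct wrapping AbstractContext*

class GradientFunction {
 public:
  // Mirrors the new signature: Context* first, then gradient inputs/outputs.
  virtual bool Compute(Context* ctx, const std::vector<int>& grad_inputs,
                       std::vector<int>* grad_outputs) = 0;
  virtual ~GradientFunction() {}
};

// "Add" gradient: route the incoming gradient to both inputs.
class AddGradientFunction : public GradientFunction {
 public:
  bool Compute(Context* /*ctx*/, const std::vector<int>& grad_inputs,
               std::vector<int>* grad_outputs) override {
    grad_outputs->assign(2, grad_inputs[0]);
    return true;  // stand-in for Status::OK()
  }
};

int main() {
  Context ctx{nullptr};
  AddGradientFunction add_grad;
  std::vector<int> grads;
  add_grad.Compute(&ctx, {7}, &grads);
  std::cout << grads[0] << " " << grads[1] << "\n";  // prints: 7 7
}
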
diff --git a/tensorflow/c/eager/gradients_test.cc b/tensorflow/c/eager/gradients_test.cc
index 5820058f3e2..41993b3e125 100644
--- a/tensorflow/c/eager/gradients_test.cc
+++ b/tensorflow/c/eager/gradients_test.cc
@@ -23,6 +23,8 @@ limitations under the License.
#include "tensorflow/c/eager/c_api_unified_experimental.h"
#include "tensorflow/c/eager/c_api_unified_experimental_internal.h"
#include "tensorflow/c/eager/gradients_internal.h"
+#include "tensorflow/c/experimental/gradients/math_grad.h"
+#include "tensorflow/c/experimental/ops/array_ops.h"
#include "tensorflow/c/tf_status_helper.h"
#include "tensorflow/c/tf_tensor.h"
#include "tensorflow/core/lib/llvm_rtti/llvm_rtti.h"
@@ -42,55 +44,12 @@ class CppGradients
}
};
-// Creates an Identity op.
-Status Identity(AbstractContext* ctx,
- absl::Span<AbstractTensorHandle* const> inputs,
- absl::Span<AbstractTensorHandle*> outputs, const char* name) {
- AbstractOperationPtr identity_op(ctx->CreateOperation());
- TF_RETURN_IF_ERROR(
- identity_op->Reset("Identity", /*raw_device_name=*/nullptr));
- if (isa<tracing::TracingOperation>(identity_op.get())) {
- TF_RETURN_IF_ERROR(dyn_cast<tracing::TracingOperation>(identity_op.get())
- ->SetOpName(name));
- }
- TF_RETURN_IF_ERROR(identity_op->AddInput(inputs[0]));
- int num_retvals = 1;
- TF_RETURN_IF_ERROR(identity_op->Execute(outputs, &num_retvals));
+Status RegisterGradients(GradientRegistry* registry) {
+ TF_RETURN_IF_ERROR(registry->Register("Add", AddRegisterer));
+ TF_RETURN_IF_ERROR(registry->Register("Exp", ExpRegisterer));
return Status::OK();
}
-// =================== Register gradients for Add ============================
-class AddGradientFunction : public GradientFunction {
- public:
- explicit AddGradientFunction(AbstractContext* ctx) : ctx_(ctx) {}
- Status Compute(absl::Span<AbstractTensorHandle* const> grad_inputs,
- std::vector<AbstractTensorHandle*>* grad_outputs) override {
- grad_outputs->resize(2);
- std::vector<AbstractTensorHandle*> identity_outputs(1);
- TF_RETURN_IF_ERROR(Identity(ctx_, {grad_inputs[0]},
- absl::MakeSpan(identity_outputs), "Id0"));
- (*grad_outputs)[0] = identity_outputs[0];
- TF_RETURN_IF_ERROR(Identity(ctx_, {grad_inputs[0]},
- absl::MakeSpan(identity_outputs), "Id1"));
- (*grad_outputs)[1] = identity_outputs[0];
- return Status::OK();
- }
- ~AddGradientFunction() override {}
-
- private:
- AbstractContext* ctx_;
-};
-
-GradientFunction* AddRegisterer(const ForwardOperation& op) {
- return new AddGradientFunction(op.ctx);
-}
-
-Status RegisterGradients(GradientRegistry* registry) {
- return registry->Register("Add", AddRegisterer);
-}
-
-// =================== End gradient registrations ============================
-
// Computes `inputs[0] + inputs[1]` and records it on the tape.
Status Add(AbstractContext* ctx, Tape* tape,
absl::Span<AbstractTensorHandle* const> inputs,
@@ -112,6 +71,26 @@ Status Add(AbstractContext* ctx, Tape* tape,
registry);
}
+// Computes `exp(inputs[0])` and records it on the tape.
+Status Exp(AbstractContext* ctx, Tape* tape,
+ absl::Span<AbstractTensorHandle* const> inputs,
+ absl::Span<AbstractTensorHandle*> outputs,
+ const GradientRegistry& registry) {
+ AbstractOperationPtr exp_op(ctx->CreateOperation());
+ ForwardOperation forward_op;
+ forward_op.ctx = ctx;
+ TF_RETURN_IF_ERROR(
+ Reset(exp_op.get(), "Exp", /*raw_device_name=*/nullptr, &forward_op));
+ if (isa<tracing::TracingOperation>(exp_op.get())) {
+ TF_RETURN_IF_ERROR(
+ dyn_cast<tracing::TracingOperation>(exp_op.get())->SetOpName("my_exp"));
+ }
+ TF_RETURN_IF_ERROR(AddInput(exp_op.get(), inputs[0], &forward_op));
+ int num_retvals = 1;
+ return Execute(exp_op.get(), ctx, outputs, &num_retvals, &forward_op, tape,
+ registry);
+}
+
// Computes
// y = inputs[0] + inputs[1]
// return grad(y, {inputs[0], inputs[1]})
@@ -136,7 +115,7 @@ Status AddGradModel(AbstractContext* ctx,
source_tensors_that_are_targets,
/*output_gradients=*/{}, &out_grads));
for (auto add_output : add_outputs) {
- add_output->Release();
+ add_output->Unref();
}
outputs[0] = out_grads[0];
outputs[1] = out_grads[1];
@@ -144,6 +123,35 @@ Status AddGradModel(AbstractContext* ctx,
return Status::OK();
}
+// Computes
+// y = exp(inputs[0])
+// return grad(y, {inputs[0]})
+Status ExpGradModel(AbstractContext* ctx,
+ absl::Span<AbstractTensorHandle* const> inputs,
+ absl::Span<AbstractTensorHandle*> outputs,
+ const GradientRegistry& registry) {
+ TapeVSpace vspace(ctx);
+ auto tape = new Tape(/*persistent=*/false);
+ tape->Watch(ToId(inputs[0])); // Watch x.
+ std::vector<AbstractTensorHandle*> exp_outputs(1);
+ TF_RETURN_IF_ERROR(Exp(ctx, tape, inputs, absl::MakeSpan(exp_outputs),
+ registry)); // Compute exp(x).
+ std::unordered_map<tensorflow::int64, TapeTensor>
+ source_tensors_that_are_targets;
+
+ std::vector<AbstractTensorHandle*> out_grads;
+ TF_RETURN_IF_ERROR(tape->ComputeGradient(
+ vspace, /*target_tensor_ids=*/{ToId(exp_outputs[0])},
+ /*source_tensor_ids=*/{ToId(inputs[0])}, source_tensors_that_are_targets,
+ /*output_gradients=*/{}, &out_grads));
+ for (auto exp_output : exp_outputs) {
+ exp_output->Unref();
+ }
+ outputs[0] = out_grads[0];
+ delete tape;
+ return Status::OK();
+}
+
AbstractContext* BuildFunction(const char* fn_name) {
std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
TF_NewStatus(), TF_DeleteStatus);
@@ -187,14 +195,15 @@ Status RunModel(Model model, AbstractContext* ctx,
TF_RETURN_IF_ERROR(model(func_ctx.get(), absl::MakeSpan(func_inputs),
absl::MakeSpan(output_list.outputs), registry));
for (auto func_input : func_inputs) {
- func_input->Release();
+ func_input->Unref();
}
AbstractFunction* func = nullptr;
TF_RETURN_IF_ERROR(dyn_cast<tracing::TracingContext>(func_ctx.get())
->Finalize(&output_list, &func));
scoped_func.reset(func);
- output_list.outputs[0]->Release();
- output_list.outputs[1]->Release();
+ for (auto output : output_list.outputs) {
+ output->Unref();
+ }
TF_RETURN_IF_ERROR(ctx->RegisterFunction(func));
}
@@ -295,7 +304,7 @@ TEST_P(CppGradients, TestAddGrad) {
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
auto result_value = static_cast<float*>(TF_TensorData(result_tensor));
EXPECT_EQ(*result_value, 1.0);
- outputs[0]->Release();
+ outputs[0]->Unref();
TF_DeleteTensor(result_tensor);
result_tensor = nullptr;
@@ -303,17 +312,61 @@ TEST_P(CppGradients, TestAddGrad) {
ASSERT_EQ(errors::OK, s.code()) << s.error_message();
result_value = static_cast<float*>(TF_TensorData(result_tensor));
EXPECT_EQ(*result_value, 1.0);
- outputs[1]->Release();
+ outputs[1]->Unref();
TF_DeleteTensor(result_tensor);
}
+TEST_P(CppGradients, TestExpGrad) {
+ std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
+ TF_NewStatus(), TF_DeleteStatus);
+ AbstractContextPtr ctx;
+ {
+ AbstractContext* ctx_raw = nullptr;
+ Status s =
+ BuildImmediateExecutionContext(std::get<1>(GetParam()), &ctx_raw);
+ ASSERT_EQ(errors::OK, s.code()) << s.error_message();
+ ctx.reset(ctx_raw);
+ }
+
+ AbstractTensorHandlePtr x;
+ {
+ AbstractTensorHandle* x_raw = nullptr;
+ Status s = TestScalarTensorHandle(ctx.get(), 1.0f, &x_raw);
+ ASSERT_EQ(errors::OK, s.code()) << s.error_message();
+ x.reset(x_raw);
+ }
+
+ GradientRegistry registry;
+ Status s = RegisterGradients(&registry);
+ ASSERT_EQ(errors::OK, s.code()) << s.error_message();
+
+ // Pseudo-code:
+ //
+ // tape.watch(x)
+ // y = exp(x)
+ // outputs = tape.gradient(y, x)
+ std::vector<AbstractTensorHandle*> outputs(1);
+ s = RunModel(ExpGradModel, ctx.get(), {x.get()}, absl::MakeSpan(outputs),
+ /*use_function=*/!std::get<2>(GetParam()), registry);
+ ASSERT_EQ(errors::OK, s.code()) << s.error_message();
+
+ TF_Tensor* result_tensor;
+ s = getValue(outputs[0], &result_tensor);
+ ASSERT_EQ(errors::OK, s.code()) << s.error_message();
+ auto result_value = static_cast<float*>(TF_TensorData(result_tensor));
+ EXPECT_NEAR(*result_value, 2.718, 0.001);
+ outputs[0]->Unref();
+ TF_DeleteTensor(result_tensor);
+ result_tensor = nullptr;
+}
+
// TODO(b/160888630): Enable this test with mlir after AddInputList is
// supported. It is needed for AddN op which is used for gradient aggregation.
#ifdef PLATFORM_GOOGLE
INSTANTIATE_TEST_SUITE_P(
UnifiedCAPI, CppGradients,
::testing::Combine(::testing::Values("graphdef"),
- /*tfrt*/ ::testing::Values(false),
+ /*tfrt*/ ::testing::Values(true, false),
/*executing_eagerly*/ ::testing::Values(true, false)));
#else
INSTANTIATE_TEST_SUITE_P(
diff --git a/tensorflow/c/eager/immediate_execution_operation.h b/tensorflow/c/eager/immediate_execution_operation.h
index 31a75c5b8c7..ee212b21a96 100644
--- a/tensorflow/c/eager/immediate_execution_operation.h
+++ b/tensorflow/c/eager/immediate_execution_operation.h
@@ -17,6 +17,7 @@ limitations under the License.
#include <memory>
+#include "absl/types/optional.h"
#include "absl/types/span.h"
#include "tensorflow/c/eager/abstract_operation.h"
#include "tensorflow/c/eager/immediate_execution_tensor_handle.h"
@@ -26,6 +27,7 @@ limitations under the License.
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/casts.h"
#include "tensorflow/core/platform/status.h"
+#include "tensorflow/core/util/abstract_stack_trace.h"
struct TFE_Op;
@@ -36,6 +38,10 @@ class ImmediateExecutionOperation : public AbstractOperation {
public:
virtual void Clear() = 0;
+ // Returns the inputs of this op.
+ virtual absl::Span<ImmediateExecutionTensorHandle* const> GetInputs()
+ const = 0;
+
virtual const tensorflow::OpDef* OpDef() const = 0;
virtual Status InputLength(const char* input_name, int* length) = 0;
@@ -44,6 +50,12 @@ class ImmediateExecutionOperation : public AbstractOperation {
// Experimental
virtual Status SetUseXla(bool enable) = 0;
+ // Set stack trace to be used for potential async error reporting.
+ virtual void SetStackTrace(AbstractStackTrace stack_trace) = 0;
+
+ // Returns the stack trace set by `SetStackTrace` if exists.
+ virtual absl::optional<AbstractStackTrace> GetStackTrace() = 0;
+
// For LLVM style RTTI.
static bool classof(const AbstractOperation* ptr) {
return ptr->getKind() == kEager || ptr->getKind() == kTfrt;
diff --git a/tensorflow/c/eager/immediate_execution_tensor_handle.h b/tensorflow/c/eager/immediate_execution_tensor_handle.h
index f7c77aa06db..6d32d482747 100644
--- a/tensorflow/c/eager/immediate_execution_tensor_handle.h
+++ b/tensorflow/c/eager/immediate_execution_tensor_handle.h
@@ -50,6 +50,14 @@ class ImmediateExecutionTensorHandle : public AbstractTensorHandle {
// Return a copy of the handle.
virtual ImmediateExecutionTensorHandle* Copy() = 0;
+ // Release any underlying resources, including the interface object.
+ //
+ // WARNING: The destructor of this class is marked as protected to disallow
+ // clients from directly destroying this object since it may manage its own
+ // lifetime through ref counting. Thus, this must be allocated on the heap and
+ // clients MUST call Release() in order to destroy an instance of this class.
+ virtual void Release() = 0;
+
// For LLVM style RTTI.
static bool classof(const AbstractTensorHandle* ptr) {
return ptr->getKind() == kEager || ptr->getKind() == kTfrt;
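
The Release() contract documented above can be demonstrated with a few lines of standard C++; HandleLike below is an illustrative stand-in for the real handle classes. Because the destructor is protected, a direct delete does not compile, which forces heap allocation and disposal through Release().

#include <iostream>

class HandleLike {
 public:
  // The only supported way to destroy an instance.
  virtual void Release() { delete this; }

 protected:
  virtual ~HandleLike() { std::cout << "destroyed\n"; }
};

int main() {
  HandleLike* h = new HandleLike();  // must live on the heap
  // delete h;   // would not compile: the destructor is protected
  h->Release();  // prints "destroyed"
}
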
diff --git a/tensorflow/c/eager/tape.h b/tensorflow/c/eager/tape.h
index 40cfa87dd66..27629bb3bdf 100644
--- a/tensorflow/c/eager/tape.h
+++ b/tensorflow/c/eager/tape.h
@@ -177,12 +177,12 @@ class GradientTape {
template <typename Gradient>
class ForwardFunction
: public std::function<Status(const std::vector<Gradient*>&,
- std::vector<Gradient*>*)> {
+ std::vector<Gradient*>*, bool)> {
public:
template <typename lambda_type>
explicit ForwardFunction(lambda_type lambda)
: std::function<Status(const std::vector<Gradient*>&,
- std::vector<Gradient*>*)>(lambda) {}
+ std::vector<Gradient*>*, bool)>(lambda) {}
};
// Computes Jacobian-vector products using forward-mode automatic
@@ -205,8 +205,9 @@ class ForwardAccumulator {
// Does not take ownership of `vspace`, which must outlive the
// ForwardAccumulator.
explicit ForwardAccumulator(
- const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace)
- : vspace_(vspace) {
+ const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace,
+ bool use_batch)
+ : vspace_(vspace), use_batch_(use_batch) {
call_state_.emplace(nullptr, false);
}
@@ -314,6 +315,9 @@ class ForwardAccumulator {
// available in language bindings (e.g. Python).
const VSpace<Gradient, BackwardFunction, TapeTensor>& vspace_;
+ // Decides if tangents are vectorized or not
+ bool use_batch_;
+
struct AccumulatorCallState {
AccumulatorCallState(
GradientTape<Gradient, BackwardFunction, TapeTensor>* backward_tape,
@@ -573,7 +577,7 @@ Status InitialGradients(
gtl::ArraySlice<Gradient*> output_gradients, const TensorTape& tensor_tape,
const OpTape<BackwardFunction, TapeTensor>& op_tape,
std::unordered_map<int64, std::vector<Gradient*>>* result) {
- for (int i = 0; i < target_tensor_ids.size(); ++i) {
+ for (int i = 0, end = target_tensor_ids.size(); i < end; ++i) {
const int64 id = target_tensor_ids[i];
if (output_gradients.empty() || output_gradients[i] == nullptr) {
auto tensor_it = tensor_tape.find(id);
@@ -699,7 +703,7 @@ Status GradientTape<Gradient, BackwardFunction, TapeTensor>::ComputeGradient(
std::vector<Gradient*> out_gradients;
out_gradients.reserve(trace.output_tensor_info.size());
std::vector<int64> unneeded_gradients;
- for (int i = 0; i < trace.input_tensor_id.size(); i++) {
+ for (int i = 0, end = trace.input_tensor_id.size(); i < end; i++) {
const auto& in_tensor_id = trace.input_tensor_id[i];
if (tensor_tape_.find(in_tensor_id) == tensor_tape_.end() &&
sources_set.find(in_tensor_id) == sources_set.end()) {
@@ -709,7 +713,7 @@ Status GradientTape::ComputeGradient(
bool any_gradient_nonzero = false;
std::vector<int> zero_indices;
- for (int i = 0; i < trace.output_tensor_info.size(); ++i) {
+ for (int i = 0, end = trace.output_tensor_info.size(); i < end; ++i) {
const int64 id = trace.output_tensor_info[i].GetID();
auto grad_it = gradients.find(id);
if (grad_it == gradients.end()) {
@@ -775,7 +779,7 @@ Status GradientTape::ComputeGradient(
}
VLOG(1) << "Got " << in_gradients.size() << " in_gradients for "
<< trace.input_tensor_id.size() << " sources";
- for (int i = 0; i < in_gradients.size(); ++i) {
+ for (int i = 0, end = in_gradients.size(); i < end; ++i) {
const int64 id = trace.input_tensor_id[i];
if (in_gradients[i] != nullptr) {
auto& unaggregated_grads = gradients[id];
@@ -968,7 +972,7 @@ ForwardAccumulator<Gradient, BackwardFunction, TapeTensor>::ForwardpropFromTape(
targets.reserve(grad.size());
used_in_grads.reserve(grad.size());
std::unordered_map<int64, TapeTensor> sources_that_are_targets;
- for (int grad_index = 0; grad_index < grad.size(); ++grad_index) {
+ for (int grad_index = 0, end = grad.size(); grad_index < end; ++grad_index) {
Gradient* grad_tensor = grad[grad_index];
if (grad_tensor != nullptr) {
int64 tensor_id = vspace_.TensorId(grad_tensor);
@@ -1062,7 +1066,8 @@ Status ForwardAccumulator<Gradient, BackwardFunction, TapeTensor>::Accumulate(
output_tensors, backward_function_getter, backward_function_deleter,
in_grads, &forward_grads));
} else {
- TF_RETURN_IF_ERROR((*forward_function)(in_grads, &forward_grads));
+ TF_RETURN_IF_ERROR(
+ (*forward_function)(in_grads, &forward_grads, use_batch_));
}
for (int i = 0; i < forward_grads.size(); ++i) {
if (forward_grads[i] != nullptr) {
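
The tape.h changes thread a single new boolean through the forward-gradient path: ForwardFunction gains a trailing use_batch parameter, and ForwardAccumulator stores the flag and forwards it at the call site, as the (*forward_function)(in_grads, &forward_grads, use_batch_) line above shows. A self-contained sketch of that plumbing with simplified stand-in types:

#include <functional>
#include <iostream>
#include <vector>

using Grad = double;
// Mirrors the new ForwardFunction shape: inputs, outputs, and a bool.
using ForwardFn =
    std::function<int(const std::vector<Grad*>&, std::vector<Grad*>*, bool)>;

class AccumulatorLike {
 public:
  explicit AccumulatorLike(bool use_batch) : use_batch_(use_batch) {}
  int Accumulate(const ForwardFn& forward_function,
                 const std::vector<Grad*>& in_grads,
                 std::vector<Grad*>* forward_grads) {
    // Mirrors: (*forward_function)(in_grads, &forward_grads, use_batch_)
    return forward_function(in_grads, forward_grads, use_batch_);
  }

 private:
  bool use_batch_;  // decides whether tangents are vectorized
};

int main() {
  Grad g = 2.0;
  ForwardFn passthrough = [](const std::vector<Grad*>& in,
                             std::vector<Grad*>* out, bool use_batch) {
    std::cout << "use_batch=" << use_batch << "\n";  // prints: use_batch=1
    out->push_back(in[0]);
    return 0;
  };
  AccumulatorLike acc(/*use_batch=*/true);
  std::vector<Grad*> out;
  acc.Accumulate(passthrough, {&g}, &out);
}
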
diff --git a/tensorflow/c/env.cc b/tensorflow/c/env.cc
index ce715c43acb..fbde13dea5a 100644
--- a/tensorflow/c/env.cc
+++ b/tensorflow/c/env.cc
@@ -186,3 +186,22 @@ void TF_JoinThread(TF_Thread* thread) {
// ::tensorflow::Thread joins on destruction
delete reinterpret_cast<::tensorflow::Thread*>(thread);
}
+
+void* TF_LoadSharedLibrary(const char* library_filename, TF_Status* status) {
+ void* handle = nullptr;
+ TF_SetStatus(status, TF_OK, "");
+ ::tensorflow::Set_TF_Status_from_Status(
+ status, ::tensorflow::Env::Default()->LoadDynamicLibrary(library_filename,
+ &handle));
+ return handle;
+}
+
+void* TF_GetSymbolFromLibrary(void* handle, const char* symbol_name,
+ TF_Status* status) {
+ void* symbol = nullptr;
+ TF_SetStatus(status, TF_OK, "");
+ ::tensorflow::Set_TF_Status_from_Status(
+ status, ::tensorflow::Env::Default()->GetSymbolFromLibrary(
+ handle, symbol_name, &symbol));
+ return symbol;
+}
diff --git a/tensorflow/c/env.h b/tensorflow/c/env.h
index 7dc7ac32f08..63e2c86ad44 100644
--- a/tensorflow/c/env.h
+++ b/tensorflow/c/env.h
@@ -184,6 +184,26 @@ TF_CAPI_EXPORT extern TF_Thread* TF_StartThread(const TF_ThreadOptions* options,
// Waits for the given thread to finish execution, then deletes it.
TF_CAPI_EXPORT extern void TF_JoinThread(TF_Thread* thread);
+// \brief Load a dynamic library.
+//
+// Pass "library_filename" to a platform-specific mechanism for dynamically
+// loading a library. The rules for determining the exact location of the
+// library are platform-specific and are not documented here.
+//
+// On success, places OK in status and returns the newly created library
+// handle. Otherwise, returns nullptr and sets the error status.
+TF_CAPI_EXPORT extern void* TF_LoadSharedLibrary(const char* library_filename,
+ TF_Status* status);
+
+// \brief Get a pointer to a symbol from a dynamic library.
+//
+// "handle" should be a pointer returned from a previous call to
+// TF_LoadSharedLibrary. On success, places OK in status and returns a pointer
+// to the located symbol. Otherwise, returns nullptr and sets the error status.
+TF_CAPI_EXPORT extern void* TF_GetSymbolFromLibrary(void* handle,
+ const char* symbol_name,
+ TF_Status* status);
+
#ifdef __cplusplus
}
#endif
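
A hedged usage sketch for the two new entry points, following the TF_Status conventions of this header. The library path "libmy_plugin.so" and the symbol name "TF_InitPlugin" are illustrative assumptions, not values required by the API.

#include <cstdio>

#include "tensorflow/c/env.h"
#include "tensorflow/c/tf_status.h"

int main() {
  TF_Status* status = TF_NewStatus();

  // Load the library; on failure the handle is nullptr and status is set.
  void* lib = TF_LoadSharedLibrary("libmy_plugin.so", status);
  if (TF_GetCode(status) != TF_OK) {
    std::fprintf(stderr, "load failed: %s\n", TF_Message(status));
    TF_DeleteStatus(status);
    return 1;
  }

  // Look up a symbol in the freshly loaded library.
  void* sym = TF_GetSymbolFromLibrary(lib, "TF_InitPlugin", status);
  if (TF_GetCode(status) != TF_OK) {
    std::fprintf(stderr, "symbol lookup failed: %s\n", TF_Message(status));
  } else {
    std::printf("resolved TF_InitPlugin at %p\n", sym);
  }

  TF_DeleteStatus(status);
  return 0;
}
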
diff --git a/tensorflow/c/experimental/BUILD b/tensorflow/c/experimental/BUILD
deleted file mode 100644
index 53cd99f18a6..00000000000
--- a/tensorflow/c/experimental/BUILD
+++ /dev/null
@@ -1,124 +0,0 @@
-# Description:
-# Experimental C APIs for TensorFlow.
-
-load(
- "//tensorflow:tensorflow.bzl",
- "tf_copts",
- "tf_cuda_library",
-)
-load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test")
-
-package(
- licenses = ["notice"], # Apache 2.0
-)
-
-tf_cuda_library(
- name = "rendezvous_internal",
- srcs = [
- "rendezvous.cc",
- ],
- hdrs = [
- "rendezvous.h",
- "rendezvous_internal.h",
- ],
- copts = tf_copts(),
- visibility = ["//tensorflow/c:__subpackages__"],
- deps = [
- "//tensorflow/c:c_api_internal",
- "//tensorflow/core:framework",
- "//tensorflow/core:framework_internal",
- "//tensorflow/core:lib",
- "//tensorflow/core:lib_internal",
- "//tensorflow/core/distributed_runtime:base_rendezvous_mgr",
- "//tensorflow/core/distributed_runtime:worker_env",
- "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
- ],
-)
-
-tf_cuda_library(
- name = "rendezvous",
- hdrs = [
- "rendezvous.h",
- ],
- copts = tf_copts(),
- visibility = ["//visibility:public"],
- deps = [
- ":rendezvous_internal",
- "//tensorflow/c:c_api",
- ],
-)
-
-tf_cuda_library(
- name = "network_internal",
- srcs = [
- "network.cc",
- ],
- hdrs = [
- "network.h",
- "network_internal.h",
- ],
- copts = tf_copts(),
- visibility = ["//tensorflow/c:__subpackages__"],
- deps = [
- ":rendezvous_internal",
- "//tensorflow/c:c_api_internal",
- "//tensorflow/core:framework_internal",
- "//tensorflow/core:lib",
- "//tensorflow/core:lib_internal",
- "//tensorflow/core:protos_all_cc",
- "//tensorflow/core/distributed_runtime:server_lib",
- "//tensorflow/core/distributed_runtime:worker_env",
- "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
- ],
-)
-
-tf_cuda_library(
- name = "network",
- hdrs = [
- "network.h",
- ],
- copts = tf_copts(),
- visibility = ["//visibility:public"],
- deps = [
- ":network_internal",
- ":rendezvous",
- "//tensorflow/c:c_api",
- ],
-)
-
-# -----------------------------------------------------------------------------
-# Tests
-
-tf_cuda_cc_test(
- name = "network_test",
- size = "medium",
- srcs = ["network_test.cc"],
- tags = ["noasan"],
- # We must ensure that the dependencies can be dynamically linked since
- # the shared library must be able to use core:framework.
- # linkstatic = tf_kernel_tests_linkstatic(),
- deps = [
- ":network",
- ":network_internal",
- ":rendezvous",
- ":rendezvous_internal",
- "//tensorflow/c:c_api",
- "//tensorflow/c:env",
- "//tensorflow/core:framework",
- "//tensorflow/core:framework_internal",
- "//tensorflow/core:lib",
- "//tensorflow/core:lib_internal",
- "//tensorflow/core:protos_all_cc",
- "//tensorflow/core:test",
- "//tensorflow/core:test_main",
- "//tensorflow/core/distributed_runtime:rendezvous_mgr_interface",
- "//tensorflow/core/distributed_runtime:server_lib",
- "//tensorflow/core/distributed_runtime:session_mgr",
- "//tensorflow/core/distributed_runtime:worker_env",
- "//tensorflow/core/distributed_runtime:worker_session",
- "//tensorflow/core/distributed_runtime/rpc:async_service_interface",
- "//tensorflow/core/distributed_runtime/rpc:grpc_server_lib",
- "@com_google_absl//absl/synchronization",
- "@com_google_absl//absl/time",
- ],
-)
diff --git a/tensorflow/c/experimental/filesystem/filesystem_interface.h b/tensorflow/c/experimental/filesystem/filesystem_interface.h
index 5463eb35088..6e05c861439 100644
--- a/tensorflow/c/experimental/filesystem/filesystem_interface.h
+++ b/tensorflow/c/experimental/filesystem/filesystem_interface.h
@@ -78,6 +78,11 @@ typedef struct TF_Filesystem {
void* plugin_filesystem;
} TF_Filesystem;
+typedef struct TF_TransactionToken {
+ void* token;
+ TF_Filesystem* owner;
+} TF_TransactionToken;
+
/// SECTION 2. Function tables for functionality provided by plugins
/// ----------------------------------------------------------------------------
///
@@ -679,6 +684,133 @@ typedef struct TF_FilesystemOps {
///
/// DEFAULT IMPLEMENTATION: No op.
void (*flush_caches)(const TF_Filesystem* filesystem);
+
+ /// Starts a new transaction.
+ ///
+ /// An opaque transaction token is returned in `token`. Ownership of the token
+ /// remains with the filesystem. The token is freed by the `end_transaction`
+ /// call, and any access to the token after that is invalid.
+ ///
+ /// In case of error, plugins must set `status` to a value different than
+ /// `TF_OK`, free memory allocated for `token` and return -1.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// Plugins:
+ /// * Must set `status` to `TF_OK` if the transaction was successfully started.
+ /// * Must set `status` to `TF_FAILED_PRECONDITION` if multiple transactions
+ /// are not supported.
+ /// * Might use any other error value for `status` to signal other errors.
+ int (*start_transaction)(const TF_Filesystem* filesystem,
+ TF_TransactionToken** token, TF_Status* status);
+
+ /// Ends the transaction and frees the `token`. Any access to the token after
+ /// that is invalid.
+ ///
+ /// In case of error, plugins must set `status` to a value different than
+ /// `TF_OK`, free memory allocated for `token` and return -1.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// Plugins:
+ /// * Must set `status` to `TF_OK` if the transaction was successfully finalized.
+ /// * Must set `status` to `TF_NOT_FOUND` if the token is invalid or not found.
+ /// * Might use any other error value for `status` to signal other errors.
+ int (*end_transaction)(const TF_Filesystem* filesystem,
+ TF_TransactionToken* token, TF_Status* status);
+
+ /// Adds the file or directory at `path` to the transaction in `token`. Adding
+ /// a path that does not exist yet to a transaction is valid.
+ ///
+ /// In case of error, plugins must set `status` to a value different than
+ /// `TF_OK`, free memory allocated for `token` and return -1.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// Plugins:
+ /// * Must set `status` to `TF_OK` if the path was successfully added to the
+ /// transaction.
+ /// * Must set `status` to `TF_NOT_FOUND` if `token` is invalid.
+ /// * Must set `status` to `TF_FAILED_PRECONDITION` if the file/directory is in
+ /// another transaction and multiple transactions are not supported.
+ /// * Might use any other error value for `status` to signal other errors.
+ int (*add_to_transaction)(const TF_Filesystem* filesystem, const char* path,
+ TF_TransactionToken* token, TF_Status* status);
+
+ /// Returns the transaction token for the file/directory at `path`. Note that
+ /// the path may not exist yet but can still be part of a transaction.
+ ///
+ /// The transaction token is returned in `token`. Ownership of the token
+ /// remains with the filesystem. The token is freed by the `end_transaction`
+ /// call, and any access to the token after that is invalid.
+ ///
+ /// In case of error, plugins must set `status` to a value different than
+ /// `TF_OK`, free memory allocated for `token` and return -1.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// Plugins:
+ /// * Must set `status` to `TF_OK` if a transaction for the path is found.
+ /// * Must set `status` to `TF_NOT_FOUND` if `path` is not part of any
+ /// transaction.
+ /// * Must set `status` to `TF_FAILED_PRECONDITION` if `path` is
+ /// not in this filesystem.
+ /// * Might use any other error value for `status` to signal other errors.
+ int (*get_transaction_for_path)(const TF_Filesystem* filesystem,
+ const char* path, TF_TransactionToken** token,
+ TF_Status* status);
+
+ /// Returns the transaction token for `path` if it is part of a transaction;
+ /// otherwise, starts a new transaction and adds `path` to it.
+ ///
+ /// Transaction token is returned in `token`. Ownership of the token is in
+ /// filesystem. Token will be freed in `end_transaction` call and any access
+ /// to token after that is invalid.
+ ///
+ /// In case of error, plugins must set `status` to a value different than
+ /// `TF_OK`, free memory allocated for `token` and return -1.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// Plugins:
+ /// * Must set `status` to `TF_OK` if a transaction was found or successfully
+ /// started.
+ /// * Must set `status` to `TF_NOT_FOUND` if `path` doesn't point to this
+ /// filesystem.
+ /// * Must set `status` to `TF_FAILED_PRECONDITION` if file/directory is
+ /// not in any transaction and multiple transactions are not supported.
+ /// * Might use any other error value for `status` to signal other errors.
+ int (*get_or_start_transaction_for_path)(const TF_Filesystem* filesystem,
+ const char* path,
+ TF_TransactionToken** token,
+ TF_Status* status);
+
+ /// Decodes the transaction token in `token` to a human-readable format for
+ /// debugging.
+ ///
+ /// A new `char*` buffer must be allocated by this method. Core TensorFlow
+ /// manages the lifetime of the buffer after the call. Thus, all callers of
+ /// this method must take ownership of the returned pointer.
+ ///
+ /// Plugins must not return `nullptr`. Returning empty strings is allowed.
+ ///
+ /// The allocation and freeing of memory must happen via the functions sent to
+ /// core TensorFlow upon registration (see the `TF_FilesystemPluginInfo`
+ /// structure in Section 4).
+ ///
+ /// DEFAULT IMPLEMENTATION: Dump token and owner address.
+ char* (*decode_transaction_token)(const TF_Filesystem* filesystem,
+ const TF_TransactionToken* token);
+
} TF_FilesystemOps;
// LINT.ThenChange(:filesystem_ops_version)
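
To make the transaction surface concrete, here is a hedged sketch of how a plugin might implement the two simplest hooks. plugin_memory_allocate and plugin_memory_free stand in for the allocation functions a plugin hands to core TensorFlow via TF_FilesystemPluginInfo, and the minimal bookkeeping shown is an illustrative policy, not something the interface mandates.

#include <cstdlib>

#include "tensorflow/c/experimental/filesystem/filesystem_interface.h"
#include "tensorflow/c/tf_status.h"

// Stand-ins for the allocators registered in TF_FilesystemPluginInfo.
static void* plugin_memory_allocate(std::size_t size) {
  return std::calloc(1, size);
}
static void plugin_memory_free(void* ptr) { std::free(ptr); }

static int StartTransaction(const TF_Filesystem* filesystem,
                            TF_TransactionToken** token, TF_Status* status) {
  *token = static_cast<TF_TransactionToken*>(
      plugin_memory_allocate(sizeof(TF_TransactionToken)));
  (*token)->token = nullptr;  // plugin-private transaction state would go here
  (*token)->owner = const_cast<TF_Filesystem*>(filesystem);
  TF_SetStatus(status, TF_OK, "");
  return 0;
}

static int EndTransaction(const TF_Filesystem* filesystem,
                          TF_TransactionToken* token, TF_Status* status) {
  if (token == nullptr || token->owner != filesystem) {
    TF_SetStatus(status, TF_NOT_FOUND, "token is invalid or not found");
    return -1;
  }
  plugin_memory_free(token);  // the filesystem owns and frees the token
  TF_SetStatus(status, TF_OK, "");
  return 0;
}
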
diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.cc b/tensorflow/c/experimental/filesystem/modular_filesystem.cc
index 58541ea2b36..9c8d3518800 100644
--- a/tensorflow/c/experimental/filesystem/modular_filesystem.cc
+++ b/tensorflow/c/experimental/filesystem/modular_filesystem.cc
@@ -35,7 +35,8 @@ using UniquePtrTo_TF_Status =
::std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
Status ModularFileSystem::NewRandomAccessFile(
- const std::string& fname, std::unique_ptr<RandomAccessFile>* result) {
+ const std::string& fname, TransactionToken* token,
+ std::unique_ptr<RandomAccessFile>* result) {
if (ops_->new_random_access_file == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support NewRandomAccessFile()"));
@@ -54,7 +55,8 @@ Status ModularFileSystem::NewRandomAccessFile(
}
Status ModularFileSystem::NewWritableFile(
- const std::string& fname, std::unique_ptr<WritableFile>* result) {
+ const std::string& fname, TransactionToken* token,
+ std::unique_ptr<WritableFile>* result) {
if (ops_->new_writable_file == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support NewWritableFile()"));
@@ -73,7 +75,8 @@ Status ModularFileSystem::NewWritableFile(
}
Status ModularFileSystem::NewAppendableFile(
- const std::string& fname, std::unique_ptr<WritableFile>* result) {
+ const std::string& fname, TransactionToken* token,
+ std::unique_ptr<WritableFile>* result) {
if (ops_->new_appendable_file == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support NewAppendableFile()"));
@@ -92,7 +95,8 @@ Status ModularFileSystem::NewAppendableFile(
}
Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile(
- const std::string& fname, std::unique_ptr<ReadOnlyMemoryRegion>* result) {
+ const std::string& fname, TransactionToken* token,
+ std::unique_ptr<ReadOnlyMemoryRegion>* result) {
if (ops_->new_read_only_memory_region_from_file == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname,
@@ -112,7 +116,8 @@ Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile(
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::FileExists(const std::string& fname) {
+Status ModularFileSystem::FileExists(const std::string& fname,
+ TransactionToken* token) {
if (ops_->path_exists == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support FileExists()"));
@@ -125,6 +130,7 @@ Status ModularFileSystem::FileExists(const std::string& fname) {
}
bool ModularFileSystem::FilesExist(const std::vector<std::string>& files,
+ TransactionToken* token,
std::vector<Status>* status) {
if (ops_->paths_exist == nullptr)
return FileSystem::FilesExist(files, status);
@@ -157,6 +163,7 @@ bool ModularFileSystem::FilesExist(const std::vector<std::string>& files,
}
Status ModularFileSystem::GetChildren(const std::string& dir,
+ TransactionToken* token,
std::vector<std::string>* result) {
if (ops_->get_children == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
@@ -182,6 +189,7 @@ Status ModularFileSystem::GetChildren(const std::string& dir,
}
Status ModularFileSystem::GetMatchingPaths(const std::string& pattern,
+ TransactionToken* token,
std::vector<std::string>* result) {
if (ops_->get_matching_paths == nullptr)
return internal::GetMatchingPaths(this, Env::Default(), pattern, result);
@@ -203,7 +211,8 @@ Status ModularFileSystem::GetMatchingPaths(const std::string& pattern,
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::DeleteFile(const std::string& fname) {
+Status ModularFileSystem::DeleteFile(const std::string& fname,
+ TransactionToken* token) {
if (ops_->delete_file == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support DeleteFile()"));
@@ -216,6 +225,7 @@ Status ModularFileSystem::DeleteFile(const std::string& fname) {
}
Status ModularFileSystem::DeleteRecursively(const std::string& dirname,
+ TransactionToken* token,
int64* undeleted_files,
int64* undeleted_dirs) {
if (undeleted_files == nullptr || undeleted_dirs == nullptr)
@@ -238,7 +248,8 @@ Status ModularFileSystem::DeleteRecursively(const std::string& dirname,
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::DeleteDir(const std::string& dirname) {
+Status ModularFileSystem::DeleteDir(const std::string& dirname,
+ TransactionToken* token) {
if (ops_->delete_dir == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", dirname, " does not support DeleteDir()"));
@@ -250,7 +261,8 @@ Status ModularFileSystem::DeleteDir(const std::string& dirname) {
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname) {
+Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname,
+ TransactionToken* token) {
if (ops_->recursively_create_dir == nullptr)
return FileSystem::RecursivelyCreateDir(dirname);
@@ -261,7 +273,8 @@ Status ModularFileSystem::RecursivelyCreateDir(const std::string& dirname) {
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::CreateDir(const std::string& dirname) {
+Status ModularFileSystem::CreateDir(const std::string& dirname,
+ TransactionToken* token) {
if (ops_->create_dir == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", dirname, " does not support CreateDir()"));
@@ -273,7 +286,8 @@ Status ModularFileSystem::CreateDir(const std::string& dirname) {
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::Stat(const std::string& fname, FileStatistics* stat) {
+Status ModularFileSystem::Stat(const std::string& fname,
+ TransactionToken* token, FileStatistics* stat) {
if (ops_->stat == nullptr)
return errors::Unimplemented(tensorflow::strings::StrCat(
"Filesystem for ", fname, " does not support Stat()"));
@@ -296,7 +310,8 @@ Status ModularFileSystem::Stat(const std::string& fname, FileStatistics* stat) {
return StatusFromTF_Status(plugin_status.get());
}
-Status ModularFileSystem::IsDirectory(const std::string& name) {
+Status ModularFileSystem::IsDirectory(const std::string& name,
+ TransactionToken* token) {
if (ops_->is_directory == nullptr) return FileSystem::IsDirectory(name);
UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus);
@@ -307,6 +322,7 @@ Status ModularFileSystem::IsDirectory(const std::string& name) {
}
Status ModularFileSystem::GetFileSize(const std::string& fname,
+ TransactionToken* token,
uint64* file_size) {
if (ops_->get_file_size == nullptr) {
FileStatistics stat;
@@ -327,7 +343,8 @@ Status ModularFileSystem::GetFileSize(const std::string& fname,
}
Status ModularFileSystem::RenameFile(const std::string& src,
- const std::string& target) {
+ const std::string& target,
+ TransactionToken* token) {
if (ops_->rename_file == nullptr) {
Status status = CopyFile(src, target);
if (status.ok()) status = DeleteFile(src);
@@ -343,7 +360,8 @@ Status ModularFileSystem::RenameFile(const std::string& src,
}
Status ModularFileSystem::CopyFile(const std::string& src,
- const std::string& target) {
+ const std::string& target,
+ TransactionToken* token) {
if (ops_->copy_file == nullptr) return FileSystem::CopyFile(src, target);
UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus);
@@ -366,7 +384,7 @@ std::string ModularFileSystem::TranslateName(const std::string& name) const {
return ret;
}
-void ModularFileSystem::FlushCaches() {
+void ModularFileSystem::FlushCaches(TransactionToken* token) {
if (ops_->flush_caches != nullptr) ops_->flush_caches(filesystem_.get());
}
@@ -443,7 +461,7 @@ Status RegisterFilesystemPlugin(const std::string& dso_path) {
// Step 1: Load plugin
Env* env = Env::Default();
void* dso_handle;
- TF_RETURN_IF_ERROR(env->LoadLibrary(dso_path.c_str(), &dso_handle));
+ TF_RETURN_IF_ERROR(env->LoadDynamicLibrary(dso_path.c_str(), &dso_handle));
// Step 2: Load symbol for `TF_InitPlugin`
void* dso_symbol;
diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem.h b/tensorflow/c/experimental/filesystem/modular_filesystem.h
index baf665fd6aa..061a1aa446b 100644
--- a/tensorflow/c/experimental/filesystem/modular_filesystem.h
+++ b/tensorflow/c/experimental/filesystem/modular_filesystem.h
@@ -59,36 +59,48 @@ class ModularFileSystem final : public FileSystem {
~ModularFileSystem() override { ops_->cleanup(filesystem_.get()); }
+ TF_USE_FILESYSTEM_METHODS_WITH_NO_TRANSACTION_SUPPORT;
+
Status NewRandomAccessFile(
- const std::string& fname,
+ const std::string& fname, TransactionToken* token,
std::unique_ptr<RandomAccessFile>* result) override;
- Status NewWritableFile(const std::string& fname,
+ Status NewWritableFile(const std::string& fname, TransactionToken* token,
std::unique_ptr<WritableFile>* result) override;
- Status NewAppendableFile(const std::string& fname,
+ Status NewAppendableFile(const std::string& fname, TransactionToken* token,
std::unique_ptr<WritableFile>* result) override;
Status NewReadOnlyMemoryRegionFromFile(
- const std::string& fname,
+ const std::string& fname, TransactionToken* token,
std::unique_ptr<ReadOnlyMemoryRegion>* result) override;
- Status FileExists(const std::string& fname) override;
+ Status FileExists(const std::string& fname, TransactionToken* token) override;
bool FilesExist(const std::vector<std::string>& files,
+ TransactionToken* token,
std::vector<Status>* status) override;
- Status GetChildren(const std::string& dir,
+ Status GetChildren(const std::string& dir, TransactionToken* token,
std::vector<std::string>* result) override;
- Status GetMatchingPaths(const std::string& pattern,
+ Status GetMatchingPaths(const std::string& pattern, TransactionToken* token,
std::vector<std::string>* results) override;
- Status DeleteFile(const std::string& fname) override;
- Status DeleteRecursively(const std::string& dirname, int64* undeleted_files,
+ Status DeleteFile(const std::string& fname, TransactionToken* token) override;
+ Status DeleteRecursively(const std::string& dirname, TransactionToken* token,
+ int64* undeleted_files,
int64* undeleted_dirs) override;
- Status DeleteDir(const std::string& dirname) override;
- Status RecursivelyCreateDir(const std::string& dirname) override;
- Status CreateDir(const std::string& dirname) override;
- Status Stat(const std::string& fname, FileStatistics* stat) override;
- Status IsDirectory(const std::string& fname) override;
- Status GetFileSize(const std::string& fname, uint64* file_size) override;
- Status RenameFile(const std::string& src, const std::string& target) override;
- Status CopyFile(const std::string& src, const std::string& target) override;
+ Status DeleteDir(const std::string& dirname,
+ TransactionToken* token) override;
+ Status RecursivelyCreateDir(const std::string& dirname,
+ TransactionToken* token) override;
+ Status CreateDir(const std::string& dirname,
+ TransactionToken* token) override;
+ Status Stat(const std::string& fname, TransactionToken* token,
+ FileStatistics* stat) override;
+ Status IsDirectory(const std::string& fname,
+ TransactionToken* token) override;
+ Status GetFileSize(const std::string& fname, TransactionToken* token,
+ uint64* file_size) override;
+ Status RenameFile(const std::string& src, const std::string& target,
+ TransactionToken* token) override;
+ Status CopyFile(const std::string& src, const std::string& target,
+ TransactionToken* token) override;
std::string TranslateName(const std::string& name) const override;
- void FlushCaches() override;
+ void FlushCaches(TransactionToken* token) override;
private:
std::unique_ptr<TF_Filesystem> filesystem_;
diff --git a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc
index 8ee47da01dd..7e0a95cc915 100644
--- a/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc
+++ b/tensorflow/c/experimental/filesystem/modular_filesystem_test.cc
@@ -33,7 +33,6 @@ limitations under the License.
// Windows defines the following macros to convert foo to fooA or fooW,
// depending on the type of the string argument. We don't use these macros, so
// undefine them here.
-#undef LoadLibrary
#undef CopyFile
#undef DeleteFile
#undef TranslateName
diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD
index a0c13701766..68875d61e47 100644
--- a/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD
+++ b/tensorflow/c/experimental/filesystem/plugins/gcs/BUILD
@@ -25,12 +25,15 @@ cc_library(
"//tensorflow:windows": get_win_copts(),
}),
deps = [
+ ":expiring_lru_cache",
":gcs_helper",
+ ":ram_file_block_cache",
"//tensorflow/c:env",
"//tensorflow/c:tf_status",
"//tensorflow/c/experimental/filesystem:filesystem_interface",
"@com_github_googlecloudplatform_google_cloud_cpp//:storage_client",
"@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:variant",
],
)
@@ -44,14 +47,6 @@ cc_library(
],
)
-cc_library(
- name = "file_block_cache",
- hdrs = ["file_block_cache.h"],
- deps = [
- "//tensorflow/c:tf_status",
- ],
-)
-
cc_library(
name = "cleanup",
hdrs = ["cleanup.h"],
@@ -63,7 +58,6 @@ cc_library(
hdrs = ["ram_file_block_cache.h"],
deps = [
":cleanup",
- ":file_block_cache",
"//tensorflow/c:env",
"//tensorflow/c:tf_status",
"@com_google_absl//absl/base:core_headers",
diff --git a/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h b/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h
deleted file mode 100644
index 3ba7d8d7993..00000000000
--- a/tensorflow/c/experimental/filesystem/plugins/gcs/file_block_cache.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_FILE_BLOCK_CACHE_H_
-#define TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_FILE_BLOCK_CACHE_H_
-
-#include
-#include
-#include
-#include